The following bash script imports Oracle records into a Hive table, partitioned by date, using Sqoop. Both Hive and Sqoop are part of typical Hadoop distributions, such as the Hortonworks Sandbox.
#!/bin/bash
#
# Import one day's worth of records from an Oracle table into a Hive table
# partition using Sqoop. JDBC connection settings (--connect/--username/
# --password) are read from /usr/local/etc/sqoop-options.txt.
#
# Usage: script.sh schema table column-to-split-by date-column [YYYY-MM-DD]

# Uppercase a string. Oracle stores identifiers in upper case in its data
# dictionary (all_tab_columns), so normalize all identifier arguments.
upper() {
  echo "$1" | tr '[:lower:]' '[:upper:]'
}

if [ $# -ge 4 ]; then
  schema=$(upper "$1")
  table=$(upper "$2")
  column_to_split_by=$(upper "$3")
  date_column=$(upper "$4")
  # Default to the current date when no date is given, as the usage
  # text promises.
  date_value="${5:-$(date +%Y-%m-%d)}"
else
  echo
  echo "Usage: $(basename "$0") schema table column-to-split-by date-column [YYYY-MM-DD]"
  echo
  echo "Imports all records where value of date-column is \$date_value from"
  echo "Oracle table \$schema.\$table as a Hive table partition."
  echo "Hadoop will split the import job based on the column-to-split-by."
  echo "* The table must have the columns specified as column-to-split-by and date-column."
  echo "* The column-to-split-by must be finer granularity than date-column, ideally unique."
  echo "* The date_value must be in YYYY-MM-DD format."
  echo "* If date_value is unspecified, the current date will be used."
  exit 1
fi

echo "schema = $schema"
echo "table = $table"
echo "column_to_split_by = $column_to_split_by"
echo "date_column = $date_column"
echo "date_value = $date_value"

# We have to drop the partition, because --hive-overwrite does not seem to do it.
hive -e "use $schema; alter table $table drop if exists partition($date_column='$date_value');"

# Build the comma-separated column list for the SELECT, excluding the
# partition (date) column: Hive supplies that value via the partition key.
# The tr/grep stages strip sqoop eval's ASCII table borders and header row.
columns=$(
  sqoop eval \
    --options-file /usr/local/etc/sqoop-options.txt \
    --query "select column_name from all_tab_columns where table_name = '$table'" \
  | tr -d " |" \
  | grep -Ev "\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-|COLUMN_NAME|$date_column" \
  | tr '\n' ',' \
  | sed -e 's/\,$//'
)

# $CONDITIONS is a Sqoop placeholder (required for free-form query imports);
# the backslash keeps the shell from expanding it here.
query="select $columns from $schema.$table \
  where $date_column = to_date('$date_value', 'YYYY-MM-DD') \
  and \$CONDITIONS"
echo "query = $query"

sqoop import \
  --options-file "/usr/local/etc/sqoop-options.txt" \
  --query "$query" \
  --split-by "$column_to_split_by" \
  --target-dir "$schema.$table" \
  --hive-import \
  --hive-overwrite \
  --hive-table "$schema.$table" \
  --hive-partition-key "$date_column" \
  --hive-partition-value "$date_value" \
  --outdir "$HOME/java"
JDBC connection details
Put them into /usr/local/etc/sqoop-options.txt, in a format like this:
--connect
jdbc:oracle:thin:@hostname:port:SID
--username
oracle_username
--password
oracle_password