Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- --------------Swamy Muthu---------------------
- how to import data into partitioned hive tables from sqoop
- ------------working code------------------
- Data without rowid
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM HP_COMPONENT_SMALL A where A.MATERIAL_NUMBER in(select M.MATERIAL_NUMBER FROM MATERIAL_MASTER_SMALL M ) AND $CONDITIONS' --target-dir /user/centos7/hp_component_small --fields-terminated-by '\001' -m 8 --split-by "ROWIDTOCHAR(ROWID)" -z --fetch-size=2000
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM MATERIAL_MASTER A where A.MATERIAL_NUMBER in(select M.MATERIAL_NUMBER FROM MATERIAL_MASTER_SMALL M ) AND $CONDITIONS' --target-dir /user/centos7/material_master --fields-terminated-by '\001' -m 8 --split-by "ROWIDTOCHAR(ROWID)" -z --fetch-size=2000
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM HP_COMPONENT A where A.MATERIAL_NUMBER in(select M.MATERIAL_NUMBER FROM MATERIAL_MASTER_SMALL M ) AND $CONDITIONS' --target-dir /user/centos7/hp_component_small --fields-terminated-by '\001' -m 8 --split-by "ROWIDTOCHAR(ROWID)" -z --fetch-size=2000
- --in dev
- material master 10,162,316 164 sec 1.4 GB conventional mappers 8 fetch-size 2000 to 3000
- ---query from partition-----------
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM HP_COMPONENT PARTITION (HP_COMP_2013_00) A where A.MATERIAL_NUMBER in(select M.MATERIAL_NUMBER FROM MATERIAL_MASTER_SMALL M ) AND $CONDITIONS' --target-dir /user/centos7/hp_component_2013 --fields-terminated-by '\001' -m 4 --boundary-query 'select 20130101, 20131231 from dual' --split-by YYYYMMDD
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM HP_COMPONENT A where A.MATERIAL_NUMBER in(select M.MATERIAL_NUMBER FROM MATERIAL_MASTER_SMALL M ) AND $CONDITIONS' --target-dir /user/centos7/hp_component_2013 --fields-terminated-by '\001' -m 4 --boundary-query 'select 20110101, 20141231 from dual' --split-by YYYYMMDD
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM HP_COMPONENT A where A.MATERIAL_NUMBER in(select M.MATERIAL_NUMBER FROM MATERIAL_MASTER_SMALL M ) AND $CONDITIONS' --target-dir /user/centos7/hp_component_2013 --fields-terminated-by '\001' -m 4 --boundary-query 'select 20020101, 20161231 from dual' --split-by YYYYMMDD
- NOTE: --boundary-query cannot be combined with a free-form --query import; it only worked with the --table form above
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM HP_COMPONENT_SMALL A where A.MATERIAL_NUMBER in(select M.MATERIAL_NUMBER FROM MATERIAL_MASTER_SMALL M ) AND $CONDITIONS' --target-dir /user/centos7/hp_component_small --fields-terminated-by '\001' -m 4 --boundary-query 'SELECT MIN(ROWIDTOCHAR(ROWID)), MAX(ROWIDTOCHAR(ROWID)) FROM HP_COMPONENT_SMALL '
- --table with hive
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table HP_COMPONENT_SMALL --target-dir /user/centos7/hp_component_small --fields-terminated-by '\001' --direct -m 4 --hive-import
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table MATERIAL_MASTER_SMALL --target-dir /user/centos7/material_master_small --fields-terminated-by '\001' --direct -m 4 --hive-import
- --check performance
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM MATERIAL_MASTER A WHERE $CONDITIONS' --target-dir /user/centos7/material_master --fields-terminated-by '\001' -m 8 --split-by "ROWIDTOCHAR(ROWID)" --fetch-size=2000
- 168.7249 seconds
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table MATERIAL_MASTER --target-dir /user/centos7/material_master --fields-terminated-by '\001' -m 8 --direct
- 106 seconds.
- -- direct is working
- sqoop import --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table HP_COMPONENT_SMALL --target-dir /user/centos7/hp_component_small --fields-terminated-by '\001' -m 4 --direct
- sqoop import --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM HP_COMPONENT_SMALL A WHERE $CONDITIONS' --target-dir /user/centos7/hp_component_small --fields-terminated-by '\001' -m 4 --direct
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM HP_COMPONENT_SMALL A where $CONDITIONS' --target-dir /user/centos7/hp_component_small --fields-terminated-by '\001' --direct -m 4 --split-by "ROWIDTOCHAR(ROWID)"
- --sqoop export
- sqoop export --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table HP_COMPONENT_SMALL_EXPORT --export-dir /user/centos7/hp_component_small --fields-terminated-by '\001'
- sqoop export --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table HP_COMPONENT_SMALL_EXPORT --export-dir /apps/hive/warehouse/hp_component_small_s --fields-terminated-by '\001'
- ---------------------------------------
- material master 10,162,316 164 sec 1.4 GB conventional mappers 4
- material master 10,162,316 131 sec 1.4 GB conventional mappers 8
- material master 10,162,316 134 sec 1.4 GB conventional mappers 12
- material master 10,162,316 164 sec 1.4 GB Direct
- select round(50000000000/10000000*134/60/60/24) days from dual;
- sqoop import --connect jdbc:oracle:thin:@gcu90609.houston.hp.com:1526/psnri --username SNR_DBA --password wwsnrs_dba_2010 --query 'select ROWIDTOCHAR(ROWID) RID,A.* FROM HPE_PL_TMP A WHERE $CONDITIONS' --split-by RID -m 4 --target-dir /user/centos7
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@gcu90609.houston.hp.com:1526/psnri --query 'select ROWIDTOCHAR(ROWID) RID,A.* FROM HP_UNIT_SERIAL_NUMBER_TEST A WHERE $CONDITIONS' --split-by "ROWIDTOCHAR(ROWID)" --target-dir /user/centos7/hp_unit_serial_number_test --fields-terminated-by '\001' -m 4
- --null-string "" --null-non-string "" --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp' -m 4
- sqoop import --connect jdbc:oracle:thin:@gcu90609.houston.hp.com:1526/psnri --username SNR_DBA --password wwsnrs_dba_2010 --table MATERIAL_MASTER --target-dir /user/centos7/material_master --fields-terminated-by '\001' -m 4
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@gcu90609.houston.hp.com:1526/psnri --username SNR_DBA --password wwsnrs_dba_2010 --table MATERIAL_MASTER --target-dir /user/centos7/material_master --fields-terminated-by '\001' -m 4
- ------------------------------Tuning --------------------------------
- try --direct import
- --compression
- --default fetch size 1000 can be increased?
- -- batch export
- ------------------------final design scripts ---------------------------------
- 1. Import: direct-mode import of the full table
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@gcu90609.houston.hp.com:1526/psnri --username SNR_DBA --password wwsnrs_dba_2010 --table HP_COMPONENT --fields-terminated-by '\001' --direct -m 12 --null-string "" --null-non-string "" --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,MANUFACTURE_DATE=java.sql.Timestamp' --map-column-hive YYYYMMDD=INT,QUANTITY=INT --hive-import
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table MATERIAL_MASTER_SMALL --target-dir /user/centos7/material_master_small --fields-terminated-by '\001' --direct -m 4 --hive-import
- 2. Filter in hive
- create table hp_component_small_s as select * from hp_component_small hp where hp.material_number in(select m.material_number from material_master_small m);
- -- need to increase mappers!!
- 3. Export hive table to Oracle
- sqoop export --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table HP_COMPONENT_SMALL_EXPORT --export-dir /apps/hive/warehouse/hp_component_small_s --input-fields-terminated-by '\001' --map-column-java 'DATE_CREATED=java.sql.Timestamp'
- --direct gives ORA-01858: a non-numeric character was found where a numeric was expected
- --alternative use nologging??
- -------------------------------------------------------------------------- time estimates-------------------------------
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@gcu90609.houston.hp.com:1526/psnri --username SNR_DBA --password wwsnrs_dba_2010 --table UNIT_SERIAL_NUMBER --fields-terminated-by '\001' --direct -m 8 --null-string "" --null-non-string "" --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,MANUFACTURE_DATE=java.sql.Timestamp,CCP_CONFIGURATION_DATE=java.sql.Timestamp' --map-column-hive YYYYMMDD=INT,CC_INDICATOR=INT --fetch-size 2000 -m 8 --hive-import
- create table unit_serial_number_s as select * from unit_serial_number hp where hp.material_number in(select m.material_number from material_master_small m);
- sqoop export --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table UNIT_SERIAL_NUMBER_EXPORT --export-dir /apps/hive/warehouse/unit_serial_number --input-fields-terminated-by '\001' --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,MANUFACTURE_DATE=java.sql.Timestamp,CCP_CONFIGURATION_DATE=java.sql.Timestamp' --null-string "" --null-non-string ""
- import :
- unit_serial_number count 51879578 size 6 GB time 412 secs
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table J_TEST --fields-terminated-by '\001' --direct --null-string "" --null-non-string "" --map-column-hive NO=INT --hive-import
- sqoop export --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table J_TEST_EXPORT --export-dir /apps/hive/warehouse/j_test --input-fields-terminated-by '\001'
- sqoop export --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table UNIT_SERIAL_NUMBER_EXPORT --export-dir /apps/hive/warehouse/unit_serial_number --input-fields-terminated-by '\001' --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,MANUFACTURE_DATE=java.sql.Timestamp,CCP_CONFIGURATION_DATE=java.sql.Timestamp' --null-string "" --null-non-string "" -m 8
- sqoop export --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table UNIT_SERIAL_NUMBER_EXPORT --export-dir /user/centos7/test/bad_data --input-fields-terminated-by '\001' --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,MANUFACTURE_DATE=java.sql.Timestamp,CCP_CONFIGURATION_DATE=java.sql.Timestamp' --null-string "" --null-non-string "" -m 8
- ------------------------ode_exploded_spi-------------------------------------------
- --the commands below work successfully; however, the PIN column should not be used as the --split-by column because it contains NULL values
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table ODE_EXPLODED_SPI --fields-terminated-by '\001' --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,DATE_END=java.sql.Timestamp' --null-string "" --null-non-string "" --target-dir /apps/hive/warehouse/ode_exploded_spi -m 8 --split-by PIN --hive-drop-import-delims --hive-import
- sqoop export -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table ODE_EXPLODED_SPI_EXPORT --export-dir /apps/hive/warehouse/ode_exploded_spi --input-fields-terminated-by '\001' --null-string "" --null-non-string "" --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,DATE_END=java.sql.Timestamp' -m 8
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query "select pin,RELATED_PIN,COMPOSED_OF_RSN,DATE_UPDATED,DATE_END,DATE_CREATED,PIN_OPT,RELATED_PIN_OPT,AS_MANUFACTURED,AS_PLANNED,AS_SUPPORTED,AS_REPAIRED,QUANTITY,CREW_ELIGIBLE,OPTION_EFFECT,CRITICAL_FLAG,YYYYMMDD,STATUS,CORP_DESCRIPTION,replace(replace(ENHANCE_DESCRIPTION,chr(10),''),chr(13),'') as ENHANCE_DESCRIPTION,KEYWORD,CATAGORY from ode_exploded_spi where \$CONDITIONS" --fields-terminated-by '\001' --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,DATE_END=java.sql.Timestamp' --null-string "" --null-non-string "" --target-dir /apps/hive/warehouse/ode_exploded_spi -m 8 --split-by PIN --hive-import --hive-table ode_exploded_spi
- -----------------unit_serial_number export failing---------------
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select * from UNIT_SERIAL_NUMBER where $CONDITIONS' --fields-terminated-by '\001' --null-string '\\N' --null-non-string '\\N' --target-dir /apps/hive/warehouse/unit_serial_number --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,MANUFACTURE_DATE=java.sql.Timestamp,CCP_CONFIGURATION_DATE=java.sql.Timestamp' --map-column-hive YYYYMMDD=INT,CC_INDICATOR=INT --fetch-size 2000 --hive-drop-import-delims -m 8 --split-by "ROWIDTOCHAR(ROWID)" --hive-import --hive-table unit_serial_number
- sqoop export -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table UNIT_SERIAL_NUMBER_EXPORT --export-dir /apps/hive/warehouse/unit_serial_number --input-fields-terminated-by '\001' --input-null-string '\\N' --input-null-non-string '\\N' --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,MANUFACTURE_DATE=java.sql.Timestamp,CCP_CONFIGURATION_DATE=java.sql.Timestamp' --map-column-hive YYYYMMDD=INT,CC_INDICATOR=INT -m 8
- -----------------------------------iss diagnostics details works --------------------------------
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@gcu90609.houston.hp.com:1526/psnri --username SNR_DBA --password wwsnrs_dba_2010 --table ISS_DIAGNOSTICS_DETAILS --fields-terminated-by '\001' --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp' --null-string "" --null-non-string "" --target-dir /apps/hive/warehouse/ode_exploded_spi -m 8 --split-by YYYYMMDD --hive-drop-import-delims --hive-import
- sqoop export -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table ISS_DIAGNOSTICS_DETAILS_EXPORT --export-dir /apps/hive/warehouse/iss_diagnostics_details --input-fields-terminated-by '\001' --null-string "" --null-non-string "" --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp' -m 8
- import 5995461 1.3088 GB in 153.1098 seconds
- ----------------------------------------------------build operations hist
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table CQCS_DBA.BUILD_OPERATION_HIST --fields-terminated-by '\001' --map-column-java 'MODIFY_DATE=java.sql.Timestamp,SA_WEEK_MFG=java.sql.Timestamp,ASSLY_DATE=java.sql.Timestamp,SFT_START_DATE=java.sql.Timestamp,SFT_COMP_DATE=java.sql.Timestamp,SFT_ASSLY_DATE=java.sql.Timestamp' --null-string "" --null-non-string "" --target-dir /apps/hive/warehouse/build_operation_hist -m 8 --split-by ITEM_ID --hive-drop-import-delims --hive-import
- sqoop export -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table BUILD_OPERATION_HIST_EXPORT --export-dir /apps/hive/warehouse/build_operation_hist --input-fields-terminated-by '\001' --null-string "" --null-non-string "" --map-column-java 'MODIFY_DATE=java.sql.Timestamp,SA_WEEK_MFG=java.sql.Timestamp,ASSLY_DATE=java.sql.Timestamp,SFT_START_DATE=java.sql.Timestamp,SFT_COMP_DATE=java.sql.Timestamp,SFT_ASSLY_DATE=java.sql.Timestamp' -m 8
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table BUILD_OPERATION_HIST_BAD --fields-terminated-by '\001' --map-column-java 'MODIFY_DATE=java.sql.Timestamp,SA_WEEK_MFG=java.sql.Timestamp,ASSLY_DATE=java.sql.Timestamp,SFT_START_DATE=java.sql.Timestamp,SFT_COMP_DATE=java.sql.Timestamp,SFT_ASSLY_DATE=java.sql.Timestamp' --null-string "" --null-non-string "" --target-dir /apps/hive/warehouse/build_operation_hist_bad -m 1 --hive-drop-import-delims --hive-import
- sqoop export -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table BUILD_OPERATION_HIST_EXPORT --export-dir /apps/hive/warehouse/build_operation_hist --input-fields-terminated-by '\001' --null-string "" --null-non-string "" --map-column-java 'MODIFY_DATE=java.sql.Timestamp,SA_WEEK_MFG=java.sql.Timestamp,ASSLY_DATE=java.sql.Timestamp,SFT_START_DATE=java.sql.Timestamp,SFT_COMP_DATE=java.sql.Timestamp,SFT_ASSLY_DATE=java.sql.Timestamp' -m 8
- import 5995461 1.3088 GB in 153.1098 seconds
- ----------------------------------------------------------partition import -----------------------------------------
- sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select serial_number,yyyymmdd,substr(yyyymmdd,1,4) as year from NCC_SMALL where yyyymmdd between 20120101 and 20121231 and $CONDITIONS' --fields-terminated-by '\001' --null-string '\\N' --null-non-string '\\N' --hive-drop-import-delims -m 4 --split-by "ROWIDTOCHAR(ROWID)" --hcatalog-database default --hcatalog-table NCC_SMALL
- sqoop export -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table NCC_SMALL_ORC --export-dir /apps/hive/warehouse/ncc_small_orc --input-fields-terminated-by '\001' --null-string '\\N' --null-non-string '\\N' -m 4
- ----------------------- hbase import------------------------------------------
- sqoop import --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table HUSN_HBASE --fields-terminated-by '\001' --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp' --null-string "" --null-non-string "" -m 1 --hive-drop-import-delims --hbase-table husn_hbase --column-family cf --hbase-create-table --hbase-row-key SERIAL_NUMBER
- sqoop import --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table HUSN_HBASE --fields-terminated-by '\001' --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp' --null-string "" --null-non-string "" -m 1 --hive-drop-import-delims --hbase-table husn_hbase1 --column-family cf --hbase-create-table --hbase-row-key 'SERIAL_NUMBER,MATERIAL_NUMBER'
- -------------------------export from hbase/hive ---------------
- load hive from hbase and export into Oracle
- sqoop export -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table HBTABLE --export-dir /apps/hive/warehouse/hbtable --input-fields-terminated-by '\001' -m 1
- ------------------------------------password file ---------------------------
- echo -n "my-secret-password" > sqoop.password   # -n: Sqoop --password-file must not contain a trailing newline
- hdfs dfs -put sqoop.password /user/$USER/sqoop.password
- hdfs dfs -chmod 400 /user/$USER/sqoop.password
- -------------------------------------------------------------------------------
- --as-avrodatafile
- --compress
- --compression-codec snappy or lzop
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement