Advertisement
Guest User

Untitled

a guest
May 6th, 2017
100
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 22.34 KB | None | 0 0
  1. --------------Swamy Muthu---------------------
  2. how to import data into partitioned hive tables from sqoop
  3.  
  4. ------------working code------------------
  5. Data without rowid
  6. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM HP_COMPONENT_SMALL A where A.MATERIAL_NUMBER in(select M.MATERIAL_NUMBER FROM MATERIAL_MASTER_SMALL M ) AND $CONDITIONS' --target-dir /user/centos7/hp_component_small --fields-terminated-by '\001' -m 8 --split-by "ROWIDTOCHAR(ROWID)" -z --fetch-size=2000
  7.  
  8. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM MATERIAL_MASTER A where A.MATERIAL_NUMBER in(select M.MATERIAL_NUMBER FROM MATERIAL_MASTER_SMALL M ) AND $CONDITIONS' --target-dir /user/centos7/material_master --fields-terminated-by '\001' -m 8 --split-by "ROWIDTOCHAR(ROWID)" -z --fetch-size=2000
  9.  
  10.  
  11. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM HP_COMPONENT A where A.MATERIAL_NUMBER in(select M.MATERIAL_NUMBER FROM MATERIAL_MASTER_SMALL M ) AND $CONDITIONS' --target-dir /user/centos7/hp_component_small --fields-terminated-by '\001' -m 8 --split-by "ROWIDTOCHAR(ROWID)" -z --fetch-size=2000
  12.  
  13. --in dev
  14. material master 10,162,316 164 sec 1.4 GB conventional mappers 8 fetch-size 2000 to 3000
  15. ---query from partition-----------
  16. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM HP_COMPONENT PARTITION (HP_COMP_2013_00) A where A.MATERIAL_NUMBER in(select M.MATERIAL_NUMBER FROM MATERIAL_MASTER_SMALL M ) AND $CONDITIONS' --target-dir /user/centos7/hp_component_2013 --fields-terminated-by '\001' -m 4 --boundary-query 'select 20130101, 20131231 from dual' --split-by YYYYMMDD
  17.  
  18. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM HP_COMPONENT A where A.MATERIAL_NUMBER in(select M.MATERIAL_NUMBER FROM MATERIAL_MASTER_SMALL M ) AND $CONDITIONS' --target-dir /user/centos7/hp_component_2013 --fields-terminated-by '\001' -m 4 --boundary-query 'select 20110101, 20141231 from dual' --split-by YYYYMMDD
  19.  
  20. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM HP_COMPONENT A where A.MATERIAL_NUMBER in(select M.MATERIAL_NUMBER FROM MATERIAL_MASTER_SMALL M ) AND $CONDITIONS' --target-dir /user/centos7/hp_component_2013 --fields-terminated-by '\001' -m 4 --boundary-query 'select 20020101, 20161231 from dual' --split-by YYYYMMDD
  21.  
  22. --boundary-query cannot be used with Query
  23. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM HP_COMPONENT_SMALL A where A.MATERIAL_NUMBER in(select M.MATERIAL_NUMBER FROM MATERIAL_MASTER_SMALL M ) AND $CONDITIONS' --target-dir /user/centos7/hp_component_small --fields-terminated-by '\001' -m 4 --boundary-query 'SELECT MIN(ROWIDTOCHAR(ROWID)), MAX(ROWIDTOCHAR(ROWID)) FROM HP_COMPONENT_SMALL '
  24.  
  25. --table with hive
  26. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table HP_COMPONENT_SMALL --target-dir /user/centos7/hp_component_small --fields-terminated-by '\001' --direct -m 4 --hive-import
  27.  
  28. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table MATERIAL_MASTER_SMALL --target-dir /user/centos7/material_master_small --fields-terminated-by '\001' --direct -m 4 --hive-import
  29.  
  30. --check performance
  31. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM MATERIAL_MASTER A WHERE $CONDITIONS' --target-dir /user/centos7/material_master --fields-terminated-by '\001' -m 8 --split-by "ROWIDTOCHAR(ROWID)" --fetch-size=2000
  32. 168.7249 seconds
  33.  
  34. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table MATERIAL_MASTER --target-dir /user/centos7/material_master --fields-terminated-by '\001' -m 8 --direct
  35. 106 seconds.
  36. -- direct is working
  37. sqoop import --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table HP_COMPONENT_SMALL --target-dir /user/centos7/hp_component_small --fields-terminated-by '\001' -m 4 --direct
  38.  
  39. sqoop import --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM HP_COMPONENT SMALL A ' --target-dir /user/centos7/hp_component_small --fields-terminated-by '\001' -m 4 --direct
  40.  
  41. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select A.* FROM HP_COMPONENT_SMALL A where $CONDITIONS' --target-dir /user/centos7/hp_component_small --fields-terminated-by '\001' --direct -m 4 --split-by "ROWIDTOCHAR(ROWID)"
  42.  
  43. --sqoop export
  44. sqoop export --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table HP_COMPONENT_SMALL_EXPORT --export-dir /user/centos7/hp_component_small --fields-terminated-by '\001'
  45.  
  46. sqoop export --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table HP_COMPONENT_SMALL_EXPORT --export-dir /apps/hive/warehouse/hp_component_small_s --fields-terminated-by '\001'
  47. ---------------------------------------
  48. material master 10,162,316 164 sec 1.4 GB conventional mappers 4
  49. material master 10,162,316 131 sec 1.4 GB conventional mappers 8
  50. material master 10,162,316 134 sec 1.4 GB conventional mappers 12
  51. material master 10,162,316 164 sec 1.4 GB Direct
  52.  
  53. select round(50000000000/10000000*134/60/60/24) days from dual;
  54.  
  55. sqoop import --connect jdbc:oracle:thin:@gcu90609.houston.hp.com:1526/psnri --username SNR_DBA --password wwsnrs_dba_2010 --query 'select ROWIDTOCHAR(ROWID) RID,A.* FROM HPE_PL_TMP A WHERE $CONDITIONS' --split-by RID -m 4 --target-dir /user/centos7
  56.  
  57.  
  58. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@gcu90609.houston.hp.com:1526/psnri --query 'select ROWIDTOCHAR(ROWID) RID,A.* FROM HP_UNIT_SERIAL_NUMBER_TEST A WHERE $CONDITIONS' --split-by "ROWIDTOCHAR(ROWID)" --target-dir /user/centos7/hp_unit_serial_number_test --fields-terminated-by '\001' -m 4
  59.  
  60.  
  61. --null-string "" --null-non-string "" --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp' -m 4
  62.  
  63. sqoop import --connect jdbc:oracle:thin:@gcu90609.houston.hp.com:1526/psnri --username SNR_DBA --password wwsnrs_dba_2010 --table MATERIAL_MASTER --target-dir /user/centos7/material_master --fields-terminated-by '\001' -m 4
  64.  
  65. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@gcu90609.houston.hp.com:1526/psnri --username SNR_DBA --password wwsnrs_dba_2010 --table MATERIAL_MASTER --target-dir /user/centos7/material_master --fields-terminated-by '\001' -m 4
  66. ------------------------------Tuning --------------------------------
  67. try --direct import
  68. --compression
  69. --default fetch size 1000 can be increased?
  70. -- batch export
  71. ------------------------final design scripts ---------------------------------
  72. 1.Import Direct import, full table,
  73.  
  74. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@gcu90609.houston.hp.com:1526/psnri --username SNR_DBA --password wwsnrs_dba_2010 --table HP_COMPONENT --fields-terminated-by '\001' --direct -m 12 --null-string "" --null-non-string "" --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,MANUFACTURE_DATE=java.sql.Timestamp' --map-column-hive YYYYMMDD=INT,QUANTITY=INT --hive-import
  75.  
  76. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table MATERIAL_MASTER_SMALL --target-dir /user/centos7/material_master_small --fields-terminated-by '\001' --direct -m 4 --hive-import
  77.  
  78.  
  79. 2. Filter in hive
  80. create table hp_component_small_s as select * from hp_component_small hp where hp.material_number in(select m.material_number from material_master_small m);
  81. -- need to increase mappers!!
  82. 3. Export hive table to Oracle
  83. sqoop export --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table HP_COMPONENT_SMALL_EXPORT --export-dir /apps/hive/warehouse/hp_component_small_s --input-fields-terminated-by '\001' --map-column-java 'DATE_CREATED=java.sql.Timestamp'
  84. --direct gives ORA-01858: a non-numeric character
  85. --alternative use nologging??
  86. -------------------------------------------------------------------------- time estimates-------------------------------
  87. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@gcu90609.houston.hp.com:1526/psnri --username SNR_DBA --password wwsnrs_dba_2010 --table UNIT_SERIAL_NUMBER --fields-terminated-by '\001' --direct -m 8 --null-string "" --null-non-string "" --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,MANUFACTURE_DATE=java.sql.Timestamp,CCP_CONFIGURATION_DATE=java.sql.Timestamp' --map-column-hive YYYYMMDD=INT,CC_INDICATOR=INT --fetch-size 2000 -m 8 --hive-import
  88.  
  89. create table unit_serial_number_s as select * from unit_serial_number hp where hp.material_number in(select m.material_number from material_master_small m);
  90.  
  91. sqoop export --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table UNIT_SERIAL_NUMBER_EXPORT --export-dir /apps/hive/warehouse/unit_serial_number --input-fields-terminated-by '\001' --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,MANUFACTURE_DATE=java.sql.Timestamp,CCP_CONFIGURATION_DATE=java.sql.Timestamp' --null-string "" --null-non-string ""
  92.  
  93. import :
  94. unit_serial_number count 51879578 size 6 GB time 412 secs
  95.  
  96.  
  97.  
  98. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table J_TEST --fields-terminated-by '\001' --direct --null-string "" --null-non-string "" --map-column-hive NO=INT --hive-import
  99.  
  100. sqoop export --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table J_TEST_EXPORT --export-dir /apps/hive/warehouse/j_test --input-fields-terminated-by '\001'
  101.  
  102. sqoop export --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table UNIT_SERIAL_NUMBER_EXPORT --export-dir /apps/hive/warehouse/unit_serial_number --input-fields-terminated-by '\001' --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,MANUFACTURE_DATE=java.sql.Timestamp,CCP_CONFIGURATION_DATE=java.sql.Timestamp' --null-string "" --null-non-string "" -m 8
  103.  
  104.  
  105. sqoop export --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table UNIT_SERIAL_NUMBER_EXPORT --export-dir /user/centos7/test/bad_data --input-fields-terminated-by '\001' --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,MANUFACTURE_DATE=java.sql.Timestamp,CCP_CONFIGURATION_DATE=java.sql.Timestamp' --null-string "" --null-non-string "" -m 8
  106.  
  107. ------------------------ode_exploded_spi-------------------------------------------
  108. --below ones successfully work, however Pin column should not be used as split by column as it has null values.
  109.  
  110.  
  111. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table ODE_EXPLODED_SPI --fields-terminated-by '\001' --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,DATE_END=java.sql.Timestamp' --null-string "" --null-non-string "" --target-dir /apps/hive/warehouse/ode_exploded_spi -m 8 --split-by PIN --hive-drop-import-delims --hive-import
  112.  
  113. sqoop export -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table ODE_EXPLODED_SPI_EXPORT --export-dir /apps/hive/warehouse/ode_exploded_spi --input-fields-terminated-by '\001' --null-string "" --null-non-string "" --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,DATE_END=java.sql.Timestamp' -m 8
  114.  
  115. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query "select pin,RELATED_PIN,COMPOSED_OF_RSN,DATE_UPDATED,DATE_END,DATE_CREATED,PIN_OPT,RELATED_PIN_OPT,AS_MANUFACTURED,AS_PLANNED,AS_SUPPORTED,AS_REPAIRED,QUANTITY,CREW_ELIGIBLE,OPTION_EFFECT,CRITICAL_FLAG,YYYYMMDD,STATUS,CORP_DESCRIPTION,replace(replace(ENHANCE_DESCRIPTION,chr(10),''),chr(13),'') as ENHANCE_DESCRIPTION,KEYWORD,CATAGORY from ode_exploded_spi where \$CONDITIONS" --fields-terminated-by '\001' --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,DATE_END=java.sql.Timestamp' --null-string "" --null-non-string "" --target-dir /apps/hive/warehouse/ode_exploded_spi -m 8 --split-by PIN --hive-import --hive-table ode_exploded_spi
  116. -----------------unit_serial_number export failing---------------
  117.  
  118. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select * from UNIT_SERIAL_NUMBER where $CONDITIONS' --fields-terminated-by '\001' --null-string '\\N' --null-non-string '\\N' --target-dir /apps/hive/warehouse/unit_serial_number --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,MANUFACTURE_DATE=java.sql.Timestamp,CCP_CONFIGURATION_DATE=java.sql.Timestamp' --map-column-hive YYYYMMDD=INT,CC_INDICATOR=INT --fetch-size 2000 --hive-drop-import-delims -m 8 --split-by "ROWIDTOCHAR(ROWID)" --hive-import --hive-table unit_serial_number
  119.  
  120. sqoop export -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table UNIT_SERIAL_NUMBER_EXPORT --export-dir /apps/hive/warehouse/unit_serial_number --input-fields-terminated-by '\001' --input-null-string '\\N' --input-null-non-string '\\N' --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp,MANUFACTURE_DATE=java.sql.Timestamp,CCP_CONFIGURATION_DATE=java.sql.Timestamp' --map-column-hive YYYYMMDD=INT,CC_INDICATOR=INT -m 8
  121. -----------------------------------iss diagnostics details works --------------------------------
  122. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@gcu90609.houston.hp.com:1526/psnri --username SNR_DBA --password wwsnrs_dba_2010 --table ISS_DIAGNOSTICS_DETAILS --fields-terminated-by '\001' --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp' --null-string "" --null-non-string "" --target-dir /apps/hive/warehouse/ode_exploded_spi -m 8 --split-by YYYYMMDD --hive-drop-import-delims --hive-import
  123.  
  124. sqoop export -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table ISS_DIAGNOSTICS_DETAILS_EXPORT --export-dir /apps/hive/warehouse/iss_diagnostics_details --input-fields-terminated-by '\001' --null-string "" --null-non-string "" --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp' -m 8
  125. import 5995461 1.3088 GB in 153.1098 seconds
  126. ----------------------------------------------------build operations hist
  127. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table CQCS_DBA.BUILD_OPERATION_HIST --fields-terminated-by '\001' --map-column-java 'MODIFY_DATE=java.sql.Timestamp,SA_WEEK_MFG=java.sql.Timestamp,ASSLY_DATE=java.sql.Timestamp,SFT_START_DATE=java.sql.Timestamp,SFT_COMP_DATE=java.sql.Timestamp,SFT_ASSLY_DATE=java.sql.Timestamp' --null-string "" --null-non-string "" --target-dir /apps/hive/warehouse/build_operation_hist -m 8 --split-by ITEM_ID --hive-drop-import-delims --hive-import
  128.  
  129. sqoop export -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table BUILD_OPERATION_HIST_EXPORT --export-dir /apps/hive/warehouse/build_operation_hist --input-fields-terminated-by '\001' --null-string "" --null-non-string "" --map-column-java 'MODIFY_DATE=java.sql.Timestamp,SA_WEEK_MFG=java.sql.Timestamp,ASSLY_DATE=java.sql.Timestamp,SFT_START_DATE=java.sql.Timestamp,SFT_COMP_DATE=java.sql.Timestamp,SFT_ASSLY_DATE=java.sql.Timestamp' -m 8
  130.  
  131.  
  132. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table BUILD_OPERATION_HIST_BAD --fields-terminated-by '\001' --map-column-java 'MODIFY_DATE=java.sql.Timestamp,SA_WEEK_MFG=java.sql.Timestamp,ASSLY_DATE=java.sql.Timestamp,SFT_START_DATE=java.sql.Timestamp,SFT_COMP_DATE=java.sql.Timestamp,SFT_ASSLY_DATE=java.sql.Timestamp' --null-string "" --null-non-string "" --target-dir /apps/hive/warehouse/build_operation_hist_bad -m 1 --hive-drop-import-delims --hive-import
  133.  
  134. sqoop export -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table BUILD_OPERATION_HIST_EXPORT --export-dir /apps/hive/warehouse/build_operation_hist --input-fields-terminated-by '\001' --null-string "" --null-non-string "" --map-column-java 'MODIFY_DATE=java.sql.Timestamp,SA_WEEK_MFG=java.sql.Timestamp,ASSLY_DATE=java.sql.Timestamp,SFT_START_DATE=java.sql.Timestamp,SFT_COMP_DATE=java.sql.Timestamp,SFT_ASSLY_DATE=java.sql.Timestamp' -m 8
  135. import 5995461 1.3088 GB in 153.1098 seconds
  136. ----------------------------------------------------------partition import -----------------------------------------
  137.  
  138. sqoop import -Dorg.apache.sqoop.splitter.allow_text_splitter=true -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --query 'select serial_number,yyyymmdd,substr(yyyymmdd,1,4) as year from NCC_SMALL where yyyymmdd between 20120101 and 20121231 and $CONDITIONS' --fields-terminated-by '\001' --null-string '\\N' --null-non-string '\\N' --hive-drop-import-delims -m 4 --split-by "ROWIDTOCHAR(ROWID)" --hcatalog-database default -hcatalog-table NCC_SMALL
  139.  
  140. sqoop export -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table NCC_SMALL_ORC --export-dir /apps/hive/warehouse/ncc_small_orc --input-fields-terminated-by '\001' --null-string '\\N' --null-non-string '\\N' -m 4
  141. ----------------------- hbase import------------------------------------------
  142. sqoop import --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table HUSN_HBASE --fields-terminated-by '\001' --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp' --null-string "" --null-non-string "" -m 1 --hive-drop-import-delims --hbase-table husn_hbase --column-family cf --hbase-create-table --hbase-row-key SERIAL_NUMBER
  143.  
  144. sqoop import --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table HUSN_HBASE --fields-terminated-by '\001' --map-column-java 'DATE_CREATED=java.sql.Timestamp,DATE_UPDATED=java.sql.Timestamp' --null-string "" --null-non-string "" -m 1 --hive-drop-import-delims --hbase-table husn_hbase1 --column-family cf --hbase-create-table --hbase-row-key 'SERIAL_NUMBER,MATERIAL_NUMBER'
  145.  
  146. -------------------------export from hbase/hive ---------------
  147. load hive from hbase and export into Oracle
  148. sqoop export -Dorg.apache.sqoop.export.text.dump_data_on_error=true --connect jdbc:oracle:thin:@g9u1045.houston.hp.com:1525/psnrd --username SNR_DBA --password wwsnrs_dba_2010 --table HBTABLE --export-dir /apps/hive/warehouse/hbtable --input-fields-terminated-by '\001' -m 1
  149.  
  150. ------------------------------------password file ---------------------------
  151. echo "my-secret-password" > sqoop.password
  152. hadoop dfs -put sqoop.password /user/$USER/sqoop.password
  153. hadoop dfs -chown 400 /user/$USER/sqoop.password
  154. -------------------------------------------------------------------------------
  155.  
  156. --as-avrodatafile
  157. --compress
  158. --compression-codec snappy or lzop
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement