Advertisement
Guest User

Untitled

a guest
Jun 19th, 2017
89
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.97 KB | None | 0 0
  1. // Step 1:
  2. sqoop import \
  3. --connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
  4. --username retail_dba \
  5. --password cloudera \
  6. --table products_replica \
  7. --target-dir /user/cloudera/problem5/products-text \
  8. --fields-terminated-by '|' \
  9. --lines-terminated-by '\n' \
  10. --null-non-string -1 \
  11. --null-string "NOT-AVAILABLE" \
  12. -m 3 \
  13. --where "product_id between 1 and 1000" \
  14. --outdir /home/cloudera/sqoop1 \
  15. --boundary-query "select min(product_id), max(product_id) from products_replica where product_id between 1 and 1000";
  16.  
  17.  
  18. // Step 2:
  19. sqoop import \
  20. --connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
  21. --username retail_dba \
  22. --password cloudera \
  23. --table products_replica \
  24. --target-dir /user/cloudera/problem5/products-text-part1 \
  25. --fields-terminated-by '*' \
  26. --lines-terminated-by '\n' \
  27. --null-non-string -1000 \
  28. --null-string "NA" \
  29. -m 2 \
  30. --where "product_id <= 1111 " \
  31. --outdir /home/cloudera/sqoop2 \
  32. --boundary-query "select min(product_id), max(product_id) from products_replica where product_id <= 1111";
  33.  
  34.  
  35.  
  36. // Step 3:
  37. sqoop import \
  38. --connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
  39. --username retail_dba \
  40. --password cloudera \
  41. --table products_replica \
  42. --target-dir /user/cloudera/problem5/products-text-part2 \
  43. --fields-terminated-by '*' \
  44. --lines-terminated-by '\n' \
  45. --null-non-string -1000 \
  46. --null-string "NA" \
  47. -m 5 \
  48. --where "product_id > 1111 " \
  49. --outdir /home/cloudera/sqoop3 \
  50. --boundary-query "select min(product_id), max(product_id) from products_replica where product_id > 1111"
  51.  
  52. // Step 4:
  53. sqoop merge \
  54. --class-name products_replica \
  55. --jar-file mp/sqoop-cloudera/compile/66b4f23796be7625138f2171a7331cd3/products_replica.jar \
  56. --new-data /user/cloudera/problem5/products-text-part2 \
  57. --onto /user/cloudera/problem5/products-text-part1 \
  58. --target-dir /user/cloudera/problem5/products-text-both-parts \
  59. --merge-key product_id;
  60.  
  61.  
  62.  
  63. // Step 5:
  64. On terminal -
  65.  
  66. sqoop job --create first_sqoop_job \
  67. -- import \
  68. --connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
  69. --username "retail_dba" \
  70. --password "cloudera" \
  71. --table products_replica \
  72. --target-dir /user/cloudera/problem5/products-incremental \
  73. --check-column product_id \
  74. --incremental append \
  75. --last-value 0;
  76.  
  77. sqoop job --exec first_sqoop_job
  78.  
  79. // On MySQL command line -
  80.  
  81. insert into products_replica
  82. values (1346,2,'something 1','something 2',300.00,'not avaialble',3,'STRONG');
  83. insert into products_replica
  84. values (1347,5,'something 787','something 2',356.00,'not avaialble',3,'STRONG');
  85.  
  86. // On terminal -
  87.  
  88. sqoop job --exec first_sqoop_job
  89.  
  90. // On MYSQL Command Line
  91.  
  92. insert into products_replica
  93. values (1376,4,'something 1376','something 2',1.00,'not avaialble',3,'WEAK');
  94. insert into products_replica
  95. values (1365,4,'something 1376','something 2',10.00,'not avaialble',null,'NOT APPLICABLE');
  96.  
  97. // On terminal -
  98.  
  99. sqoop job --exec first_sqoop_job
  100.  
  101. // Step 6:
  102. // On Terminal window-
  103.  
  104. sqoop job \
  105. --create hive_sqoop_job \
  106. -- import \
  107. --connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
  108. --username "retail_dba" \
  109. --password "cloudera" \
  110. --table products_replica \
  111. --check-column product_id \
  112. --incremental append \
  113. --last-value 0 \
  114. --hive-import \
  115. --hive-table products_hive \
  116. --hive-database problem5;
  117.  
  118. // On Hive window:
  119.  
  120. create database problem5;
  121.  
  122. use problem5;
  123. create table products_hive (
  124. product_id int,
  125. product_category_id int,
  126. product_name string,
  127. product_description string,
  128. product_price float,
  129. product_imaage string,
  130. product_grade int,
  131. product_sentiment string
  132. );
  133.  
  134. // On Terminal window
  135.  
  136. sqoop job --exec hive_sqoop_job
  137.  
  138. // On MySQL window
  139.  
  140. insert into products_replica
  141. values (1378,4,'something 1376','something 2',10.00,'not avaialble',null,'NOT APPLICABLE');
  142. insert into products_replica
  143. values (1379,4,'something 1376','something 2',10.00,'not avaialble',null,'NOT APPLICABLE');
  144.  
  145. // On Terminal Window
  146.  
  147. sqoop job --exec hive_sqoop_job
  148.  
  149. // On Hive Window
  150.  
  151. select * from products_hive;
  152.  
  153. // Step 7:
  154. // On Hive Window
  155.  
  156. use problem5;
  157.  
  158. insert into table products_hive
  159. values (1380,4,'something 1380','something 2',8.00,'not avaialble',3,'NOT APPLICABLE');
  160. insert into table products_hive
  161. values (1381,4,'something 1380','something 2',8.00,'not avaialble',3,'NOT APPLICABLE');
  162.  
  163. // On MYSQL window
  164. create table products_external (
  165. product_id int(11) primary Key,
  166. product_grade int(11),
  167. product_category_id int(11),
  168. product_name varchar(100),
  169. product_description varchar(100),
  170. product_price float,
  171. product_impage varchar(500),
  172. product_sentiment varchar(100)
  173. );
  174.  
  175. // On Terminal
  176.  
  177. sqoop export \
  178. --username "retail_dba" \
  179. --password "cloudera" \
  180. --connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
  181. --export-dir /user/hive/warehouse/problem5.db/products_hive/ \
  182. --fields-terminated-by '\001' \
  183. --input-null-non-string "null" \
  184. --input-null-string "null" \
  185. --update-mode allowinsert \
  186. --update-key product_id \
  187. --columns "product_id,product_category_id,product_name,product_description,product_price,product_impage,product_grade,product_sentiment" \
  188. --table products_external;
  189.  
  190. // On Hive Window
  191.  
  192. insert into table products_hive
  193. values (1382,4,'something 1380','something 2',8.00,'not avaialble',3,'NOT APPLICABLE');
  194. insert into table products_hive
  195. values (1383,4,'something 1380','something 2',8.00,'not avaialble',3,'NOT APPLICABLE');
  196.  
  197. // On Terminal Window:
  198.  
  199. sqoop export \
  200. --username "retail_dba" \
  201. --password "cloudera" \
  202. --connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
  203. --export-dir /user/hive/warehouse/problem5.db/products_hive/ \
  204. --fields-terminated-by '\001' \
  205. --input-null-non-string "null" \
  206. --input-null-string "null" \
  207. --update-mode allowinsert \
  208. --update-key product_id \
  209. --columns "product_id,product_category_id,product_name,product_description,product_price,product_impage,product_grade,product_sentiment" \
  210. --table products_external;
  211.  
  212. // To Validate
  213. // On Hive
  214. select count(*) from problem5.products_hive;
  215. // on MySQL
  216. select count(*) from products_replica;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement