Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Step 1:
- sqoop import \
- --connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
- --username retail_dba \
- --password cloudera \
- --table products_replica \
- --target-dir /user/cloudera/problem5/products-text \
- --fields-terminated-by '|' \
- --lines-terminated-by '\n' \
- --null-non-string -1 \
- --null-string "NOT-AVAILABLE" \
- -m 3 \
- --where "product_id between 1 and 1000" \
- --outdir /home/cloudera/sqoop1 \
- --boundary-query "select min(product_id), max(product_id) from products_replica where product_id between 1 and 1000";
- // Step 2:
- sqoop import \
- --connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
- --username retail_dba \
- --password cloudera \
- --table products_replica \
- --target-dir /user/cloudera/problem5/products-text-part1 \
- --fields-terminated-by '*' \
- --lines-terminated-by '\n' \
- --null-non-string -1000 \
- --null-string "NA" \
- -m 2 \
- --where "product_id <= 1111 " \
- --outdir /home/cloudera/sqoop2 \
- --boundary-query "select min(product_id), max(product_id) from products_replica where product_id <= 1111";
- // Step 3:
- sqoop import \
- --connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
- --username retail_dba \
- --password cloudera \
- --table products_replica \
- --target-dir /user/cloudera/problem5/products-text-part2 \
- --fields-terminated-by '*' \
- --lines-terminated-by '\n' \
- --null-non-string -1000 \
- --null-string "NA" \
- -m 5 \
- --where "product_id > 1111 " \
- --outdir /home/cloudera/sqoop3 \
- --boundary-query "select min(product_id), max(product_id) from products_replica where product_id > 1111"
- // Step 4:
- sqoop merge \
- --class-name products_replica \
- --jar-file mp/sqoop-cloudera/compile/66b4f23796be7625138f2171a7331cd3/products_replica.jar \
- --new-data /user/cloudera/problem5/products-text-part2 \
- --onto /user/cloudera/problem5/products-text-part1 \
- --target-dir /user/cloudera/problem5/products-text-both-parts \
- --merge-key product_id;
- // Step 5:
- On terminal -
- sqoop job --create first_sqoop_job \
- -- import \
- --connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
- --username "retail_dba" \
- --password "cloudera" \
- --table products_replica \
- --target-dir /user/cloudera/problem5/products-incremental \
- --check-column product_id \
- --incremental append \
- --last-value 0;
- sqoop job --exec first_sqoop_job
- // On MySQL command line -
- insert into products_replica
- values (1346,2,'something 1','something 2',300.00,'not avaialble',3,'STRONG');
- insert into products_replica
- values (1347,5,'something 787','something 2',356.00,'not avaialble',3,'STRONG');
- // On terminal -
- sqoop job --exec first_sqoop_job
- // On MYSQL Command Line
- insert into products_replica
- values (1376,4,'something 1376','something 2',1.00,'not avaialble',3,'WEAK');
- insert into products_replica
- values (1365,4,'something 1376','something 2',10.00,'not avaialble',null,'NOT APPLICABLE');
- // On terminal -
- sqoop job --exec first_sqoop_job
- // Step 6:
- // On Terminal window-
- sqoop job \
- --create hive_sqoop_job \
- -- import \
- --connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
- --username "retail_dba" \
- --password "cloudera" \
- --table products_replica \
- --check-column product_id \
- --incremental append \
- --last-value 0 \
- --hive-import \
- --hive-table products_hive \
- --hive-database problem5;
- // On Hive window:
- create database problem5;
- use problem5;
- create table products_hive (
- product_id int,
- product_category_id int,
- product_name string,
- product_description string,
- product_price float,
- product_imaage string,
- product_grade int,
- product_sentiment string
- );
- // On Terminal window
- sqoop job --exec hive_sqoop_job
- // On MySQL window
- insert into products_replica
- values (1378,4,'something 1376','something 2',10.00,'not avaialble',null,'NOT APPLICABLE');
- insert into products_replica
- values (1379,4,'something 1376','something 2',10.00,'not avaialble',null,'NOT APPLICABLE');
- // On Terminal Window
- sqoop job --exec hive_sqoop_job
- // On Hive Window
- select * from products_hive;
- // Step 7:
- // On Hive Window
- use problem5;
- insert into table products_hive
- values (1380,4,'something 1380','something 2',8.00,'not avaialble',3,'NOT APPLICABLE');
- insert into table products_hive
- values (1381,4,'something 1380','something 2',8.00,'not avaialble',3,'NOT APPLICABLE');
- // On MYSQL window
- create table products_external (
- product_id int(11) primary Key,
- product_grade int(11),
- product_category_id int(11),
- product_name varchar(100),
- product_description varchar(100),
- product_price float,
- product_impage varchar(500),
- product_sentiment varchar(100)
- );
- // On Terminal
- sqoop export \
- --username "retail_dba" \
- --password "cloudera" \
- --connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
- --export-dir /user/hive/warehouse/problem5.db/products_hive/ \
- --fields-terminated-by '\001' \
- --input-null-non-string "null" \
- --input-null-string "null" \
- --update-mode allowinsert \
- --update-key product_id \
- --columns "product_id,product_category_id,product_name,product_description,product_price,product_impage,product_grade,product_sentiment" \
- --table products_external;
- // On Hive Window
- insert into table products_hive
- values (1382,4,'something 1380','something 2',8.00,'not avaialble',3,'NOT APPLICABLE');
- insert into table products_hive
- values (1383,4,'something 1380','something 2',8.00,'not avaialble',3,'NOT APPLICABLE');
- // On Terminal Window:
- sqoop export \
- --username "retail_dba" \
- --password "cloudera" \
- --connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
- --export-dir /user/hive/warehouse/problem5.db/products_hive/ \
- --fields-terminated-by '\001' \
- --input-null-non-string "null" \
- --input-null-string "null" \
- --update-mode allowinsert \
- --update-key product_id \
- --columns "product_id,product_category_id,product_name,product_description,product_price,product_impage,product_grade,product_sentiment" \
- --table products_external;
- // To Validate
- // On Hive
- select count(*) from problem5.products_hive;
- // on MySQL
- select count(*) from products_replica;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement