Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- sqoop-import-all-tables --connect "jdbc:mysql://quickstart.cloudera:3306/retail_db" \
- --username retail_dba \
- --password cloudera \
- --warehouse-dir "/user/hive/warehouse/retail_stage.db" \
- --comrpes \
- --compression-codec snappy \
- --as-avrodatafile \
- -m 1
- hadoop fs -get /user/hive/warehouse/retail_stage.db/orders/part-m-00000.avro
- avro-tools getschema part-m-00000.avro > orders.avsc
- hadoop fs -mkdir /user/hive/schemas
- hadoop fs -mkdir /user/hive/schemas/order
- hadoop fs -copyFromLocal orders.avsc /user/hive/schemas/order
- create table orders_sqoop
- stored as avro
- location "/user/hive/warehouse/retail_stage.db/orders"
- TBLPROPERTIES("avro.schema.url"="/user/hive/schemas/order/orders.avsc")
- select order_id,to_date(from_unixtime(cast(order_date/1000 as int))) order_format_date from orders_sqoop s where s.order_date in (select q.order_date from (select order_date,count(order_id) order_count from orders_sqoop group by order_date order by order_count desc limit 1) q);
- create table orders_avro
- (order_id int,
- order_date date,
- order_customer_id int,
- order_status string)
- partitioned by(order_month string)
- stored as avro;
- insert overwrite table orders_avro partition(order_month)
- select order_id,to_date(from_unixtime(cast(order_date/1000 as int))),order_customer_id,order_status,
- substr(from_unixtime(cast(order_date/1000 as int)),1,7) as order_month from default.orders_sqoop;
- select * from orders_avro where order_date in (select order_date from (select order_id,order_date,count(order_id) order_count from orders_avro group by order_date order by order_count desc limit 1) q)
- hadoop fs -get orsers.avsc
- gedit orders.avsc
- {
- "name":"order_zone",
- "type":["null","int"],
- "default":"null,
- "columnName":"order_zone",
- "sqlType":"4"
- }
Add Comment
Please, Sign In to add comment