# Connect to the source MySQL database (retail_db) on the quickstart VM
mysql -u retail_dba -p

# Import the orders table into HDFS as Snappy-compressed Avro files
sqoop-import --connect jdbc:mysql://quickstart.cloudera:3306/retail_db \
--username retail_dba \
--password cloudera \
--table orders \
--target-dir /user/cloudera/teja_arun/ep01/orders \
--compress \
--compression-codec "org.apache.hadoop.io.compress.SnappyCodec" \
--as-avrodatafile

# Import the order_items table into HDFS as Snappy-compressed Avro files
sqoop-import --connect jdbc:mysql://quickstart.cloudera:3306/retail_db \
--username retail_dba \
--password cloudera \
--table order_items \
--target-dir /user/cloudera/teja_arun/ep01/order_items \
--compress \
--compression-codec org.apache.hadoop.io.compress.SnappyCodec \
--as-avrodatafile

# Spark setup: launch a SparkContext in yarn-client mode and a HiveContext for SQL
from pyspark import SparkContext, SparkConf
from pyspark.sql import HiveContext, Row

conf = SparkConf().setAppName("ep01").setMaster("yarn-client")
sc = SparkContext(conf=conf)
sqlContext = HiveContext(sc)

# Read the Avro files imported by Sqoop and register them as temporary tables
ordersDF = sqlContext.read.load("/user/cloudera/teja_arun/ep01/orders", "com.databricks.spark.avro")
orderItemsDF = sqlContext.read.load("/user/cloudera/teja_arun/ep01/order_items", "com.databricks.spark.avro")

ordersDF.registerTempTable("orders")
orderItemsDF.registerTempTable("order_items")
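
# Optional sanity check (a sketch, not required by the exercise): confirm the Avro data
# loaded with the expected schema before running the aggregation.
ordersDF.printSchema()
orderItemsDF.printSchema()
ordersDF.show(5)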

# Problem statement: produce Order_Date (in YYYY-MM-DD format), Order_Status, total_orders
# and total_amount, i.e. the total number of orders and the total order amount per status
# per day. Sort by order date descending, order status ascending, total amount descending
# and total orders ascending.

sqlResult = sqlContext.sql("""
select to_date(from_unixtime(cast(order_date/1000 as bigint))) order_formatted_date,
       order_status,
       count(order_id) total_count,
       cast(sum(order_item_subtotal) as decimal(10,2)) order_sum
from orders o join order_items oi on o.order_id = oi.order_item_order_id
group by to_date(from_unixtime(cast(order_date/1000 as bigint))), o.order_status
order by order_formatted_date desc, order_status, order_sum desc, total_count
""")
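
# An equivalent aggregation using the DataFrame API (optional alternative sketch; it
# assumes the same retail_db column names used in the SQL above).
from pyspark.sql import functions as F

dfResult = ordersDF.join(orderItemsDF, ordersDF.order_id == orderItemsDF.order_item_order_id) \
    .groupBy(F.to_date(F.from_unixtime((ordersDF.order_date / 1000).cast("bigint"))).alias("order_formatted_date"),
             "order_status") \
    .agg(F.count("order_id").alias("total_count"),
         F.round(F.sum("order_item_subtotal"), 2).alias("order_sum")) \
    .orderBy(F.col("order_formatted_date").desc(), "order_status",
             F.col("order_sum").desc(), "total_count")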

# Save the result as gzip-compressed Parquet
sqlContext.setConf("spark.sql.parquet.compression.codec", "gzip")
sqlResult.write.parquet("/user/cloudera/teja_arun/ep01/solutions/parquet-gzip")
  43.  
  44. sqlContext.setConf("spark.sql.parquet.compression.codec","snappy")
  45. sqlResult.write.parquet("/user/cloudera/teja_arun/solutions/parquet-snappy")
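
# Optional check (a sketch): read one of the Parquet outputs back and preview a few rows.
sqlContext.read.parquet("/user/cloudera/teja_arun/ep01/solutions/parquet-gzip").show(5)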

# Save the result as comma-delimited text for the Sqoop export
sqlResult.rdd.map(lambda rec: str(rec[0]) + "," + str(rec[1]) + "," + str(rec[2]) + "," + str(rec[3])) \
    .saveAsTextFile("/user/cloudera/teja_arun/ep01/solutions/csv")
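
# Optional check (a sketch): preview the CSV output to confirm the field order matches
# the --columns list used in the Sqoop export below.
for line in sc.textFile("/user/cloudera/teja_arun/ep01/solutions/csv").take(5):
    print(line)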

# MySQL DDL for the export target table (run in the mysql client)
create table result_one (
  order_date date,
  order_status varchar(255),
  total_count int,
  order_sum decimal(10,2),
  constraint pk_date_status primary key (order_date, order_status)
);

# Export the CSV result into the MySQL table result_one
sqoop-export \
--table result_one \
--connect jdbc:mysql://quickstart.cloudera:3306/retail_db \
--username retail_dba \
--password cloudera \
--export-dir "/user/cloudera/teja_arun/ep01/solutions/csv" \
--columns "order_date,order_status,total_count,order_sum"