Guest User

Untitled

a guest
Feb 24th, 2018
123
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.50 KB | None | 0 0
  1. sqoop-import --connect jdbc:mysql://ms.itversity.com:3306/retail_db \
  2. --username retail_user \
  3. --password itversity \
  4. --table orders \
  5. --target-dir "/user/krishnatejathatavarthi/teja_aruns/solution01/orders" \
  6. --compress \
  7. --compression-codec "org.apache.hadoop.io.compress.SnappyCodec" \
  8. --as-avrodatafile
  9.  
  10. sqoop-import --connect jdbc:mysql://ms.itversity.com:3306/retail_db \
  11. --username retail_user \
  12. --password itversity \
  13. --table order_items \
  14. --target-dir "/user/krishnatejathatavarthi/teja_aruns/solution01/order_items" \
  15. --compress \
  16. --compression-codec "org.apache.hadoop.io.compress.SnappyCodec" \
  17. --as-avrodatafile
  18.  
  19. from pyspark import SparkContext,SparkConf,Row,HiveContext
  20. import avro.schema
  21.  
  22. conf=SparkContext().setAppName("importExportTransformation").setMaster("yarn-client")
  23. sc=SparkContext(conf=conf)
  24. sqlContext=HiveContext(sc)
  25.  
  26. sqlContext.setConf("spark.sql.shuffle.partitions","8")
  27. ordersDF=sqlContext.load("/user/krishnatejathatavarthi/teja_aruns/solution01/orders","com.databricks.spark.avro")
  28. orderItemsDF=sqlContext.load("/user/krishnatejathatavarthi/teja_aruns/solution01/order_items","com.databricks.spark.avro")
  29.  
  30. ordersDF.registerTempTable("orders")
  31. orderItemsDF.registerTempTable("order_items")
  32.  
  33. sqlResult=sqlContext.sql("select to_date(from_unixtime(cast(order_date/1000 as bigint))) orderDate,order_status,count(order_id) total_orders,cast(sum(order_item_subtotal) as DECIMAL(10,2)) total_amount from orders o join order_items oi on o.order_id=oi.order_item_order_id group by to_date(from_unixtime(cast(order_date/1000 as bigint))),order_status order by orderDate desc, order_status")
  34.  
  35. sqlResult.show()
  36.  
  37. sqlContext.setConf("spark.sql.parquet.compression.codec","gzip")
  38. sqlResult.write.parquet("/user/krishnatejathatavarthi/teja_aruns/solution01/solution/");
  39.  
  40. sqlContext.setConf("spark.sql.csv.compression.codec","uncompressed")
  41. sqlResult.map(lambda rec: str(rec[0])+","+str(rec[1])+","+str(rec[2])+","+str(rec[3])).saveAsTextFile("/user/krishnatejathatavarthi/teja_aruns/solution01/solutions_csv/")
  42.  
  43.  
  44. create table retail_export.krishna_arun_resultOne(
  45. order_date varchar(255) not null,
  46. order_status varchar(255) not null,
  47. total_orders int,
  48. total_amount numeric,constraint pk_order_result primary key (order_date,order_status));
  49.  
  50. sqoop-export --connect jdbc:mysql://ms.itversity.com:3306/retail_export \
  51. --username retail_user \
  52. --password itversity \
  53. --table krishna_arun_resultOne \
  54. --export-dir "/user/krishnatejathatavarthi/teja_aruns/solution01/solutions_csv/" \
  55. --columns "order_date,order_status,total_orders,total_amount"
Add Comment
Please, Sign In to add comment