Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- sqoop-import --connect jdbc:mysql://ms.itversity.com:3306/retail_db \
- --username retail_user \
- --password itversity \
- --table orders \
- --target-dir "/user/krishnatejathatavarthi/teja_aruns/solution01/orders" \
- --compress \
- --compression-codec "org.apache.hadoop.io.compress.SnappyCodec" \
- --as-avrodatafile
- sqoop-import --connect jdbc:mysql://ms.itversity.com:3306/retail_db \
- --username retail_user \
- --password itversity \
- --table order_items \
- --target-dir "/user/krishnatejathatavarthi/teja_aruns/solution01/order_items" \
- --compress \
- --compression-codec "org.apache.hadoop.io.compress.SnappyCodec" \
- --as-avrodatafile
- from pyspark import SparkContext,SparkConf,Row,HiveContext
- import avro.schema
- conf=SparkContext().setAppName("importExportTransformation").setMaster("yarn-client")
- sc=SparkContext(conf=conf)
- sqlContext=HiveContext(sc)
- sqlContext.setConf("spark.sql.shuffle.partitions","8")
- ordersDF=sqlContext.load("/user/krishnatejathatavarthi/teja_aruns/solution01/orders","com.databricks.spark.avro")
- orderItemsDF=sqlContext.load("/user/krishnatejathatavarthi/teja_aruns/solution01/order_items","com.databricks.spark.avro")
- ordersDF.registerTempTable("orders")
- orderItemsDF.registerTempTable("order_items")
- sqlResult=sqlContext.sql("select to_date(from_unixtime(cast(order_date/1000 as bigint))) orderDate,order_status,count(order_id) total_orders,cast(sum(order_item_subtotal) as DECIMAL(10,2)) total_amount from orders o join order_items oi on o.order_id=oi.order_item_order_id group by to_date(from_unixtime(cast(order_date/1000 as bigint))),order_status order by orderDate desc, order_status")
- sqlResult.show()
- sqlContext.setConf("spark.sql.parquet.compression.codec","gzip")
- sqlResult.write.parquet("/user/krishnatejathatavarthi/teja_aruns/solution01/solution/");
- sqlContext.setConf("spark.sql.csv.compression.codec","uncompressed")
- sqlResult.map(lambda rec: str(rec[0])+","+str(rec[1])+","+str(rec[2])+","+str(rec[3])).saveAsTextFile("/user/krishnatejathatavarthi/teja_aruns/solution01/solutions_csv/")
- create table retail_export.krishna_arun_resultOne(
- order_date varchar(255) not null,
- order_status varchar(255) not null,
- total_orders int,
- total_amount numeric,constraint pk_order_result primary key (order_date,order_status));
- sqoop-export --connect jdbc:mysql://ms.itversity.com:3306/retail_export \
- --username retail_user \
- --password itversity \
- --table krishna_arun_resultOne \
- --export-dir "/user/krishnatejathatavarthi/teja_aruns/solution01/solutions_csv/" \
- --columns "order_date,order_status,total_orders,total_amount"
Add Comment
Please, Sign In to add comment