Advertisement
Guest User

Untitled

a guest
Jun 4th, 2017
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.33 KB | None | 0 0
  1. sqoop import --connect jdbc:mysql://quickstart.cloudera/retail_db --username root --password cloudera \
  2. --table products --target-dir /user/cloudera/products --fields-terminated-by '|'
  3. //Create the destination first: cp with several sources (the glob) needs an existing directory; -p makes this safe to re-run
  4. hadoop fs -mkdir -p /user/cloudera/problem2/products && hadoop fs -cp /user/cloudera/products/* /user/cloudera/problem2/products
  5.  
  6. //Read 4 Write 2 Execute 1
  7. //Owner 4+2+1=7(All rights)
  8. //Group 4+2= 6(Read & Write)
  9. //Others 4+1= 5(Read & Execute)
  10. //Apply mode 765 (owner 7, group 6, others 5) to the products directory itself (not recursive)
  11. hadoop fs -chmod 765 /user/cloudera/problem2/products/
  12.  
  13. Using groupByKey and RDDs:
  14. // Load the '|'-delimited product records from the problem2 directory.
  15. var Products = sc.textFile("/user/cloudera/problem2/products/")
  16. // Keep only the records whose price (field index 4) is greater than 100.0.
  17. var ProductsFiltered = Products.filter(rec => rec.split('|')(4).toDouble > 100.0)
  18. // Build (category key = field index 1, price = field index 4) pairs, splitting each record only once.
  19. var ProductsMap = ProductsFiltered.map { rec => val fields = rec.split('|'); (fields(1).toInt, fields(4).toDouble) }
  20.  
  21. // Per-category aggregates, each sorted by category key.
  22. // max/min replace the sort-then-head approach; groupByKey never yields an empty group, so they cannot fail.
  23. var HighestPriceperCategory = ProductsMap.groupByKey().mapValues(prices => prices.max).sortByKey()
  24. var MinimumPriceperCategory = ProductsMap.groupByKey().mapValues(prices => prices.min).sortByKey()
  25. var AveragePriceperCategory = ProductsMap.groupByKey().mapValues(prices => prices.sum / prices.size).sortByKey()
  26. var NoOfItemsperCategory = ProductsMap.groupByKey().mapValues(prices => prices.size).sortByKey()
  27.  
  28. // All four aggregates in one pass per group: (highest, lowest, average, count), sorted by category key.
  29. var Result = ProductsMap.groupByKey().mapValues { prices =>
  30.   val count = prices.size
  31.   (prices.max, prices.min, prices.sum / count, count)
  32. }.sortByKey()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement