Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- sqoop import --connect jdbc:mysql://quickstart.cloudera/retail_db --username root --password cloudera \
- --table products --target-dir /user/cloudera/products --fields-terminated-by '|'
- hadoop fs -cp /user/cloudera/products/* /user/cloudera/problem2/products
- // Octal permission digits: Read = 4, Write = 2, Execute = 1
- // Owner  4+2+1 = 7 (read, write and execute: all rights)
- // Group  4+2   = 6 (read and write)
- // Others 4+1   = 5 (read and execute)
- // NOTE(review): no -R flag is given, so the mode is set on the path named below only; if the files inside it should also become 765, confirm and add -R or a trailing *.
- hadoop fs -chmod 765 /user/cloudera/problem2/products/
- Using groupByKey and RDDs:
- // Per-category price statistics (highest, lowest, average, item count) for products priced above 100, computed with groupByKey on RDDs.
- // Each input line is a '|'-delimited record: field 1 is parsed as the Int category key and field 4 as the Double price.
- val Products = sc.textFile("/user/cloudera/problem2/products/")
- // Keep only the records whose price field is greater than 100.0.
- val ProductsFiltered = Products.filter(rec => rec.split('|')(4).toDouble > 100.0)
- // Split each record once, then build (category, price) pairs.
- val ProductsMap = ProductsFiltered.map(_.split('|')).map(fields => (fields(1).toInt, fields(4).toDouble))
- // Single grouping shared by every statistic below; each value is the collection of prices of one category.
- val ProductsGrouped = ProductsMap.groupByKey()
- // max / min / size read the group directly instead of copying it to a List and sorting it just to take the head.
- val HighestPriceperCategory = ProductsGrouped.mapValues(prices => prices.max).sortByKey()
- val MinimumPriceperCategory = ProductsGrouped.mapValues(prices => prices.min).sortByKey()
- val AveragePriceperCategory = ProductsGrouped.mapValues(prices => prices.sum / prices.size).sortByKey()
- val NoOfItemsperCategory = ProductsGrouped.mapValues(prices => prices.size).sortByKey()
- // Combined result: (category, (highest, lowest, average, count)), sorted by category.
- val Result = ProductsGrouped.mapValues(prices => (prices.max, prices.min, prices.sum / prices.size, prices.size)).sortByKey()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement