Guest User

Untitled

a guest
Jan 23rd, 2018
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.99 KB | None | 0 0
  // Load products from the local file system and convert them into an RDD.
  // Input file: /data/retail_db/products/part-00000
  // This snippet relies on `sc` and `dailyRevenuePerProductId`, neither of which
  // is defined here; both must already be in scope (e.g. earlier in the session).

  import scala.io.Source

  // Read every line eagerly, then release the file handle. `getLines` returns a
  // lazy iterator, so `toList` has to finish before `close()` is called.
  val productsRaw = {
    val productsSource = Source.fromFile("/data/retail_db/products/part-00000")
    try productsSource.getLines.toList
    finally productsSource.close()
  }
  val products = sc.parallelize(productsRaw)
  products.take(10).foreach(println)
  products.count

  // Join daily revenue per product id with products to get daily revenue per product (by name)
  // Key each product line by field 0 (parsed as Int) and keep field 2 as the value.
  // (product_id, product_name) -- field names follow the join-result comment at the end.
  val productsMap = products.map { product =>
    val fields = product.split(",") // split once per record and reuse both fields
    (fields(0).toInt, fields(2))
  }
  productsMap.take(10).foreach(println)
  productsMap.count

  // Re-key the daily revenue records by product id so they can be joined with productsMap.
  // Input:  ((order_date, order_product_id), daily_revenue_per_product_id)
  // Output: (order_product_id, (order_date, daily_revenue_per_product_id))
  val dailyRevenuePerProductIdMap = dailyRevenuePerProductId.
    map(rec => (rec._1._2, (rec._1._1, rec._2)))
  dailyRevenuePerProductIdMap.take(10).foreach(println)

  // (order_product_id, ((order_date, daily_revenue_per_product_id), product_name))
  val dailyRevenuePerProductJoin = dailyRevenuePerProductIdMap.join(productsMap)
Add Comment
Please, Sign In to add comment