Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Load products from the local file system (/data/retail_db/products/part-00000)
// and convert them into an RDD.
import scala.io.Source

// Read every line eagerly into a List so the file handle can be released
// before the data is handed to Spark. getLines is lazy, so the toList must
// run before close(); try/finally guarantees the close even if reading fails.
val productsRaw = {
  val productsSource = Source.fromFile("/data/retail_db/products/part-00000")
  try {
    productsSource.getLines.toList
  } finally {
    productsSource.close()
  }
}

// Hand the in-memory lines to the Spark context to build the products RDD.
val products = sc.parallelize(productsRaw)

// Sanity checks: preview the first 10 records and count all of them.
products.take(10).foreach(println)
products.count
// Join daily revenue per product id with products to get daily revenue per product (by name)
// First step: key each comma-separated product record by its id, keeping
// field 0 (parsed as Int) as the key and field 2 as the value.
val productsMap = products.map { product =>
  // Split once and reuse the fields instead of re-splitting for each column.
  val fields = product.split(",")
  (fields(0).toInt, fields(2))
}

// Sanity checks: preview the first 10 pairs and count all of them.
productsMap.take(10).foreach(println)
productsMap.count
// Input shape: ((order_date, order_product_id), daily_revenue_per_product_id)
// Re-key each record by product id so it can be joined with productsMap.
val dailyRevenuePerProductIdMap = dailyRevenuePerProductId.map {
  case ((orderDate, productId), revenue) => (productId, (orderDate, revenue))
}
dailyRevenuePerProductIdMap.take(10).foreach(println)
// Re-keyed shape: (order_product_id, (order_date, daily_revenue_per_product_id))

// Join on product id to attach the value from productsMap to each revenue record.
val dailyRevenuePerProductJoin = dailyRevenuePerProductIdMap.join(productsMap)
// Joined shape: (order_product_id, ((order_date, daily_revenue_per_product_id), product_name))
Add Comment
Please, Sign In to add comment