Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Load the orders and order_items text datasets from retail_db.
- val orders = sc.textFile("/public/retail_db/orders")
- val orderItems = sc.textFile("/public/retail_db/order_items")
- // Peek at the first record of each dataset.
- orders.first()
- orderItems.first()
- // Print a 10-record sample of each dataset to inspect the field layout.
- orders.take(10).foreach(line => println(line))
- orderItems.take(10).foreach(line => println(line))
- // Filter for completed or closed orders.
- // First list every distinct value of the field at index 3 of the
- // comma-separated order record (the field compared against
- // "COMPLETE"/"CLOSED" below), so the exact spellings can be checked.
- orders.
-   map(_.split(",")(3)).
-   distinct.
-   collect.
-   foreach(println)
- // Keep only the orders whose field 3 is COMPLETE or CLOSED. The record is
- // split once per line and the field reused for both comparisons, instead of
- // re-splitting the same string for each comparison.
- val ordersFiltered = orders.
-   filter { order =>
-     val status = order.split(",")(3)
-     status == "COMPLETE" || status == "CLOSED"
-   }
- // Print up to 100 of the retained orders as a sanity check.
- ordersFiltered.take(100).foreach(println)
- // Convert both filtered orders and order_items to key value pairs.
- // Each record is split once and the resulting fields are indexed, rather than
- // re-splitting the same line for every field that is read.
- //
- // ordersMap: (field 0 as Int, field 1 as String) for each filtered order.
- // NOTE(review): field 0 is presumably the order id and field 1 the order
- // date -- confirm against the retail_db schema.
- val ordersMap = ordersFiltered.
-   map { order =>
-     val fields = order.split(",")
-     (fields(0).toInt, fields(1))
-   }
- // orderItemsMap: (field 1 as Int, (field 2 as Int, field 4 as Float)) for each
- // order item. Field 1 is the key later joined against the ordersMap key.
- // NOTE(review): fields 2 and 4 are presumably the product id and the item
- // subtotal -- confirm against the retail_db schema.
- val orderItemsMap = orderItems.
-   map { oi =>
-     val fields = oi.split(",")
-     (fields(1).toInt, (fields(2).toInt, fields(4).toFloat))
-   }
- // Print a 10-record sample of each keyed dataset.
- ordersMap.take(10).foreach(println)
- orderItemsMap.take(10).foreach(println)
- // Join the two keyed data sets on their shared Int key; each result pairs the
- // key with (value from ordersMap, value from orderItemsMap).
- val ordersJoin = ordersMap.join(orderItemsMap)
- // Print a 10-record sample of the joined data.
- ordersJoin.take(10).foreach(joined => println(joined))
- // Count the joined records.
- ordersJoin.count()
Add Comment
Please, Sign In to add comment