Guest User

Untitled

a guest
Jan 23rd, 2018
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.00 KB | None | 0 0
  // spark-shell walkthrough: load the retail_db orders and order_items text
  // files, keep only COMPLETE/CLOSED orders, key both data sets by an Int id
  // and inner-join them.
  //
  // Expects `sc` (a SparkContext) to already be in scope, as it is inside
  // spark-shell. Chains use trailing dots so the script can be pasted into the
  // REPL line by line. Bare expressions such as `orders.first` rely on the REPL
  // echoing their result.

  // Read orders and order_items as RDDs of raw comma-separated lines
  val orders = sc.textFile("/public/retail_db/orders")
  val orderItems = sc.textFile("/public/retail_db/order_items")

  // Preview the first record of each data set
  orders.first
  orderItems.first

  // Preview the first 10 records of each data set
  orders.take(10).foreach(println)
  orderItems.take(10).foreach(println)

  // List the distinct values of the 4th column of orders (index 3); this is
  // the column the filter below compares against "COMPLETE" and "CLOSED"
  orders.
    map(order => order.split(",")(3)).
    distinct.
    collect.
    foreach(println)

  // Filter for completed or closed orders.
  // The line is split once and the status reused for both comparisons.
  val ordersFiltered = orders.
    filter { order =>
      val status = order.split(",")(3)
      status == "COMPLETE" || status == "CLOSED"
    }
  ordersFiltered.take(100).foreach(println)

  // Convert both filtered orders and order_items to key value pairs.
  // Each record is split once and the fields are picked from the same array.
  // ordersMap: (column 0 as Int, column 1 as String)
  //   presumably (order id, order date) - confirm against the retail_db schema
  val ordersMap = ordersFiltered.
    map { order =>
      val fields = order.split(",")
      (fields(0).toInt, fields(1))
    }
  // orderItemsMap: (column 1 as Int, (column 2 as Int, column 4 as Float))
  //   presumably (order id, (product id, subtotal)) - confirm against the schema
  val orderItemsMap = orderItems.
    map { oi =>
      val fields = oi.split(",")
      (fields(1).toInt, (fields(2).toInt, fields(4).toFloat))
    }

  ordersMap.take(10).foreach(println)
  orderItemsMap.take(10).foreach(println)

  // Join the two data sets on their Int key; each result element has the
  // shape (key, (orderValue, (itemInt, itemFloat)))
  val ordersJoin = ordersMap.join(orderItemsMap)
  ordersJoin.take(10).foreach(println)
  ordersJoin.count
Add Comment
Please, Sign In to add comment