Guest User

Untitled

a guest
Jan 22nd, 2018
57
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.91 KB | None | 0 0
  // Set operations on customer ids taken from the retail_db orders data.
  //
  // Every input line is split on ","; field 1 is matched against a "yyyy-MM"
  // string and field 2 is parsed as the Int customer id.
  // NOTE(review): presumably field 1 is the order date and field 2 the
  // customer id -- confirm against the data set's schema.
  //
  // Chains end each line with a trailing dot so that every chain stays a
  // single expression even when the script is entered line by line.

  val orders = sc.textFile("/public/retail_db/orders")

  // Customer ids (one per matching order, duplicates kept) of the orders whose
  // field 1 contains `month`, e.g. "2013-08". Each line is split only once and
  // the resulting fields are reused for both the filter and the id extraction.
  def customersOrderingIn(month: String) = orders.
    map(order => order.split(",")).
    filter(fields => fields(1).contains(month)).
    map(fields => fields(2).toInt)

  val customers_201308 = customersOrderingIn("2013-08")

  val customers_201309 = customersOrderingIn("2013-09")

  // Get all the customers who placed orders in both 2013 August and 2013 September
  val customers_201308_and_201309 = customers_201308.intersection(customers_201309)

  // Get all unique customers who placed orders in 2013 August or 2013 September
  // (union keeps duplicates, hence the explicit distinct)
  val customers_201308_union_201309 = customers_201308.union(customers_201309).distinct

  // Get all customers who placed orders in 2013 August but not in 2013 September.
  // subtract expresses the set difference directly, replacing the earlier
  // leftOuterJoin + filter-on-None approach, which also produced one joined row
  // per pair of matching orders before throwing them away.
  // distinct is retained so that each customer id appears once in the result.
  val customer_201308_minus_201309 = customers_201308.
    subtract(customers_201309).
    distinct
Add Comment
Please, Sign In to add comment