Guest User

Untitled

a guest
Dec 17th, 2018
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.59 KB | None | 0 0
  // Path of the employees data file read by the rest of this script.
  // Alternative path (disabled):
  //   "/Users/itversity/Research/data/hr_db/employees/part-00000"
  val employeesPath = "/mnt/c/data/hr_db/employees/part-00000"
  3. /* val employees = spark.
  4. read.
  5. text(employeesPath).
  6. withColumn("employee_id", split($"value", "\t")(0).cast("int")).
  7. withColumn("first_name", split($"value", "\t")(1)).
  8. withColumn("last_name", split($"value", "\t")(2)).
  9. withColumn("email", split($"value", "\t")(3)).
  10. withColumn("phone_number", split($"value", "\t")(4)).
  11. withColumn("hire_date", split($"value", "\t")(5)).
  12. withColumn("job_id", split($"value", "\t")(6)).
  13. withColumn("salary", split($"value", "\t")(7).cast("float")).
  14. withColumn("commission_pct", split($"value", "\t")(8).cast("int")).
  15. withColumn("manager_id", split($"value", "\t")(9).cast("int")).
  16. withColumn("department_id", split($"value", "\t")(10).cast("int")).
  17. drop($"value")
  18. */
  19.  
  // Read the file as plain text and view it as a Dataset[String]
  // (one element per input line). `.as[String]` needs the session's
  // implicit encoders in scope (spark.implicits._, as in spark-shell).
  val employeesRaw = spark.read.text(employeesPath).as[String]
  24.  
  // Parse every tab-delimited line into an 11-field tuple and expose it as a
  // DataFrame with named columns. employee_id is parsed as Int and salary as
  // Float; all other fields (including commission_pct, manager_id and
  // department_id) are kept as raw strings.
  val employees = employeesRaw.map { rec =>
    // limit = -1 keeps trailing empty fields. Plain split("\t") silently drops
    // them, so a record whose last column(s) are empty would produce fewer
    // than 11 elements and r(10) would throw ArrayIndexOutOfBoundsException.
    val r = rec.split("\t", -1)
    (
      r(0).toInt, r(1), r(2), r(3),
      r(4), r(5), r(6), r(7).toFloat,
      r(8), r(9), r(10)
    )
  }.toDF(
    "employee_id", "first_name", "last_name", "email",
    "phone_number", "hire_date", "job_id", "salary",
    "commission_pct", "manager_id", "department_id"
  )
  34.  
  // Use only 2 partitions for shuffles triggered by the window and sort below
  // (presumably because the input is small -- confirm before reusing elsewhere).
  spark.conf.set("spark.sql.shuffle.partitions", "2")

  import org.apache.spark.sql.expressions.Window

  // Window with one partition per department and no ordering, so an aggregate
  // evaluated over it sees every row of the department.
  val spec = Window.partitionBy("department_id")

  // For each employee: id, salary, department, and the total salary paid by
  // that employee's department, listed by department and then by salary
  // (highest first).
  val employeesSalary = employees.
    select("employee_id", "salary", "department_id").
    withColumn("department_salary_expense", sum($"salary").over(spec)).
    // department_id is a string column in `employees`; sorting it as text
    // would give "10", "100", "110", "20", ... so order by its integer value.
    // NOTE(review): values that are not valid integers become null under the
    // default (non-ANSI) cast -- confirm that is acceptable for this data.
    orderBy($"department_id".cast("int"), $"salary".desc)
Add Comment
Please, Sign In to add comment