Advertisement
Guest User

Untitled

a guest
Mar 23rd, 2017
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.80 KB | None | 0 0
  1. package xxx.DataScience.CompensationStudy
  2.  
  3. import org.apache.spark._
  4. import org.apache.log4j._
  5.  
  6. import org.apache.spark.SparkContext
  7. import org.apache.spark.SparkContext._
  8. import org.apache.spark.SparkConf
  9.  
  10. import org.apache.spark.sql.Row
  11. import org.apache.spark.sql.functions.{col, udf}
  12. import org.apache.spark.sql.types._
  13.  
  14. import org.apache.spark.SparkContext
  15. import org.apache.spark.sql.SQLContext
  16.  
  17.  
  /**
   * Spark job that loads the compensation-study CSV file into a DataFrame
   * (one string column per CSV field) and prints the resulting schema.
   */
  object CompensationAnalysis {

    /** Number of columns a complete CSV record carries (indices 0 to 12). */
    private val ExpectedFieldCount = 13

    /** Placeholder stored in the missing columns of a single-field line. */
    private val DefaultValue = "default value"

    /**
     * One CSV record. Every column is kept as the raw string read from the file.
     * Declared at object level (not inside `main`) so that Spark can derive the
     * DataFrame schema from it by reflection.
     */
    final case class GetDF(
      profil_date: String,
      profil_pays: String,
      param_tarif2: String,
      param_tarif3: String,
      dt_titre: String,
      dt_langues: String,
      dt_diplomes: String,
      dt_experience: String,
      dt_formation: String,
      dt_outils: String,
      comp_applications: String,
      comp_interventions: String,
      comp_competence: String
    )

    /**
     * Converts one line of the CSV file into a record.
     *
     * @param line raw text line; fields are separated by commas (no quoting support)
     * @return `Some(record)` built from the first 13 fields when the line has at
     *         least 13 of them; `Some(record)` holding the lone field followed by
     *         `"default value"` placeholders when the line has exactly one field;
     *         `None` for any other field count
     */
    private def parseLine(line: String): Option[GetDF] = {
      // A limit of -1 keeps trailing empty columns; plain split(",") drops them,
      // which would make a complete row with empty last fields look too short.
      val fields = line.split(",", -1)

      if (fields.length >= ExpectedFieldCount) {
        Some(GetDF(
          fields(0), fields(1), fields(2), fields(3), fields(4), fields(5), fields(6),
          fields(7), fields(8), fields(9), fields(10), fields(11), fields(12)
        ))
      } else if (fields.length == 1) {
        Some(GetDF(
          fields(0), DefaultValue, DefaultValue, DefaultValue, DefaultValue, DefaultValue,
          DefaultValue, DefaultValue, DefaultValue, DefaultValue, DefaultValue, DefaultValue,
          DefaultValue
        ))
      } else {
        None
      }
    }

    /**
     * Entry point: reads the CSV, builds the DataFrame and prints its schema.
     *
     * @param args command-line arguments (unused)
     */
    def main(args: Array[String]): Unit = {

      // Silence everything below ERROR from the "org" logger hierarchy.
      Logger.getLogger("org").setLevel(Level.ERROR)

      val conf = new SparkConf().setAppName("CompensationAnalysis ")
      val sc = new SparkContext(conf)

      try {
        val sqlContext = new org.apache.spark.sql.SQLContext(sc)
        import sqlContext.implicits._

        // Parse on the executors and keep the data distributed: no collect() to
        // the driver is needed to build the DataFrame.
        val records = sc
          .textFile("C:/Users/../Downloads/CompensationStudy.csv")
          .flatMap(line => parseLine(line))

        val summaryDf = records.toDF()

        summaryDf.printSchema()
      } finally {
        // Release the Spark resources even when the job fails.
        sc.stop()
      }
    }

  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement