scala> import org.apache.spark.rdd.RDD
scala> // Load the raw CSV as an RDD of lines
scala> val rdd: RDD[String] = sc.textFile("/home/rafi/MyProjects/spark-learning/dataset/foodmart_final.csv")
scala> // Split each line into trimmed fields
scala> val recordsWithHeader: RDD[Array[String]] = rdd.map(line => line.split(",").map(_.trim))
scala> // Take the header row, then drop it from the data by comparing the first field
scala> val header: Array[String] = recordsWithHeader.first()
scala> val data: RDD[Array[String]] = recordsWithHeader.filter(_(0) != header(0))
scala> // Sum column 4 for "CDR Apple Preserves" rows where column 3 (gender) is "M"
scala> data.filter(_(0) == "CDR Apple Preserves").filter(_(3) == "M").map(_(4).toInt).reduce(_ + _)
scala> // Row counts per gender
scala> data.filter(_(3) == "M").count()
scala> data.filter(_(3) == "F").count()
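
The two per-gender counts can also be gathered in a single pass; a minimal sketch (assuming, as above, that column 3 holds the gender code):

scala> // One-pass alternative to the two filter/count calls: tally rows per gender value
scala> data.map(_(3)).countByValue()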