Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- scala> import org.apache.spark.rdd.RDD
- scala> // Load the raw CSV; each element is one unparsed line of the file.
- scala> val rdd: RDD[String] = sc.textFile("/home/rafi/MyProjects/spark-learning/dataset/foodmart_final.csv")
- scala> // Split each line on commas and trim whitespace. NOTE(review): naive split —
- scala> // breaks if any field contains an embedded comma; assumes the CSV is unquoted.
- scala> val recordsWithHeader: RDD[Array[String]] = rdd.map(line => line.split(",").map(_.trim))
- scala> val header: Array[String] = recordsWithHeader.first()
- scala> // Drop the header by excluding rows whose first field equals the header's first
- scala> // field. Caveat: this also drops any data row that repeats that value.
- scala> val data: RDD[Array[String]] = recordsWithHeader.filter(_(0) != header(0))
- scala> // Total of column 4 for "CDR Apple Preserves" sold to males. Parse to Int once,
- scala> // then fold with a zero element: pure Int arithmetic (no String round-trips per
- scala> // combine) and safe — returns 0 instead of throwing when the filter matches nothing.
- scala> data.filter(_(0) == "CDR Apple Preserves").filter(_(3) == "M").map(_(4).toInt).fold(0)(_ + _)
- scala> // Row counts by gender (column 3).
- scala> data.filter(_(3) == "M").count()
- scala> data.filter(_(3) == "F").count()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement