Advertisement
Guest User

failingspark

a guest
Apr 28th, 2015
206
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 1.39 KB | None | 0 0
  import org.apache.spark.storage.StorageLevel

  // Sample rows in `id,title,genres` form; within the third column the genre
  // names are separated by '|'.
  val sample = Vector(
    "1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy",
    "2,Jumanji (1995),Adventure|Children|Fantasy",
    "3,Grumpier Old Men (1995),Comedy|Romance",
    "4,Waiting to Exhale (1995),Comedy|Drama|Romance",
    "5,Father of the Bride Part II (1995),Comedy",
    "6,Heat (1995),Action|Crime|Thriller",
    "7,Sabrina (1995),Comedy|Romance",
    "8,Tom and Huck (1995),Adventure|Children",
    "9,Sudden Death (1995),Action",
    "10,GoldenEye (1995),Action|Adventure|Thriller",
    "11,American President(1995),Comedy|Drama|Romance",
    "12,Dracula: Dead and Loving It (1995),Comedy|Horror",
    "13,Balto (1995),Adventure|Animation|Children",
    "14,Nixon (1995),Drama",
    "15,Cutthroat Island (1995),Action|Adventure|Romance",
    "16,Casino (1995),Crime|Drama",
    "17,Sense and Sensibility (1995),Drama|Romance",
    "18,Four Rooms (1995),Comedy",
    "19,Ace Ventura: When Nature Calls (1995),Comedy"
  )

  /** One parsed row: the numeric id, the title text, and the genre names. */
  case class Movie(movieId: Long, title: String, genres: Vector[String]) extends Serializable

  // Parses `sample` into an RDD of Movie records and persists it in serialized
  // in-memory form.
  // NOTE(review): `sparkContext` is not defined in this script; it is assumed to
  // be supplied by the surrounding shell/notebook session -- confirm.
  val movieData = {
    sparkContext.parallelize(sample)
      // Drop any line containing "movieId," (presumably a CSV header row; none
      // of the sample rows match).
      .filter(line => !line.contains("movieId,"))
      // limit = -1 keeps trailing empty strings, so a row whose genres column is
      // empty still yields a third field.
      // NOTE(review): a title that itself contains a comma would shift the
      // columns; the sample data has none.
      .map(_.split(",", -1))
      // Split genres on the literal pipe character. The String overload
      // split("|") interprets its argument as a regex, where a bare `|` is an
      // alternation of two empty patterns and therefore matches between every
      // character, producing single-character "genres".
      .map(fields => Movie(fields(0).toLong, fields(1), fields(2).split('|').toVector))
      .persist(StorageLevel.MEMORY_ONLY_SER)
  }

  // count() is a Spark action: it triggers evaluation of the pipeline above.
  println(s"${movieData.count()} movie records")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement