Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import org.apache.spark.storage.StorageLevel
// Inline stand-in for a movies CSV file: one record per string, laid out as
// `movieId,title,genres`, with the genres of a movie joined by '|'.
val sample: Vector[String] = Vector(
  "1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy",
  "2,Jumanji (1995),Adventure|Children|Fantasy",
  "3,Grumpier Old Men (1995),Comedy|Romance",
  "4,Waiting to Exhale (1995),Comedy|Drama|Romance",
  "5,Father of the Bride Part II (1995),Comedy",
  "6,Heat (1995),Action|Crime|Thriller",
  "7,Sabrina (1995),Comedy|Romance",
  "8,Tom and Huck (1995),Adventure|Children",
  "9,Sudden Death (1995),Action",
  "10,GoldenEye (1995),Action|Adventure|Thriller",
  "11,American President(1995),Comedy|Drama|Romance",
  "12,Dracula: Dead and Loving It (1995),Comedy|Horror",
  "13,Balto (1995),Adventure|Animation|Children",
  "14,Nixon (1995),Drama",
  "15,Cutthroat Island (1995),Action|Adventure|Romance",
  "16,Casino (1995),Crime|Drama",
  "17,Sense and Sensibility (1995),Drama|Romance",
  "18,Four Rooms (1995),Comedy",
  "19,Ace Ventura: When Nature Calls (1995),Comedy"
)
/** One parsed movie record.
  *
  * @param movieId numeric identifier taken from the first CSV field
  * @param title   movie title, as it appears in the second CSV field
  * @param genres  genre names taken from the third CSV field
  */
case class Movie(
  movieId: Long,
  title: String,
  genres: Vector[String]
) extends Serializable
// Distributes the raw `sample` lines, parses each one into a `Movie`, and marks
// the resulting RDD to be persisted with StorageLevel.MEMORY_ONLY_SER.
//
// NOTE(review): fields are split on every ',' so a title that itself contains a
// comma would be mis-parsed; none of the visible sample rows do — confirm before
// pointing this at a real CSV file.
val movieData = {
  sparkContext.parallelize(sample)
    // Drop any line containing "movieId," (i.e. a CSV header row, if one is present).
    .filter(line => !line.contains("movieId,"))
    // Limit -1 keeps trailing empty fields, so a row with an empty genres
    // column still yields three fields.
    .map(_.split(",", -1))
    .map { fields =>
      // Split on the Char '|', not the String "|": String.split(String) treats
      // its argument as a regex, and "|" is an alternation of two empty
      // patterns that breaks the text into single characters.
      Movie(fields(0).toLong, fields(1), fields(2).split('|').toVector)
    }
    .persist(StorageLevel.MEMORY_ONLY_SER)
}
// `count()` is an RDD action, so this line is what actually triggers evaluation of
// the pipeline above. It is declared with an empty parameter list; calling it with
// explicit parentheses avoids the auto-application deprecation in Scala 2.13+.
println(s"${movieData.count()} movie records")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement