Advertisement
Guest User

Untitled

a guest
Dec 10th, 2016
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 1.24 KB | None | 0 0
  import org.apache.hadoop.io.Text
  import org.apache.hadoop.io.LongWritable
  import org.apache.hadoop.conf.Configuration
  import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
  import org.apache.spark.mllib.recommendation.ALS
  import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
  import org.apache.spark.mllib.recommendation.Rating

  // Script intended for spark-shell: `sc` is the SparkContext provided by the shell.
  //
  // Reads a text file of reviews, turns each review into a (user, product, score)
  // triple, assigns dense integer ids to users and products, and trains an ALS
  // matrix-factorization model on the resulting ratings.

  // Records in the input are separated by an empty line, so a whole review
  // (several "\n"-separated lines) is read as a single record.
  val conf = new Configuration
  conf.set("textinputformat.record.delimiter", "\n\n")

  val rawData = sc
    .newAPIHadoopFile("/shared3/data-small.txt", classOf[TextInputFormat], classOf[LongWritable], classOf[Text], conf)
    .map(_._2.toString)

  // One array of lines per review.
  val splitReviews = rawData.map(_.split("\n"))

  // (userId, productId, score) as strings. The product id is taken from line 0,
  // the user id from line 3 and the score from line 6 of each review.
  // Records that are too short, lack one of the markers, or carry a score that is
  // not a number are skipped instead of failing the whole job.
  // Cached because three separate actions below traverse it.
  val reviewMap = splitReviews.flatMap { lines =>
    // Text following `marker` on line `index`, if both the line and the marker exist.
    def valueAfter(index: Int, marker: String): Option[String] =
      lines.lift(index).flatMap(_.split(marker).lift(1))

    for {
      productId <- valueAfter(0, "/productId: ")
      userId <- valueAfter(3, "/userId: ")
      score <- valueAfter(6, "/score: ")
      if scala.util.Try(score.toDouble).isSuccess
    } yield (userId, productId, score)
  }.cache()

  // ALS needs Int ids: map every distinct user / product string to its position
  // in sorted order. Both lookup tables are collected to the driver.
  val names = reviewMap.map(_._1).distinct.sortBy(x => x).zipWithIndex.collectAsMap
  val products = reviewMap.map(_._2).distinct.sortBy(x => x).zipWithIndex.collectAsMap

  // Ratings are assembled on the driver, next to the lookup tables.
  // zipWithIndex yields Long indices, hence the explicit .toInt conversions.
  val ratings = reviewMap.collect.map { case (userId, productId, score) =>
    Rating(names(userId).toInt, products(productId).toInt, score.toDouble)
  }

  val rank = 10
  val numIterations = 10
  val lambda = 0.01 // regularization parameter passed to ALS

  // ALS.train expects an RDD[Rating], so the driver-side array is distributed again.
  val model: MatrixFactorizationModel = ALS.train(sc.parallelize(ratings), rank, numIterations, lambda)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement