Advertisement
Guest User

Untitled

a guest
May 6th, 2016
49
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.11 KB | None | 0 0
  1. import org.apache.hadoop.conf.Configuration
  2. import org.apache.mahout.math.cf.SimilarityAnalysis
  3. import org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark
  4. import org.apache.spark.rdd.{NewHadoopRDD, RDD}
  5. import org.apache.spark.{SparkConf, SparkContext}
  6. import org.bson.BSONObject
  7. import com.mongodb.hadoop.MongoInputFormat
  8.  
  /**
   * Loads product documents from a MongoDB collection into Spark, turns each
   * document into a `(product_id, attribute tokens)` pair, wraps the pairs in a
   * Mahout `IndexedDatasetSpark`, and runs row-similarity analysis on it.
   *
   * NOTE(review): Spark's documentation recommends defining an explicit
   * `main(args)` method rather than extending `scala.App`. The `App` entry
   * point is kept here so the object's interface is unchanged; the closure
   * below deliberately references no fields of this object.
   */
  object SparkExample extends App {
    // Hadoop configuration that tells the MongoDB input format which
    // database.collection to read.
    val mongoConfig = new Configuration()
    mongoConfig.set("mongo.input.uri", "mongodb://my_mongo_ip:27017/db.collection")

    val sparkConf = new SparkConf()

    // Declared implicit because IndexedDatasetSpark.apply takes the SparkContext
    // through an implicit parameter list; with a plain `val` the call below fails
    // to compile (presumably the error the original paste was asking about --
    // confirm against the Mahout version on the classpath).
    implicit val sc: SparkContext = new SparkContext("local", "SparkExample", sparkConf)

    // One (key, BSON document) pair per document in the configured collection.
    val documents: RDD[(Object, BSONObject)] = sc.newAPIHadoopRDD(
      mongoConfig,
      classOf[MongoInputFormat],
      classOf[Object],
      classOf[BSONObject]
    )

    // (product_id, space-separated attribute tokens).
    // The attribute field is rendered via toString and assumed to look like
    // `[ "Value One" , "Value Two"]`: the surrounding `[ "` / `"]` are stripped,
    // the string is split on `" , "`, and each value is lower-cased with inner
    // spaces replaced by hyphens so that every value becomes a single token.
    // NOTE(review): the escaped quotes below were reconstructed -- the pasted
    // source had lost its backslashes -- verify against the real data format.
    // NOTE(review): a document missing either field makes `get` return null and
    // `toString` throw; filter such documents first if that can happen.
    val new_doc: RDD[(String, String)] = documents.map { doc1 =>
      val fields = doc1._2
      val productId = fields.get("product_id").toString()
      val attributeTokens = fields.get("product_attribute_value").toString()
        .replace("[ \"", "")
        .replace("\"]", "")
        .split("\" , \"")
        .map(value => value.toLowerCase.replace(" ", "-"))
        .mkString(" ")
      (productId, attributeTokens)
    }

    // Resolves the implicit SparkContext declared above.
    val myIDs = IndexedDatasetSpark(new_doc)

    SimilarityAnalysis.rowSimilarityIDS(myIDs)
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement