Advertisement
Guest User

Untitled

a guest
May 14th, 2018
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 1.24 KB | None | 0 0
  1. // Spam-vs-ham text classifier sketch: hashed term-frequency features fed to MLlib logistic regression (SGD).
  2. import org.apache.spark.mllib.regression.LabeledPoint
  3. import org.apache.spark.mllib.feature.HashingTF
  4. import org.apache.spark.mllib.classification.LogisticRegressionWithSGD
  5.  
  6. // Load the spam and ham text files. NOTE(review): `sc` is not defined here; presumably a SparkContext supplied by the notebook/shell, with one email per line -- confirm.
  7. val spam = sc.textFile("/FileStore/tables/spam.txt")
  8. val ham = sc.textFile("/FileStore/tables/ham.txt")
  9.  
  10. // Hashing term-frequency transformer configured with 230000 features; shared by the training data and the test messages below.
  11. val tf = new HashingTF(numFeatures = 230000)
  12.  
  13. // Tokenize each record by splitting on single spaces (no lowercasing or punctuation stripping) and hash the tokens into feature vectors.
  14. val spamFeatures = spam.map(email => tf.transform(email.split(" ")))
  15. val hamFeatures = ham.map(email => tf.transform(email.split(" ")))
  16.  
  17. // Label spam vectors 1 and ham vectors 0, then concatenate both sets into a single training set.
  18. val positiveExamples = spamFeatures.map(features => LabeledPoint(1, features))
  19. val negativeExamples = hamFeatures.map(features => LabeledPoint(0, features))
  20. val trainingData = positiveExamples ++ negativeExamples
  21.  
  22. // Cache the training set before training; presumably because the SGD learner makes repeated passes over it.
  23. trainingData.cache()
  24.  
  25. // Train a logistic regression model using the learner's default settings (no parameters are overridden here).
  26. val lrLearner = new LogisticRegressionWithSGD()
  27. val model = lrLearner.run(trainingData)
  28.  
  29. // Featurize two hand-written messages with the same transformer and split rule as the training data, then print the model's prediction for each.
  30. val posTestExample = tf.transform("Oh my god, you're a winner! To take your price please send 3$ to this card".split(" "))
  31. val negTestExample = tf.transform("Hi Dad, I started studying Spark the other ...".split(" "))
  32. println(s"Prediction for positive test example: ${model.predict(posTestExample)}")
  33. println(s"Prediction for negative test example: ${model.predict(negTestExample)}")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement