Advertisement
JunkPile77

Untitled

Apr 29th, 2019
145
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.17 KB | None | 0 0
  import org.apache.spark.mllib.linalg._
  import org.apache.spark.mllib.regression._
  import org.apache.spark.mllib.evaluation._
  import org.apache.spark.mllib.tree._
  import org.apache.spark.mllib.tree.model._
  import org.apache.spark.mllib.rdd._

  // Script: trains a single decision-tree classifier on two columns (age, chol)
  // of "processed.hungarian.data" and predicts the class of one hand-written sample.
  // Assumes `sc` is an existing SparkContext (e.g. the one spark-shell provides).

  // Marker the data file uses for a missing value.
  val missingMarker = "?"
  // Numeric stand-in for a missing *feature* value (kept from the original script).
  val missingFeature = -1.0

  // Zero-based positions of the columns used.
  // In the file's 1-based numbering: 1 = age, 5 = chol, 14 = prediction (label).
  val ageIdx = 0
  val cholIdx = 4
  val labelIdx = 13

  // Load the raw file and split every line into its comma-separated fields.
  val file = sc.textFile("processed.hungarian.data")
  val fileData = file.map(_.split(","))

  // Parses one feature field, substituting the sentinel when the value is missing.
  def featureOrMissing(field: String): Double =
    if (field == missingMarker) missingFeature else field.toDouble

  // (age, chol, label) triples.
  // Rows that are too short to hold the label column are skipped, and rows whose
  // label is missing are dropped: trainClassifier expects labels in
  // 0 until numClasses, so a -1 placeholder label is not valid training input.
  val parsedData = fileData
    .filter(fields => fields.length > labelIdx && fields(labelIdx) != missingMarker)
    .map { fields =>
      (featureOrMissing(fields(ageIdx)),
       featureOrMissing(fields(cholIdx)),
       fields(labelIdx).toDouble)
    }

  // Wrap each triple as a LabeledPoint with a dense two-element feature vector.
  val data = parsedData.map { case (age, chol, label) =>
    LabeledPoint(label, Vectors.dense(age, chol))
  }

  // Empty map: no feature is declared categorical, so both are treated as continuous.
  val categoricalfeatureinfo = Map[Int, Int]()

  // Training parameters, in trainClassifier's positional order.
  val numClasses = 2    // number of possible outcomes; NOTE(review): confirm the label column only holds 0/1
  val impurity = "gini" // split-quality criterion
  val maxDepth = 2      // maximum depth of the tree
  val maxBins = 100     // maximum number of bins used when discretising features (not a tree count)

  val model = DecisionTree.trainClassifier(
    data, numClasses, categoricalfeatureinfo, impurity, maxDepth, maxBins)

  // Test: a single sample with age = 41 and chol = 336.
  val testData = Vectors.dense(41.0, 336.0)

  val prediction = model.predict(testData)
  //println("Model Tree : \n " + model.toDebugString)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement