Advertisement
Guest User

Untitled

a guest
Mar 3rd, 2014
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 2.61 KB | None | 0 0
  1. import java.sql.{DriverManager, ResultSet}
  2. import scala.annotation.tailrec
  3. import scala.collection._;
  4. import scala.collection.mutable.ListBuffer
  5. import util.Random
  6. import org.apache.spark.util.Vector
  7.  
  /**
   * A k-means cluster: a centroid plus a growable buffer of the points assigned to it.
   *
   * @param center centroid of the cluster
   * @param points points currently assigned to the cluster; a fresh empty buffer by default
   */
  class Cluster(
      val center: Vector,
      val points: ListBuffer[Vector] = ListBuffer.empty[Vector]
  )
  9.  
  10. object SimpleKMeans {
  11.  
  12.   def filledData(): immutable.HashMap[String, Vector] = { ...
  13.     new immutable.HashMap[String, Vector]() ++ result
  14.   }
  15.  
  /**
   * Draws up to `k` elements from `values` in random order.
   *
   * @param k      number of elements to pick (defaults to 3)
   * @param values candidates to draw from
   * @return the first `k` elements of a random shuffle of `values`; fewer when
   *         `values` holds fewer than `k` elements
   */
  def randomK(k: Int = 3, values: Iterable[Vector]) = {
    val shuffled = Random.shuffle(values)
    shuffled.take(k)
  }
  17.  
  /**
   * Computes the arithmetic mean of `points`, printing a debug trace of the inputs,
   * the intermediate sum and the result.
   *
   * The sum is accumulated with the non-mutating `+`. Accumulating with `+=` adds
   * into the left operand in place, which would overwrite the first element of
   * `points` (an object shared with the caller's data set) with the running sum.
   *
   * @param points vectors to average; must be non-empty, because `reduce` throws
   *               `UnsupportedOperationException` on an empty collection
   * @param length divisor applied to the sum, normally the number of points
   * @return the sum of `points` divided by `length`
   */
  def mean(points: Iterable[Vector], length: Int) = {
    println("In mean func:")
    println("points: " + points)
    println("length: " + length)
    val sum = points.reduce((x: Vector, y: Vector) => x + y)
    println("sum: " + sum)
    val result = sum / length
    println("result: " + result)
    println("*" * 42)
    result
  }
  28.  
  /**
   * Runs k-means over `data`, starting from the centers of `oldClusters`, and
   * returns the final centers. Each pass prints a debug trace of the centers and
   * of the cluster memberships.
   *
   * One pass assigns every point to the cluster whose center is nearest
   * (by `dist`), then replaces each center with the mean of its assigned points.
   * Passes repeat until no center moves by more than `tolerance`, or until
   * `maxIterations` passes have run.
   *
   * The previous version recursed unconditionally and was declared `Void`, so it
   * could never return; the result is now the collection of final centers.
   *
   * @param data          labelled points to cluster; only the values are used
   * @param oldClusters   clusters whose centers seed this pass; their `points` are ignored.
   *                      Must be non-empty when `data` is non-empty, since the nearest
   *                      cluster is found with `reduce`
   * @param tolerance     largest center movement (by `dist`) still treated as converged
   * @param maxIterations upper bound on the number of passes, guarding against
   *                      inputs that never settle (e.g. NaN distances)
   * @return the cluster centers after the last pass, in the order of `oldClusters`
   */
  @tailrec
  def kMeans(
      data: immutable.HashMap[String, Vector],
      oldClusters: Iterable[Cluster],
      tolerance: Double = 0.0,
      maxIterations: Int = 100
  ): Iterable[Vector] = {
    println(data.values)
    println("Input centers:")
    oldClusters.foreach(c => println(c.center))
    println("-" * 42)

    // Materialise once: a lazy `oldClusters` would otherwise build fresh Cluster
    // objects on every traversal and lose the points appended below.
    val clusters = oldClusters.map(c => new Cluster(c.center)).toList
    println("Before choosing closest cluster:")
    clusters.foreach(c => println(c.points))

    for (point <- data.values) {
      // The strict `>` keeps the earlier cluster when two centers are equally close.
      val nearest = clusters.reduce { (prev, next) =>
        if ((prev.center dist point) > (next.center dist point)) next else prev
      }
      nearest.points += point
    }

    println("After choosing closest cluster:")
    clusters.foreach(c => println(c.points))

    // A cluster that attracted no points keeps its previous center, because
    // `mean` cannot average an empty collection.
    val result = clusters.map { c =>
      if (c.points.isEmpty) c.center else mean(c.points, c.points.length)
    }
    println("New centers:")
    println(result)
    println("-" * 42)

    val converged = clusters.zip(result).forall { case (c, updated) =>
      (c.center dist updated) <= tolerance
    }

    if (converged || maxIterations <= 1) {
      result
    } else {
      println("New cluster:")
      val nextClusters = result.map { r =>
        println("---- " + r)
        new Cluster(r)
      }
      kMeans(data, nextClusters, tolerance, maxIterations - 1)
    }
  }
  81.  
  /**
   * Entry point: loads the data set, picks random starting centroids and prints
   * them, then runs `kMeans` from those centroids and prints what it returns.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val data = filledData()

    val centroids = randomK(values = data.values)
    // foreach rather than map: the printing is a side effect and no result is needed.
    centroids.foreach(c => println(c))

    val clusters = centroids.map(c => new Cluster(c))
    println()

    println(kMeans(data, clusters))
    println()
  }
  96. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement