Advertisement
Guest User

Untitled

a guest
Apr 17th, 2014
42
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 2.12 KB | None | 0 0
  1. package utexas.aorta.dm
  2.  
  3. import scala.collection.mutable
  4.  
  5. // The feature values have been discretized; the possible values are [0, bins)
  /**
   * A feature vector with no class label attached; this is what the classifier is asked to
   * classify.
   *
   * @param features one discretized value per feature, each expected to lie in [0, bins)
   */
  case class UnlabeledInstance(features: List[Int])

  /**
   * A training example: a discretized feature vector together with its known class label.
   *
   * @param label    the class this instance belongs to
   * @param osm_id   identifier carried along with the instance; not read anywhere in this file
   *                 (presumably an OpenStreetMap ID -- confirm against the caller)
   * @param features one discretized value per feature, each expected to lie in [0, bins)
   */
  case class LabeledInstance(label: String, osm_id: String, features: List[Int]) {
    /** Drops the label (and id) so the same feature vector can be fed back in for testing. */
    def for_test: UnlabeledInstance = {
      UnlabeledInstance(this.features)
    }
  }
  10.  
  /**
   * A naive Bayes classifier over discretized features.
   *
   * Every feature of every instance is an integer bin in [0, bins). Training estimates
   * p(label) and p(feature = bin | label) from frequency counts; classification picks the
   * label with the largest log joint probability.
   *
   * @param labels    the set of classes the classifier may predict
   * @param bins      number of discrete values each feature can take
   * @param smoothing additive (Laplace) pseudo-count added to every (label, feature, bin)
   *                  count. The default of 0 gives plain maximum-likelihood estimates, in
   *                  which a single unseen bin value drives a label's score to -Infinity.
   */
  class NaiveBayesClassifier(labels: Set[String], bins: Int, smoothing: Double = 0.0) {
    require(smoothing >= 0.0, s"smoothing must be non-negative, got $smoothing")

    // p(label). Empty until train() has been called.
    private var priors: Map[String, Double] = Map()
    // p(feature = bin | label); the key is (label, feature idx, bin value)
    private val features = new mutable.HashMap[(String, Int, Int), Double]()

    /**
     * Estimates priors and per-feature likelihoods from the given examples, replacing any
     * previously trained model.
     *
     * The number of features is taken from the first instance.
     *
     * @param training_data labeled examples; must be non-empty
     * @throws IllegalArgumentException if training_data is empty
     */
    def train(training_data: List[LabeledInstance]): Unit = {
      require(training_data.nonEmpty, "cannot train on an empty data set")

      // the key is just the label
      val prior_counts = new mutable.HashMap[String, Int]().withDefaultValue(0)
      // the key is (label, feature idx, bin value)
      val feature_counts = new mutable.HashMap[(String, Int, Int), Int]().withDefaultValue(0)

      for (instance <- training_data) {
        prior_counts(instance.label) += 1
        for ((bin, feature) <- instance.features.zipWithIndex) {
          feature_counts((instance.label, feature, bin)) += 1
        }
      }

      // A label listed in `labels` but absent from the training data gets prior 0 (log prior
      // -Infinity) instead of a missing-key exception at classification time.
      priors = normalize(prior_counts.toMap).withDefaultValue(0.0)

      // Drop likelihoods left over from an earlier train() call.
      features.clear()
      val num_features = training_data.head.features.size
      for (label <- labels; feature <- 0 until num_features) {
        val counts = (0 until bins).map(value => feature_counts((label, feature, value)))
        // denominator is the same for every value of this (label, feature) pair
        val denominator = counts.sum + smoothing * bins
        for ((count, value) <- counts.zipWithIndex) {
          // With no observations and no smoothing the ratio would be 0/0 = NaN, which would
          // poison the comparison in classify; treat it as probability 0 instead.
          features((label, feature, value)) =
            if (denominator > 0.0) (count + smoothing) / denominator else 0.0
        }
      }
    }

    /**
     * Returns the label with the highest log joint probability for the instance.
     *
     * Throws NoSuchElementException if the classifier has not been trained, or if the
     * instance has a feature index or bin value that training did not cover.
     */
    def classify(instance: UnlabeledInstance): String =
      labels.maxBy(label => posterior(instance, label))

    // Returns log(p(instance and class = label))
    private def posterior(instance: UnlabeledInstance, label: String): Double = {
      val log_likelihoods = instance.features.zipWithIndex.map {
        case (bin, feature) => math.log(features((label, feature, bin)))
      }
      math.log(priors(label)) + log_likelihoods.sum
    }

    // Turns counts into relative frequencies. Builds a strict map: mapValues would return a
    // lazy view that redoes the division on every lookup.
    private def normalize[K](m: Map[K, Int]): Map[K, Double] = {
      val sum = m.values.sum
      m.map { case (key, count) => key -> (count.toDouble / sum) }
    }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement