Advertisement
Guest User

Untitled

a guest
Mar 5th, 2015
190
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 2.54 KB | None | 0 0
  // Conditional entropy: for each value taken by the attribute at index
  // ix_attr, the specific conditional entropy for that value multiplied by
  // the probability of that value, summed over all values.
  4.     public static double conditionalEntropy(List<Instance> instances,
  5.             Integer ix_attr) {
  6.         double sum = 0.0;
  7.         Map<Integer, List<Instance>> instancesByAttrVal = binInstancesByVal(
  8.                 instances, ix_attr);
  9.         for (List<Instance> filteredInstances : instancesByAttrVal.values()) {
  10.             // probability of being in this set is equal to the number of items
  11.             // in the set divided by the total number of items
  12.             double probability = filteredInstances.size()
  13.                     / (double) instances.size();
  14.             // specific conditional entropy
  15.             sum += entropy(filteredInstances) * probability;
  16.         }
  17.  
  18.         return sum;
  19.     }
  20.  
  /**
   * Groups instances by their value for the attribute at index {@code ix_attr}.
   *
   * @param instances the instances to group
   * @param ix_attr   index passed to each instance's {@code attributes.get(...)}
   * @return a map from attribute value to the instances having that value;
   *         within each list the instances keep the order of {@code instances}
   * @throws IllegalArgumentException if {@code instances} or {@code ix_attr}
   *         is null, or {@code ix_attr} is negative
   */
  public static Map<Integer, List<Instance>> binInstancesByVal(
          List<Instance> instances, Integer ix_attr) {
      // The null check on ix_attr keeps a missing index from surfacing as a
      // bare NullPointerException when it is unboxed for the comparison.
      if (instances == null || ix_attr == null || ix_attr < 0) {
          throw new IllegalArgumentException(
                  "instances must be non-null and ix_attr a non-negative index (ix_attr="
                          + ix_attr + ")");
      }

      Map<Integer, List<Instance>> instancesByVal = new HashMap<>();
      for (Instance instance : instances) {
          int val = instance.attributes.get(ix_attr);
          // computeIfAbsent creates the bucket only the first time a value is
          // seen, rather than allocating a throwaway list for every instance.
          instancesByVal.computeIfAbsent(val, k -> new ArrayList<>()).add(instance);
      }
      return instancesByVal;
  }
  37.  
  38.     // Based on diversity of label
  39.     public static double entropy(List<Instance> instances) {
  40.         if (instances == null)
  41.             throw new IllegalArgumentException();
  42.  
  43.         double sum = 0.0;
  44.         Map<Integer, Integer> numInstancesByLabel = countInstancesByLabel(instances);
  45.         for (int num : numInstancesByLabel.values()) {
  46.             double probability = (double) num / instances.size();
  47.             // add Pr(i) * lg_2(Pr(i))
  48.             sum -= probability * Math.log(probability) / Math.log(2);
  49.         }
  50.  
  51.         return sum;
  52.     }
  53.  
  54.     public static Map<Integer, Integer> countInstancesByLabel(
  55.             List<Instance> instances) {
  56.         if (instances == null)
  57.             throw new IllegalArgumentException();
  58.  
  59.         Map<Integer, Integer> numInstancesByLabel = new HashMap<Integer, Integer>();
  60.         for (Instance i : instances) {
  61.             int num = 0;
  62.             if (numInstancesByLabel.containsKey(i.label))
  63.                 num = numInstancesByLabel.get(i.label);
  64.             numInstancesByLabel.put(i.label, num + 1);
  65.         }
  66.         return numInstancesByLabel;
  67.     }
  68.     public void rootInfoGain(DataSet train) {
  69.         this.labels = train.labels;
  70.         this.attributes = train.attributes;
  71.         this.attributeValues = train.attributeValues;
  72.  
  73.         for (int i = 0; i < attributes.size(); i++) {
  74.             System.out
  75.                     .printf("%s %.5f\n", attributes.get(i),
  76.                             1.0 - DecisionTreeImpl.conditionalEntropy(
  77.                                     train.instances, i));
  78.         }
  79.     }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement