Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
- package id3;
- import java.io.BufferedReader;
- import java.io.File;
- import java.io.FileReader;
- import java.util.LinkedList;
- import java.util.List;
/**
 * Small ID3 decision-tree exercise over integer-coded tables.
 *
 * <p>A table is a {@code List<int[]>}: one array per row, one int per attribute.
 * All rows are expected to have the same length. The value {@code -1} is reserved
 * as a marker for "this attribute has already been split on" and must not occur
 * in input data.
 *
 * @author Student
 */
public class Main {

    /** Marker stored in a copied row's column once that attribute has been split on. */
    private static final int CONSUMED = -1;

    /** Table loaded by {@link #main(String[])}. */
    public static List<int[]> dataset;

    /**
     * Base-2 logarithm with {@code log2(0)} defined as 0, so that the entropy term
     * {@code f * log2(f)} vanishes for a zero frequency.
     */
    private static double log2(double x) {
        if (x == 0) {
            return 0;
        }
        return Math.log(x) / Math.log(2);
    }

    /**
     * Collects the distinct values found in column {@code attr}, in order of first
     * appearance.
     */
    private static List<Integer> distinctValues(int attr, List<int[]> dataset) {
        List<Integer> seen = new LinkedList<Integer>();
        for (int[] line : dataset) {
            Integer value = Integer.valueOf(line[attr]);
            if (!seen.contains(value)) {
                seen.add(value);
            }
        }
        return seen;
    }

    /**
     * Relative frequency of {@code value} in column {@code attr}.
     *
     * @param attr    column index
     * @param value   value to count
     * @param dataset rows to scan
     * @return occurrences divided by the number of rows; 0 for an empty dataset
     */
    public static double frequency(int attr, int value, List<int[]> dataset) {
        if (dataset.isEmpty()) {
            // Avoid 0/0 = NaN leaking into entropy sums.
            return 0;
        }
        int occurrences = 0;
        for (int[] line : dataset) {
            if (line[attr] == value) {
                occurrences++;
            }
        }
        return (double) occurrences / (double) dataset.size();
    }

    /**
     * Selects the rows whose column {@code attr} equals {@code value} and marks that
     * column as consumed in the result.
     *
     * <p>The returned rows are copies; the rows of {@code dataset} are left untouched,
     * so the same table can be partitioned repeatedly.
     *
     * @param dataset rows to filter
     * @param attr    column index to filter on
     * @param value   value the column must hold
     * @return new list of copied rows with column {@code attr} set to -1
     */
    public static List<int[]> subtable(List<int[]> dataset, int attr, int value) {
        List<int[]> result = new LinkedList<int[]>();
        for (int[] line : dataset) {
            if (line[attr] == value) {
                int[] copy = line.clone();
                copy[attr] = CONSUMED;
                result.add(copy);
            }
        }
        return result;
    }

    /**
     * Average entropy of column {@code attr2} after partitioning the table by the
     * values of column {@code attr1}: the sum over each distinct value of its
     * frequency times the entropy of {@code attr2} within the matching rows.
     *
     * @param attr1   column to partition by
     * @param attr2   column whose entropy is measured
     * @param dataset rows to analyse (not modified)
     * @return frequency-weighted entropy; 0 for an empty dataset
     */
    public static double avg_entropy(int attr1, int attr2, List<int[]> dataset) {
        double result = 0;
        for (int value : distinctValues(attr1, dataset)) {
            double weight = frequency(attr1, value, dataset);
            result += weight * entropy(attr2, subtable(dataset, attr1, value));
        }
        return result;
    }

    /**
     * Shannon entropy, in bits, of the value distribution in column {@code attr}.
     *
     * @param attr    column index
     * @param dataset rows to analyse
     * @return entropy; 0 when the column holds a single value or the dataset is empty
     */
    public static double entropy(int attr, List<int[]> dataset) {
        double e = 0;
        for (int value : distinctValues(attr, dataset)) {
            double f = frequency(attr, value, dataset);
            // Subtracting (rather than negating the sum at the end) keeps a pure
            // column at +0.0 instead of -0.0.
            e -= f * log2(f);
        }
        return e;
    }

    /** Decision-tree node: a text label plus child subtrees (empty for a leaf). */
    private static class Node {
        String label;
        List<Node> nodes = new LinkedList<Node>();

        /** Renders the label, followed by the children in braces when there are any. */
        @Override
        public String toString() {
            if (nodes.isEmpty()) {
                return String.valueOf(label);
            }
            StringBuilder text = new StringBuilder(String.valueOf(label)).append(" {");
            boolean firstChild = true;
            for (Node child : nodes) {
                if (!firstChild) {
                    text.append("; ");
                }
                text.append(child);
                firstChild = false;
            }
            return text.append("}").toString();
        }
    }

    /**
     * Most frequent value in column {@code attr}. Ties go to the value that appears
     * first in the table; an empty dataset yields 0.
     */
    private static int most_common_value(int attr, List<int[]> dataset) {
        double max_f = 0;
        int most_common = 0;
        for (int value : distinctValues(attr, dataset)) {
            double f = frequency(attr, value, dataset);
            if (f > max_f) {
                max_f = f;
                most_common = value;
            }
        }
        return most_common;
    }

    /**
     * Builds a decision tree that predicts column {@code attr} from the other columns.
     *
     * <p>A leaf is produced when every other attribute has already been consumed
     * (labelled with the most common target value) or when all rows agree on the
     * target. Otherwise the unconsumed attribute with the lowest average entropy is
     * chosen, and one child is built per distinct value of that attribute.
     *
     * @param dataset rows to learn from (not modified)
     * @param attr    index of the target column
     * @return root of the tree, or {@code null} when {@code dataset} is null or empty
     */
    public static Node id3(List<int[]> dataset, int attr) {
        if (dataset == null || dataset.isEmpty()) {
            return null;
        }
        Node n = new Node();
        int[] first = dataset.get(0);

        // Consumed columns are identical across all rows of a subtable, so the first
        // row is enough to tell whether any attribute is left to split on.
        boolean empty = true;
        for (int i = 0; i < first.length; i++) {
            if (i != attr && first[i] != CONSUMED) {
                empty = false;
                break;
            }
        }
        if (empty) {
            n.label = "ATTR[" + attr + "] = " + most_common_value(attr, dataset);
            return n;
        }

        boolean same = true;
        for (int[] line : dataset) {
            if (line[attr] != first[attr]) {
                same = false;
                break;
            }
        }
        if (same) {
            n.label = "ATTR[" + attr + "] = " + first[attr] + " with probability 1";
            return n;
        }

        // The target itself and consumed attributes are not candidates: splitting on
        // the target always gives average entropy 0 and would win every time.
        double min_avg_e = Double.POSITIVE_INFINITY;
        int min_attr = -1;
        for (int a = 0; a < first.length; a++) {
            if (a == attr || first[a] == CONSUMED) {
                continue;
            }
            double avg_e = avg_entropy(a, attr, dataset);
            if (avg_e < min_avg_e) {
                min_attr = a;
                min_avg_e = avg_e;
            }
        }

        n.label = "split on ATTR[" + min_attr + "]";
        for (int value : distinctValues(min_attr, dataset)) {
            // Each subtable is non-empty (the value was taken from the data) and has
            // min_attr consumed, so the recursion depth is bounded by the row length.
            Node child = id3(subtable(dataset, min_attr, value), attr);
            child.label = "ATTR[" + min_attr + "] = " + value + " -> " + child.label;
            n.nodes.add(child);
        }
        return n;
    }

    /**
     * Reads a table from a text file, echoing every line to standard output.
     *
     * <p>Each non-blank line supplies four columns: characters 0-2 map to 1 when they
     * are {@code 'Y'} and 0 otherwise, and character 3 is parsed as a decimal digit.
     * Reading is best-effort: on an I/O error or a malformed line the exception is
     * printed and the rows parsed so far are returned.
     *
     * @param filename path of the file to read
     * @return parsed rows, possibly empty; never {@code null}
     */
    public static List<int[]> readFile(String filename) {
        List<int[]> result = new LinkedList<int[]>();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(new File(filename)));
            String next_line;
            while ((next_line = reader.readLine()) != null) {
                System.out.println(next_line);
                if (next_line.trim().length() == 0) {
                    // A stray blank line (e.g. at end of file) is not data.
                    continue;
                }
                int[] line = new int[4];
                for (int i = 0; i < 3; i++) {
                    line[i] = (next_line.charAt(i) == 'Y') ? 1 : 0;
                }
                line[3] = Integer.parseInt(String.valueOf(next_line.charAt(3)));
                result.add(line);
            }
        } catch (Exception e) {
            System.out.println("Exception: " + e.toString());
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (Exception e) {
                    System.out.println("Exception: " + e.toString());
                }
            }
        }
        return result;
    }

    /**
     * Loads {@code input.txt} and prints the entropy of column 3.
     *
     * @param args the command line arguments (unused)
     */
    public static void main(String[] args) {
        dataset = readFile("input.txt");
        System.out.println(entropy(3, dataset));
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement