Guest User

Untitled

a guest
Nov 23rd, 2017
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.13 KB | None | 0 0
  1. int numAttributes;
  2.  
  3. String []attributeNames;
  4.  
  5. Vector []domains;
  6.  
  7. class DataPoint {
  8.  
  9. public int []attributes;
  10.  
  11. public DataPoint(int numattributes) {
  12.  
  13. attributes = new int[numattributes];
  14.  
  15. }
  16.  
  17. };
  18.  
  19. class TreeNode {
  20.  
  21. public double entropy;
  22.  
  23. public Vector data;
  24.  
  25. public int decompositionAttribute;
  26.  
  27. public int decompositionValue;
  28.  
  29. public TreeNode []children;
  30.  
  31. public TreeNode parent;
  32.  
  33. public TreeNode() {
  34.  
  35. data = new Vector();
  36.  
  37. }
  38.  
  39. };
  40.  
  41. TreeNode root = new TreeNode();
  42.  
  43. public int getSymbolValue(int attribute, String symbol) {
  44.  
  45. int index = domains[attribute].indexOf(symbol);
  46.  
  47. if (index < 0) {
  48.  
  49. domains[attribute].addElement(symbol);
  50.  
  51. return domains[attribute].size() -1;
  52.  
  53. }
  54.  
  55. return index;
  56.  
  57. }
  58.  
  59.  
  60.  
  61. public int []getAllValues(Vector data, int attribute) {
  62.  
  63. Vector values = new Vector();
  64.  
  65. int num = data.size();
  66.  
  67. for (int i=0; i< num; i++) {
  68.  
  69. DataPoint point = (DataPoint)data.elementAt(i);
  70.  
  71. String symbol =
  72.  
  73. (String)domains[attribute].elementAt(point.attributes[attribute] );
  74.  
  75. int index = values.indexOf(symbol);
  76.  
  77. if (index < 0) {
  78.  
  79. values.addElement(symbol);
  80.  
  81. }
  82.  
  83. }
  84.  
  85. int []array = new int[values.size()];
  86.  
  87. for (int i=0; i< array.length; i++) {
  88.  
  89. String symbol = (String)values.elementAt(i);
  90.  
  91. array = domains[attribute].indexOf(symbol);
  92.  
  93. }
  94.  
  95. values = null;
  96.  
  97. return array;
  98.  
  99. }
  100.  
  101.  
  102.  
  103.  
  104.  
  105. public Vector getSubset(Vector data, int attribute, int value) {
  106.  
  107. Vector subset = new Vector();
  108.  
  109. int num = data.size();
  110.  
  111. for (int i=0; i< num; i++) {
  112.  
  113. DataPoint point = (DataPoint)data.elementAt(i);
  114.  
  115. if (point.attributes[attribute] == value) subset.addElement(point);
  116.  
  117. }
  118.  
  119. return subset;
  120.  
  121. }
  122.  
  123.  
  124.  
  125.  
  126.  
  127. public double calculateEntropy(Vector data) {
  128.  
  129. int numdata = data.size();
  130.  
  131. if (numdata == 0) return 0;
  132.  
  133. int attribute = numAttributes-1;
  134.  
  135. int numvalues = domains[attribute].size();
  136.  
  137. double sum = 0;
  138.  
  139. for (int i=0; i< numvalues; i++) {
  140.  
  141. int count=0;
  142.  
  143. for (int j=0; j< numdata; j++) {
  144.  
  145. DataPoint point = (DataPoint)data.elementAt(j);
  146.  
  147. if (point.attributes[attribute] == i) count++;
  148.  
  149. }
  150.  
  151. double probability = 1.*count/numdata;
  152.  
  153. if (count > 0) sum += -probability*Math.log(probability);
  154.  
  155. }
  156.  
  157. return sum;
  158.  
  159. }
  160.  
  161. public boolean alreadyUsedToDecompose(TreeNode node, int attribute) {
  162.  
  163. if (node.children != null) {
  164.  
  165. if (node.decompositionAttribute == attribute )
  166.  
  167. return true;
  168.  
  169. }
  170.  
  171. if (node.parent == null) return false;
  172.  
  173. return alreadyUsedToDecompose(node.parent, attribute);
  174.  
  175. }
  176.  
  177. public void decomposeNode(TreeNode node) {
  178.  
  179. double bestEntropy=0;
  180.  
  181. boolean selected=false;
  182.  
  183. int selectedAttribute=0;
  184.  
  185. int numdata = node.data.size();
  186.  
  187. int numinputattributes = numAttributes-1;
  188.  
  189. node.entropy = calculateEntropy(node.data);
  190.  
  191. if (node.entropy == 0) return;
  192.  
  193.  
  194.  
  195. for (int i=0; i< numinputattributes; i++) {
  196.  
  197. int numvalues = domains.size();
  198.  
  199. if ( alreadyUsedToDecompose(node, i) ) continue;
  200.  
  201. double averageentropy = 0;
  202.  
  203. for (int j=0; j< numvalues; j++) {
  204.  
  205. Vector subset = getSubset(node.data, i, j);
  206.  
  207. if (subset.size() == 0) continue;
  208.  
  209. double subentropy = calculateEntropy(subset);
  210.  
  211. averageentropy += subentropy *
  212.  
  213. subset.size();
  214.  
  215. }
  216.  
  217. averageentropy = averageentropy / numdata; //Taking the weighted average
  218.  
  219. if (selected == false) {
  220.  
  221. selected = true;
  222.  
  223. bestEntropy = averageentropy;
  224.  
  225. selectedAttribute = i;
  226.  
  227. } else {
  228.  
  229. if (averageentropy < bestEntropy) {
  230.  
  231. selected = true;
  232.  
  233. bestEntropy = averageentropy;
  234.  
  235. selectedAttribute = i;
  236.  
  237. }
  238.  
  239. }
  240.  
  241. }
  242.  
  243. if (selected == false) return;
  244.  
  245. int numvalues = domains[selectedAttribute].size();
  246.  
  247. node.decompositionAttribute = selectedAttribute;
  248.  
  249. node.children = new TreeNode [numvalues];
  250.  
  251. for (int j=0; j< numvalues; j++) {
  252.  
  253. node.children[j] = new TreeNode();
  254.  
  255. node.children[j].parent = node;
  256.  
  257. node.children[j].data = getSubset(node.data,
  258.  
  259. selectedAttribute, j);
  260.  
  261. node.children[j].decompositionValue = j;
  262.  
  263. }
  264.  
  265.  
  266.  
  267. for (int j=0; j< numvalues; j++) {
  268.  
  269. decomposeNode(node.children[j]);
  270.  
  271. }
  272.  
  273.  
  274.  
  275. node.data = null;
  276.  
  277. }
  278.  
  279.  
  280.  
  281.  
  282.  
  283. public int readData(String filename) throws Exception {
  284.  
  285. FileInputStream in = null;
  286.  
  287. try {
  288.  
  289. File inputFile = new File(filename);
  290.  
  291. in = new FileInputStream(inputFile);
  292.  
  293. } catch ( Exception e) {
  294.  
  295. System.err.println( "Unable to open data file: " + filename + "n" + e);
  296.  
  297. return 0;
  298.  
  299. }
  300.  
  301. BufferedReader bin = new BufferedReader(new InputStreamReader(in) );
  302.  
  303. String input;
  304.  
  305. while(true) {
  306.  
  307. input = bin.readLine();
  308.  
  309. if (input == null) {
  310.  
  311. System.err.println( "No data found in the data file: " + filename +
  312.  
  313. "n");
  314.  
  315. return 0;
  316.  
  317. }
  318.  
  319. if (input.startsWith("//")) continue;
  320.  
  321. if (input.equals("")) continue;
  322.  
  323. break;
  324.  
  325. }
  326.  
  327.  
  328.  
  329. StringTokenizer tokenizer = new StringTokenizer(input);
  330.  
  331. numAttributes = tokenizer.countTokens();
  332.  
  333. if (numAttributes <= 1) {
  334.  
  335. System.err.println( "Read line: " + input);
  336.  
  337. System.err.println( "Could not obtain the names of attributes in the line");
  338.  
  339. System.err.println( "Expecting at least one input attribute and one output attribute");
  340.  
  341. return 0;
  342.  
  343. }
  344.  
  345. domains = new Vector[numAttributes];
  346.  
  347. for (int i=0; i < numAttributes; i++) domains = new Vector();
  348.  
  349. attributeNames = new String[numAttributes];
  350.  
  351. for (int i=0; i < numAttributes; i++) {
  352.  
  353. attributeNames = tokenizer.nextToken();
  354.  
  355. }
  356.  
  357.  
  358.  
  359. while(true) {
  360.  
  361. input = bin.readLine();
  362.  
  363. if (input == null) break;
  364.  
  365. if (input.startsWith("//")) continue;
  366.  
  367. if (input.equals("")) continue;
  368.  
  369. tokenizer = new StringTokenizer(input);
  370.  
  371. int numtokens = tokenizer.countTokens();
  372.  
  373. if (numtokens != numAttributes) {
  374.  
  375. System.err.println( "Read " + root.data.size() + " data");
  376.  
  377. System.err.println( "Last line read: " + input);
  378.  
  379. System.err.println( "Expecting " + numAttributes + " attributes");
  380.  
  381. return 0;
  382.  
  383. }
  384.  
  385. DataPoint point = new DataPoint(numAttributes);
  386.  
  387. for (int i=0; i < numAttributes; i++) {
  388.  
  389. point.attributes = getSymbolValue(i, tokenizer.nextToken()
  390. );
  391.  
  392. }
  393.  
  394. root.data.addElement(point);
  395.  
  396. }
  397.  
  398. bin.close();
  399.  
  400. return 1;
  401.  
  402. }
  403.  
  404.  
  405.  
  406. public void printTree(TreeNode node, String tab) {
  407.  
  408. int outputattr = numAttributes-1;
  409.  
  410. if (node.children == null) {
  411.  
  412. int []values = getAllValues(node.data, outputattr );
  413.  
  414. if (values.length == 1) {
  415.  
  416. System.out.println(tab + "t" + attributeNames[outputattr] + " = "" +
  417.  
  418. domains[outputattr].elementAt(values[0]) + "";");
  419.  
  420. return;
  421.  
  422. }
  423.  
  424. System.out.print(tab + "t" + attributeNames[outputattr] + " = {");
  425.  
  426. for (int i=0; i < values.length; i++) {
  427.  
  428. System.out.print(""" + domains[outputattr].elementAt(values) + "" ");
  429.  
  430. if ( i != values.length-1 ) System.out.print( " , " );
  431.  
  432. }
  433.  
  434. System.out.println( " };");
  435.  
  436. return;
  437.  
  438. }
  439.  
  440. int numvalues = node.children.length;
  441.  
  442. for (int i=0; i < numvalues; i++) {
  443.  
  444. System.out.println(tab + "if( " +
  445.  
  446. attributeNames[node.decompositionAttribute] + " == "" +
  447.  
  448. domains[node.decompositionAttribute].elementAt(i)
  449.  
  450. + "") {" );
  451.  
  452. printTree(node.children, tab + "t");
  453.  
  454. if (i != numvalues-1) System.out.print(tab + "} else ");
  455.  
  456. else System.out.println(tab + "}");
  457.  
  458. }
  459.  
  460.  
  461.  
  462. }
  463.  
  464.  
  465.  
  466. public void createDecisionTree() {
  467.  
  468. decomposeNode(root);
  469.  
  470. printTree(root, "");
  471.  
  472. }
  473.  
  474. public static void main(String[] args) throws Exception {
  475.  
  476.  
  477.  
  478.  
  479.  
  480.  
  481.  
  482. ID3 me = new ID3();
  483.  
  484. int status = me.readData("c:\in.txt");
  485.  
  486. if (status <= 0) return;
  487.  
  488. me.createDecisionTree();
  489.  
  490. }
Add Comment
Please, Sign In to add comment