Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Created on Thu Sep 12 15:39:43 2019
- @author: kylepaulus
- """
- import monkdata as m
- import drawtree_qt5 as u
- import dtree as d
- from operator import itemgetter
- """
- Compute the average gain across all 6 attributes and print results
- """
- for i in range(6):
- print(' ' + 'A' + str(i+1)+' =' + ' ' + str(d.averageGain(m.monk1, m.attributes[i])))
- #print(d.averageGain(m.monk1, m.attributes[0]))
- #print(d.averageGain(m.monk1, m.attributes[1]))
- #print(d.averageGain(m.monk1, m.attributes[2]))
- #print(d.averageGain(m.monk1, m.attributes[3]))
- #print(d.averageGain(m.monk1, m.attributes[4]))
- #print(d.averageGain(m.monk1, m.attributes[5]))
- """
- Create a for loop for all 4 subsets of
- """
- #monk1_A51 = d.select(m.monk1, m.attributes[4],1)
- #monk1_A52 = d.select(m.monk1, m.attributes[4],2)
- #monk1_A53 = d.select(m.monk1, m.attributes[4],3)
- #monk1_A54 = d.select(m.monk1, m.attributes[4],4)
- #import pandas as pd
- #test = pd.DataFrame()
- #for i in range(5):
- # for x in d.select(m.monk1, m.attributes[4],i):
- # test = test.append(pd.DataFrame(x.attribute, index=[0]))
- #test = test.reset_index()
- #
- #
- #
- #a = d.select(m.monk1, m.attributes[4],i)
- for i in range(5):
- for x in d.select(m.monk1, m.attributes[4],i):
- print('Monk1'+ ' '+ 'A5' + '=' + str(i) + str(x.attribute))
- """
- Create a 4 loop that tests 6 attributes of A5 and computes average gain for
- each attribute, then prints a label = to the value
- Nested for loop 1-4, and a for loop 1-6
- """
- #print(d.averageGain(monk1_A51, m.attributes[0]))
- #print(d.averageGain(monk1_A51, m.attributes[1]))
- #print(d.averageGain(monk1_A51, m.attributes[2]))
- #print(d.averageGain(monk1_A51, m.attributes[3]))
- #print(d.averageGain(monk1_A51, m.attributes[4]))
- #print(d.averageGain(monk1_A51, m.attributes[5]))
- #
- #
- #print(d.averageGain(monk1_A52, m.attributes[0]))
- #print(d.averageGain(monk1_A52, m.attributes[1]))
- #print(d.averageGain(monk1_A52, m.attributes[2]))
- #print(d.averageGain(monk1_A52, m.attributes[3]))
- #print(d.averageGain(monk1_A52, m.attributes[4]))
- #print(d.averageGain(monk1_A52, m.attributes[5]))
- #
- #print(d.averageGain(monk1_A53, m.attributes[0]))
- #print(d.averageGain(monk1_A53, m.attributes[1]))
- #print(d.averageGain(monk1_A53, m.attributes[2]))
- #print(d.averageGain(monk1_A53, m.attributes[3]))
- #print(d.averageGain(monk1_A53, m.attributes[4]))
- #print(d.averageGain(monk1_A53, m.attributes[5]))
- #
- #print(d.averageGain(monk1_A54, m.attributes[0]))
- #print(d.averageGain(monk1_A54, m.attributes[1]))
- #print(d.averageGain(monk1_A54, m.attributes[2]))
- #print(d.averageGain(monk1_A54, m.attributes[3]))
- #print(d.averageGain(monk1_A54, m.attributes[4]))
- #print(d.averageGain(monk1_A54, m.attributes[5]))
# Split MONK-1 into the four subsets induced by attribute A5 (values 1-4),
# then report every attribute's information gain inside each subset.
monk_subsets = [d.select(m.monk1, m.attributes[4], value) for value in range(1, 5)]

for i, subset in enumerate(monk_subsets):
    print(f'Monk1 A5 Subsets = {i + 1}')
    for j in range(6):
        # Note: the original label prints no space after '=' -- kept as-is.
        print(f' Monk1 A{j + 1} ={d.averageGain(subset, m.attributes[j])}')
- """
- Assignment 5 Build and draw a tree and compute error
- """
- #t = d.buildTree(m.monk3, m.attributes)
- #error= 1-(d.check(t, m.monk3test))
- #print(error)
- #
- #
- #
- #s = d.allPruned(t)[11]
- #print(len(d.allPruned(s)))
- #
- #u.drawTree(s)
- """
- Assignment 6 write a loop that prunes the trees and keeps pruning until the
- model reaches a better value
- """
import random


def partition(data, fraction):
    """Randomly split *data* in two.

    Returns a pair of lists: the first holds ``int(len(data) * fraction)``
    shuffled samples, the second holds the rest.  *data* itself is not
    modified (it is copied before shuffling).
    """
    pool = list(data)
    random.shuffle(pool)
    cut = int(len(pool) * fraction)
    return pool[:cut], pool[cut:]
# Hold out 40% of MONK-1 as a validation set for the pruning experiments.
monk1train, monk1val = partition(m.monk1, 0.6)

# Unpruned decision tree fitted on the training split; all pruning
# candidates below are derived from this tree.
original_tree = d.buildTree(monk1train, m.attributes)
- """
- Input: Tree and a validation set.
- Returns a sorted list of tuples with pruned trees and accuracy like:
- [(tree, 0.8),(tree2, 0.85),...]
- """
- def makelist(origtree, val):
- lst = d.allPruned(origtree)
- output = []
- for x in lst:
- output.append((x,d.check(x,val)))
- output = sorted(output,key=itemgetter(1))
- return output
- print(makelist(original_tree, monk1val))
- """
- Looks if the performance of a tree can be improved by pruning recursively.
- Returns error number of the improved tree.
- """
- def error_of_improved_tree(original_tree,val):
- error = 1 - d.check(original_tree, val)
- lst = makelist(original_tree, val)
- if d.check(original_tree, val) > lst[-1][1]:
- return error
- else:
- return error_of_improved_tree(lst[-1][0],val)
- print(error_of_improved_tree(original_tree,monk1val))
def biggie(training, partition_parameter):
    """Estimate pruning error over ten random splits of *training*.

    Each round splits *training* with ``partition``, builds a tree on the
    first part, prunes it against the second via ``error_of_improved_tree``,
    and records the resulting error.  Returns the list of ten error values.
    """
    errors = []
    for _ in range(10):
        train, val = partition(training, partition_parameter)
        # Bug fix: the original appended the function object itself
        # (``lst.append(error_of_improved_tree)``) and never built a tree
        # from the fresh split; build and evaluate it here instead.
        tree = d.buildTree(train, m.attributes)
        errors.append(error_of_improved_tree(tree, val))
    return errors


print(biggie(m.monk1, 0.3))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement