Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # file main.py
- import model
- import pgConnector
- import dataParser
- import attribute
- import math
# This function assumes that there are "enough" objects on the data set to build a training set with '#training_size' elements
def buid_training_table(dbc, training_table_name, data_table_name, class_list, training_size, class_field="class"):
    """Create a stratified training table sampled from an existing data table.

    For each class in `class_list`, the number of rows copied into the
    training table is proportional to that class's share of the data table
    (floored), so the training set approximates the data's class distribution.

    Args:
        dbc: database connector exposing `do_query` (fetching) and
            `fetchless_query` (non-fetching) methods.
        training_table_name: name of the table to create.
        data_table_name: name of the source table to sample from.
        class_list: the class values to stratify over (must be non-empty).
        training_size: target total number of rows in the training table.
        class_field: column holding the class label (default "class").

    Raises:
        ValueError: if `class_list` is empty or the data table has no rows.

    NOTE(review): queries are assembled by string interpolation; this is only
    safe while table/field names and class values come from trusted code —
    the connector API appears to accept plain query strings, so parameterized
    queries are not possible here. Confirm inputs are never user-controlled.
    """
    if not class_list:
        raise ValueError("class_list must contain at least one class")
    # Total row count of the source table; needed for the per-class ratios.
    row_amt = dbc.do_query(f"select count(*) from {data_table_name}")[0][0]
    if not row_amt:
        raise ValueError(f"data table '{data_table_name}' is empty; cannot build a training set")

    # How many objects of each class will be put on the training table.
    class_amts = {}
    for c in class_list:
        # How many objects of class 'c' are there in the data table.
        class_amt_data = dbc.do_query(
            f"select count(*) from {data_table_name} where {class_field} = {c}"
        )[0][0]
        class_pct = class_amt_data / row_amt  # fraction of objects of class 'c'
        class_amts[c] = math.floor(class_pct * training_size)

    # Create the training table seeded with the first class's quota of rows.
    first = class_list[0]
    create_query_aux = (
        f"select * from {data_table_name} where {class_field} = {first} limit {class_amts[first]}"
    )
    dbc.fetchless_query(f"create table {training_table_name} as ({create_query_aux})")

    # Insert each remaining class's quota of rows.
    for c_i in class_list[1:]:
        insert_query = (
            f" select * from {data_table_name} where {class_field} = {c_i} limit {class_amts[c_i]}"
        )
        dbc.fetchless_query(f"insert into {training_table_name}{insert_query}")
def run_tests(classify_func, data, verbose_print):
    """Evaluate a binary classifier on labeled data and compute P/R/F metrics.

    Args:
        classify_func: callable taking the feature portion of an object
            (``obj[:-1]``) and returning a mapping ``{True: p1, False: p2}``
            of class scores; the predicted class is True iff p1 > p2.
        data: iterable of objects where ``obj[-1].value`` is the true
            boolean class and the preceding elements are features.
        verbose_print: when truthy, print the counts and metrics to stdout.

    Returns:
        dict with the confusion counts (``true_positives``,
        ``true_negatives``, ``false_positives``, ``false_negatives``) and
        the derived metrics (``recall``, ``precision``, ``f_measure``).
        Metrics are defined as 0.0 when their denominator is zero (e.g.
        the classifier never predicts positive), instead of crashing.
        Returning the results is backward-compatible: previous callers
        ignored the old ``None`` return.
    """
    # Confusion-matrix counts.
    true_positives = 0
    true_negatives = 0
    false_positives = 0
    false_negatives = 0
    for obj in data:
        # Classify each object from its features (everything but the label).
        obj_probs = classify_func(obj[:-1])
        calculated_class = obj_probs[True] > obj_probs[False]
        real_class = obj[-1].value
        # Update the appropriate count.
        if calculated_class and real_class:
            true_positives += 1
        elif not calculated_class and not real_class:
            true_negatives += 1
        elif not calculated_class and real_class:
            false_negatives += 1
        else:  # calculated_class and not real_class
            false_positives += 1

    # Main artifacts of the program; guard zero denominators so degenerate
    # inputs (no positives predicted/present) yield 0.0 instead of raising.
    actual_pos = true_positives + false_negatives
    predicted_pos = true_positives + false_positives
    recall = true_positives / actual_pos if actual_pos else 0.0
    precision = true_positives / predicted_pos if predicted_pos else 0.0
    f_measure = (2 * precision * recall) / (precision + recall) if (precision + recall) else 0.0

    if verbose_print:
        print("True Positives: " + str(true_positives))
        print("True Negatives: " + str(true_negatives))
        print("False Positives: " + str(false_positives))
        print("False Negatives: " + str(false_negatives))
        print("Recall: " + str(recall))
        print("Precision: " + str(precision))
        print("F-Measure: " + str(f_measure))

    return {
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "false_positives": false_positives,
        "false_negatives": false_negatives,
        "recall": recall,
        "precision": precision,
        "f_measure": f_measure,
    }
####################################################################################################################################################
############ Consts
# NOTE(review): database credentials are hard-coded in source — should be moved
# to environment variables or a config file before sharing/deploying.
DBC = pgConnector.PgConnector("postgres", "BatatinhaFrita123", "PGC-II", 'n')
DP = dataParser.DataParser(DBC)
# Source table holding the full labeled data set.
DATA_TABLE_NAME = "teste1_hom_full"
# Stratified sample of the data table, created below and dropped at the end.
TRAINING_TABLE_NAME = "training_data_for_" + DATA_TABLE_NAME
# Binary classification: the class column holds booleans.
CLASS_LIST = [True, False]
VERBOSE = False
# Target number of rows in the training table.
TRAINING_SIZE = 5000
############ Main
# Build the stratified training table (function defined above; name keeps the
# original 'buid' spelling).
buid_training_table(DBC, TRAINING_TABLE_NAME, DATA_TABLE_NAME, CLASS_LIST, TRAINING_SIZE)
# The 'parse_objects' function returns a tuple with the field names and the parsed data, but since we're not interested
# on the field names, we discard the first element
_, parsed_data = DP.parse_objects(DATA_TABLE_NAME, excluded_fields=["order_item_seq_id", "id"])
# Generic model parameterized on the boolean class type — presumably a
# Tree-Augmented Naive Bayes classifier; confirm against model.py.
modelo = model.TreeAugmentedNB[bool](CLASS_LIST, DBC, TRAINING_TABLE_NAME, excluded_fields=["order_item_seq_id", "id"])
modelo.train()
print("-----------------------------------------------------------------------------")
print("Naive Bayes:")
# 'classify_super' appears to be the plain-NB baseline of the TAN model —
# TODO confirm in model.py. Evaluation runs over the FULL data set, which
# includes the training rows (optimistic metrics); verify this is intended.
run_tests(modelo.classify_super, parsed_data, VERBOSE)
print("-----------------------------------------------------------------------------")
print("Tree Augmented Naive Bayes:")
run_tests(modelo.classify, parsed_data, VERBOSE)
if VERBOSE:
    modelo.print_temp_pairs()
    print("")
# Clean up: the training table is a temporary artifact of this run.
DBC.fetchless_query("drop table " + TRAINING_TABLE_NAME)
DBC.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement