Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # file main.py
- import model
- import pgConnector
- import dataParser
- import attribute
- import math
# This function assumes that there are "enough" objects on the data set to build a training set with '#training_size' elements
def buid_training_table(dbc, training_table_name, data_table_name, class_list, training_size, class_field="class"):
    """Create a stratified training table sampled from an existing data table.

    For each class in `class_list`, the number of rows copied into the
    training table is proportional to that class's share of the data table
    (floored), so the training set approximates the data's class distribution.

    Args:
        dbc: database connector exposing `do_query` (fetching) and
            `fetchless_query` (non-fetching) methods.
        training_table_name: name of the table to create.
        data_table_name: name of the source table to sample from.
        class_list: the class values to stratify over (must be non-empty).
        training_size: target total number of rows in the training table.
        class_field: column holding the class label (default "class").

    Raises:
        ValueError: if `class_list` is empty or the data table has no rows.

    NOTE(review): queries are assembled by string interpolation; this is only
    safe while table/field names and class values come from trusted code —
    the connector API appears to accept plain query strings, so parameterized
    queries are not possible here. Confirm inputs are never user-controlled.
    """
    if not class_list:
        raise ValueError("class_list must contain at least one class")
    # Total row count of the source table; needed for the per-class ratios.
    row_amt = dbc.do_query(f"select count(*) from {data_table_name}")[0][0]
    if not row_amt:
        raise ValueError(f"data table '{data_table_name}' is empty; cannot build a training set")

    # How many objects of each class will be put on the training table.
    class_amts = {}
    for c in class_list:
        # How many objects of class 'c' are there in the data table.
        class_amt_data = dbc.do_query(
            f"select count(*) from {data_table_name} where {class_field} = {c}"
        )[0][0]
        class_pct = class_amt_data / row_amt  # fraction of objects of class 'c'
        class_amts[c] = math.floor(class_pct * training_size)

    # Create the training table seeded with the first class's quota of rows.
    first = class_list[0]
    create_query_aux = (
        f"select * from {data_table_name} where {class_field} = {first} limit {class_amts[first]}"
    )
    dbc.fetchless_query(f"create table {training_table_name} as ({create_query_aux})")

    # Insert each remaining class's quota of rows.
    for c_i in class_list[1:]:
        insert_query = (
            f" select * from {data_table_name} where {class_field} = {c_i} limit {class_amts[c_i]}"
        )
        dbc.fetchless_query(f"insert into {training_table_name}{insert_query}")
def run_tests(classify_func, data, verbose_print):
    """Evaluate a binary classifier on labeled data and compute P/R/F metrics.

    Args:
        classify_func: callable taking the feature portion of an object
            (``obj[:-1]``) and returning a mapping ``{True: p1, False: p2}``
            of class scores; the predicted class is True iff p1 > p2.
        data: iterable of objects where ``obj[-1].value`` is the true
            boolean class and the preceding elements are features.
        verbose_print: when truthy, print the counts and metrics to stdout.

    Returns:
        dict with the confusion counts (``true_positives``,
        ``true_negatives``, ``false_positives``, ``false_negatives``) and
        the derived metrics (``recall``, ``precision``, ``f_measure``).
        Metrics are defined as 0.0 when their denominator is zero (e.g.
        the classifier never predicts positive), instead of crashing.
        Returning the results is backward-compatible: previous callers
        ignored the old ``None`` return.
    """
    # Confusion-matrix counts.
    true_positives = 0
    true_negatives = 0
    false_positives = 0
    false_negatives = 0
    for obj in data:
        # Classify each object from its features (everything but the label).
        obj_probs = classify_func(obj[:-1])
        calculated_class = obj_probs[True] > obj_probs[False]
        real_class = obj[-1].value
        # Update the appropriate count.
        if calculated_class and real_class:
            true_positives += 1
        elif not calculated_class and not real_class:
            true_negatives += 1
        elif not calculated_class and real_class:
            false_negatives += 1
        else:  # calculated_class and not real_class
            false_positives += 1

    # Main artifacts of the program; guard zero denominators so degenerate
    # inputs (no positives predicted/present) yield 0.0 instead of raising.
    actual_pos = true_positives + false_negatives
    predicted_pos = true_positives + false_positives
    recall = true_positives / actual_pos if actual_pos else 0.0
    precision = true_positives / predicted_pos if predicted_pos else 0.0
    f_measure = (2 * precision * recall) / (precision + recall) if (precision + recall) else 0.0

    if verbose_print:
        print("True Positives: " + str(true_positives))
        print("True Negatives: " + str(true_negatives))
        print("False Positives: " + str(false_positives))
        print("False Negatives: " + str(false_negatives))
        print("Recall: " + str(recall))
        print("Precision: " + str(precision))
        print("F-Measure: " + str(f_measure))

    return {
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "false_positives": false_positives,
        "false_negatives": false_negatives,
        "recall": recall,
        "precision": precision,
        "f_measure": f_measure,
    }
####################################################################################################################################################
############ Consts
# NOTE(review): database credentials are hard-coded in source — should be moved
# to environment variables or a config file before sharing/deploying.
DBC = pgConnector.PgConnector("postgres", "BatatinhaFrita123", "PGC-II", 'n')
DP = dataParser.DataParser(DBC)
# Source table holding the full labeled data set.
DATA_TABLE_NAME = "teste1_hom_full"
# Stratified sample of the data table, created below and dropped at the end.
TRAINING_TABLE_NAME = "training_data_for_" + DATA_TABLE_NAME
# Binary classification: the class column holds booleans.
CLASS_LIST = [True, False]
VERBOSE = False
# Target number of rows in the training table.
TRAINING_SIZE = 5000
############ Main
# Build the stratified training table (function defined above; name keeps the
# original 'buid' spelling).
buid_training_table(DBC, TRAINING_TABLE_NAME, DATA_TABLE_NAME, CLASS_LIST, TRAINING_SIZE)
# The 'parse_objects' function returns a tuple with the field names and the parsed data, but since we're not interested
# on the field names, we discard the first element
_, parsed_data = DP.parse_objects(DATA_TABLE_NAME, excluded_fields=["order_item_seq_id", "id"])
# Generic model parameterized on the boolean class type — presumably a
# Tree-Augmented Naive Bayes classifier; confirm against model.py.
modelo = model.TreeAugmentedNB[bool](CLASS_LIST, DBC, TRAINING_TABLE_NAME, excluded_fields=["order_item_seq_id", "id"])
modelo.train()
print("-----------------------------------------------------------------------------")
print("Naive Bayes:")
# 'classify_super' appears to be the plain-NB baseline of the TAN model —
# TODO confirm in model.py. Evaluation runs over the FULL data set, which
# includes the training rows (optimistic metrics); verify this is intended.
run_tests(modelo.classify_super, parsed_data, VERBOSE)
print("-----------------------------------------------------------------------------")
print("Tree Augmented Naive Bayes:")
run_tests(modelo.classify, parsed_data, VERBOSE)
if VERBOSE:
    modelo.print_temp_pairs()
    print("")
# Clean up: the training table is a temporary artifact of this run.
DBC.fetchless_query("drop table " + TRAINING_TABLE_NAME)
DBC.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement