Advertisement
Arham-4

Bayesian Learning

Sep 29th, 2021
960
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.23 KB | None | 0 0
  1. import sys
  2.  
  3. def load_features(train_f):
  4.     features = {}
  5.     feature_for_index = {}
  6.     index = 0
  7.     for feature in train_f.readline().replace('\n', '').split('\t'):
  8.         features[feature] = []
  9.         feature_for_index[index] = feature
  10.         index += 1
  11.  
  12.     for line in train_f:
  13.         split = line.replace('\n', '').split('\t')
  14.         for i in range(len(split)):
  15.             features[feature_for_index[i]].append(int(split[i]))
  16.  
  17.     return features
  18.  
  19. def frequency_table(values):
  20.     unique_value_count = {}
  21.     for value in values:
  22.         if value not in unique_value_count:
  23.             unique_value_count[value] = 1
  24.         else:
  25.             unique_value_count[value] += 1
  26.     return unique_value_count
  27.  
  28. def filter_feature_for_class(features, clazz, feature):
  29.     filtered_values = []
  30.     for i in range(len(features['class'])):
  31.         if features['class'][i] == clazz:
  32.             filtered_values.append(features[feature][i])
  33.     return filtered_values
  34.  
  35. def print_learning(features):
  36.     class_freq_table = frequency_table(features['class'])
  37.     total = sum(class_freq_table.values())
  38.     for clazz in sorted(class_freq_table.keys()):
  39.         class_total = class_freq_table[clazz]
  40.         class_p = class_total / total
  41.         print('P(class=' + str(clazz) + ')=%.2f ' % class_p, end='')
  42.        
  43.         for feature in features.keys():
  44.             if feature == 'class':
  45.                 continue
  46.             values_for_feature = filter_feature_for_class(features, clazz, feature)
  47.             feature_freq_table = frequency_table(values_for_feature)
  48.            
  49.             for feature_value in sorted(feature_freq_table.keys()):
  50.                 count = feature_freq_table[feature_value]
  51.                 feature_value_p = count / class_total
  52.                 print('P(' + feature + '=' + str(feature_value) + '|' + str(clazz) + ')=%.2f ' % feature_value_p, end='')
  53.        
  54.         print()
  55.  
  56. if len(sys.argv) != 3:
  57.     print('You must specify only a training data file and test data file in the program parameters; nothing more or less.')
  58. else:
  59.     train_file = sys.argv[1]
  60.     test_file = sys.argv[2]
  61.  
  62.     train_f = open(train_file, 'r')
  63.     features = load_features(train_f)
  64.  
  65.     print_learning(features)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement