Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sys
def load_features(train_f):
    """Read a tab-separated training table into per-column lists of ints.

    The first line of ``train_f`` is a header naming the columns. Every
    following non-blank line is one example whose tab-separated fields are
    parsed with ``int`` and appended to the matching column.

    Args:
        train_f: An open text file (any object with ``readline`` that also
            iterates over its remaining lines), positioned at the header.

    Returns:
        A dict mapping each header name to the list of integer values read
        for that column, in file order.

    Raises:
        ValueError: If a field cannot be parsed as an integer, or a data row
            does not have the same number of fields as the header.
    """
    header = train_f.readline().rstrip('\r\n').split('\t')
    features = {name: [] for name in header}

    # The header is line 1, so data rows start at line 2.
    for line_number, line in enumerate(train_f, start=2):
        # A blank line (typically a trailing newline at end of file) holds no
        # example; without this guard it would reach int('') and crash.
        if not line.strip():
            continue

        fields = line.rstrip('\r\n').split('\t')
        # Reject ragged rows: accepting them would leave columns of different
        # lengths, so values would no longer line up by row index.
        if len(fields) != len(header):
            raise ValueError(
                'line %d has %d fields but the header has %d'
                % (line_number, len(fields), len(header))
            )

        for name, field in zip(header, fields):
            features[name].append(int(field))
    return features
def frequency_table(values):
    """Count how many times each distinct value occurs in ``values``.

    Args:
        values: An iterable of hashable items.

    Returns:
        A dict mapping each distinct item to its number of occurrences, with
        keys in the order the items were first seen.
    """
    tally = {}
    for item in values:
        # Missing keys start from zero, so first and repeat sightings share
        # one code path.
        tally[item] = tally.get(item, 0) + 1
    return tally
def filter_feature_for_class(features, clazz, feature):
    """Collect the values of one feature for the rows of a given class.

    Args:
        features: Dict of column name to list of values; must contain a
            ``'class'`` column.
        clazz: The class label to select rows by.
        feature: Name of the column whose values are returned.

    Returns:
        A list of ``features[feature]`` values, in row order, taken from the
        rows whose ``'class'`` entry equals ``clazz``.
    """
    # The feature column is only indexed for matching rows, so it is never
    # touched when no row belongs to the requested class.
    return [
        features[feature][row]
        for row, label in enumerate(features['class'])
        if label == clazz
    ]
def print_learning(features):
    """Print class priors and per-class feature-value frequencies.

    For each class label, in sorted order, one line is written to standard
    output. It starts with ``P(class=<label>)``, the share of all rows with
    that label. Then, for every non-``'class'`` column and every value of
    that column seen within the class (in sorted order), it adds
    ``P(<feature>=<value>|<label>)``, the share of the class's rows that
    have that value. All numbers are formatted with two decimals.

    Args:
        features: Dict of column name to list of values; must contain a
            ``'class'`` column.
    """
    class_counts = frequency_table(features['class'])
    example_count = sum(class_counts.values())

    for label in sorted(class_counts):
        in_class = class_counts[label]
        print('P(class=' + str(label) + ')=%.2f ' % (in_class / example_count), end='')

        for name in features:
            # The class column is the conditioning variable, not a feature.
            if name != 'class':
                value_counts = frequency_table(
                    filter_feature_for_class(features, label, name))
                for observed in sorted(value_counts):
                    likelihood = value_counts[observed] / in_class
                    print('P(' + name + '=' + str(observed) + '|' + str(label) + ')=%.2f ' % likelihood, end='')

        # End this class's line of output.
        print()
# Script entry: expects exactly two command-line arguments, the training data
# file followed by the test data file.
if len(sys.argv) != 3:
    print('You must specify only a training data file and test data file in the program parameters; nothing more or less.')
else:
    train_file = sys.argv[1]
    # The test file path is captured but not read in the visible part of
    # this script.
    test_file = sys.argv[2]
    # Open via a context manager so the training file is closed as soon as
    # it has been parsed, even if parsing raises.
    with open(train_file, 'r') as train_f:
        features = load_features(train_f)
    print_learning(features)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement