Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
from sklearn.svm import SVC
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report as evaluate, accuracy_score
from create_sentiment_featuresets import create_feature_sets_and_labels
# NOTE: the original `from itertools import izip` was removed — izip is
# Python-2-only (it is plain `zip` in Python 3), was never used anywhere in
# this file, and made the script crash at import time on Python 3 even though
# the rest of the code (print() calls, true division) targets Python 3.
# File Sources
pos = './data/pos.txt'  # positive-sentiment examples, one per line
neg = './data/neg.txt'  # negative-sentiment examples, one per line
def train_model(model, train_x, train_y):
    """Fit *model* on the training examples and return the fitted model.

    The model is fitted in place; it is also returned so calls can be
    chained (``model = train_model(model, ...)``).
    """
    model.fit(train_x, train_y)
    return model
def evaluate_model(model, test_x, test_y, target_names=('Positive', 'Negative')):
    """Evaluate a fitted classifier on the test split.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    test_x, test_y : test features and ground-truth labels.
    target_names : display names for the classes in the report. The default
        matches the labels this script uses; previously the function read a
        module-level ``target_names`` global that only existed when the file
        ran as ``__main__``, so it broke when imported — taking it as a
        defaulted parameter preserves behavior while fixing that.

    Returns
    -------
    tuple
        ``(accuracy, classification_report)`` where ``accuracy`` is the
        COUNT of correctly classified examples (``normalize=False``), not a
        fraction, and ``classification_report`` is a dict
        (``output_dict=True``) suitable for ``pd.DataFrame``.
    """
    predict_y = model.predict(test_x)
    classification_report = evaluate(test_y, predict_y,
                                     target_names=target_names,
                                     output_dict=True)
    # normalize=False -> number of correct predictions; the percentage is
    # derived later in export_results().
    accuracy = accuracy_score(test_y, predict_y, normalize=False)
    return accuracy, classification_report
def autolabel(rects, ax):
    """
    Attach a text label above each bar displaying its height
    """
    for bar in rects:
        bar_height = bar.get_height()
        # Center the label horizontally on the bar, slightly above its top.
        label_x = bar.get_x() + bar.get_width() / 2.
        ax.text(label_x, 1.05 * bar_height, '%d' % int(bar_height),
                ha='center', va='bottom')
def export_results(kernels, accuracies, reports, n_examples, plt):
    """Write one TSV classification report per kernel and draw two labelled
    bar charts: correct-prediction counts and accuracy percentages.

    ``accuracies`` holds counts of correctly predicted examples (see
    ``evaluate_model``); ``plt`` is the ``matplotlib.pyplot`` module.
    Each chart is shown interactively and then saved as a PNG.
    """

    def _bar_chart(values, y_max, y_label, title, out_file):
        # Shared plumbing for both charts: bars per kernel, value labels,
        # interactive display, then save to disk.
        positions = np.arange(len(kernels))
        fig, ax = plt.subplots()
        bars = ax.bar(positions, values, color='b', align='center', alpha=0.5)
        plt.xticks(positions, [k.upper() for k in kernels])
        ax.set_ylim([0, y_max])
        plt.xlabel('Kernels')
        plt.ylabel(y_label)
        plt.title(title)
        autolabel(bars, ax)
        plt.show()
        fig.savefig(out_file)

    # Classification Reports — one tab-separated file per kernel.
    for kernel_name, report in zip(kernels, reports):
        pd.DataFrame(report).T.to_csv(kernel_name + '_report.csv', sep='\t')

    # Correct Predictions Graph
    print ("Correct Predictions Graph...")
    _bar_chart(accuracies, n_examples, 'Examples correctly predicted',
               'Correct Predictions (Total Examples: {})'.format(n_examples),
               'correct_predictions.png')

    # Accuracy Graph
    print ("Plotting Accuracy Graph...")
    percentages = [(count / n_examples) * 100 for count in accuracies]
    _bar_chart(percentages, 100, 'Accuracy in %', 'Accuracy',
               'accuracy_percentage.png')
if __name__ == "__main__":
    print ("\n" + "="*30 + " Start " + "="*30 + "\n")
    print ("Preprocessing Data...")
    # Hold out 30% of the examples for testing; train on the remaining 70%.
    train_x, train_y, test_x, test_y = create_feature_sets_and_labels(pos, neg, test_size=0.3)
    # Dataset stats (per the original author):
    #   Total number of examples = 10632
    #   Number of features = 423
    #   Total number of classes = 2
    #   (0 and 1; denoting 'Positive' or 'Negative' Sentiment)
    #   Training Split = 7464, Testing Split = 3198
    target_names = ['Positive', 'Negative']
    print ("Training Examples: {} | Training Labels: {}".format(len(train_x), len(train_y)))
    print ("Testing Examples : {} | Testing Labels : {}".format(len(test_x), len(test_y)))
    print ("Applying Support Vector Machines...")
    kernels = ('linear', 'poly', 'rbf', 'sigmoid')  # List of kernels
    accuracies = []  # correct-prediction counts, one per kernel
    reports = []     # classification-report dicts, one per kernel
    for svm_kernel in kernels:
        print ("\nUsing Kernel: {}".format(svm_kernel))
        print ("\tStep 1/3: Creating Model")
        # All kernels share C=10; gamma applies to every kernel except
        # 'linear', and degree is meaningful only for 'poly'.
        svc_kwargs = {'C': 10, 'kernel': svm_kernel}
        if svm_kernel != 'linear':
            svc_kwargs['gamma'] = 'scale'
        if svm_kernel == 'poly':
            svc_kwargs['degree'] = 2
        model = SVC(**svc_kwargs)
        print ("\tStep 2/3: Training Model")
        model = train_model(model, train_x, train_y)
        print ("\tStep 3/3: Evaluating Model")
        accuracy, report = evaluate_model(model, test_x, test_y)
        accuracies.append(accuracy)
        reports.append(report)
    export_results(kernels, accuracies, reports, len(test_x), plt)
    print ("\n" + "="*30 + " End " + "="*30 + "\n")
Add Comment
Please sign in to add a comment.