Guest User

Untitled

a guest
Feb 18th, 2019
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.12 KB | None | 0 0
  1. from sklearn.svm import SVC
  2. from matplotlib import pyplot as plt
  3. import pandas as pd
  4. import numpy as np
  5. from itertools import izip
  6. from sklearn.metrics import classification_report as evaluate, accuracy_score
  7. from create_sentiment_featuresets import create_feature_sets_and_labels
  8.  
  9. # File Sources
  10. pos = './data/pos.txt'
  11. neg = './data/neg.txt'
  12.  
  13. def train_model(model, train_x, train_y):
  14. model.fit(train_x, train_y)
  15. return model
  16.  
  17.  
  18. def evaluate_model(model, test_x, test_y):
  19. predict_y = model.predict(test_x)
  20. classification_report = evaluate(test_y, predict_y, target_names=target_names, output_dict=True)
  21. accuracy = accuracy_score(test_y, predict_y, normalize=False)
  22. return accuracy, classification_report
  23.  
  24.  
  25. def autolabel(rects, ax):
  26. """
  27. Attach a text label above each bar displaying its height
  28. """
  29. for rect in rects:
  30. height = rect.get_height()
  31. ax.text(rect.get_x() + rect.get_width()/2., 1.05*height,
  32. '%d' % int(height),
  33. ha='center', va='bottom')
  34.  
  35.  
  36. def export_results(kernels, accuracies, reports, n_examples, plt):
  37. # Classification Reports
  38. for i, this_kernel in enumerate(kernels):
  39. df = pd.DataFrame(reports[i]).T
  40. df.to_csv(this_kernel+'_report.csv', sep='\t')
  41.  
  42. # Correct Predictions Graph
  43. print ("Correct Predictions Graph...")
  44. y_pos = np.arange(len(kernels))
  45. fig, ax = plt.subplots()
  46. rects = ax.bar(y_pos, accuracies, color='b', align='center', alpha=0.5)
  47. plt.xticks(y_pos, [x.upper() for x in kernels])
  48. ax.set_ylim([0, n_examples])
  49. plt.xlabel('Kernels')
  50. plt.ylabel('Examples correctly predicted')
  51. plt.title('Correct Predictions (Total Examples: {})'.format(n_examples))
  52. autolabel(rects, ax)
  53. plt.show()
  54. fig.savefig('correct_predictions.png')
  55.  
  56. # Accuracy Graph
  57. print ("Plotting Accuracy Graph...")
  58. accuracies = [(accuracy/n_examples)*100 for accuracy in accuracies]
  59. y_pos = np.arange(len(kernels))
  60. fig, ax = plt.subplots()
  61. rects = ax.bar(y_pos, accuracies, color='b', align='center', alpha=0.5)
  62. plt.xticks(y_pos, [x.upper() for x in kernels])
  63. ax.set_ylim([0, 100])
  64. plt.xlabel('Kernels')
  65. plt.ylabel('Accuracy in %')
  66. plt.title('Accuracy')
  67. autolabel(rects, ax)
  68. plt.show()
  69. fig.savefig('accuracy_percentage.png')
  70.  
  71.  
  72. if __name__ == "__main__":
  73. print ("\n" + "="*30 + " Start " + "="*30 + "\n")
  74. print ("Preprocessing Data...")
  75. train_x, train_y, test_x, test_y = create_feature_sets_and_labels(pos, neg, test_size=0.3) # 30% of the data for testing/70% for training
  76. """
  77. Data:
  78. Total number of examples = 10632
  79. Number of features = 423
  80. Total number of classes = 2
  81. (0 and 1; denoting 'Positive' or 'Negative' Sentiment)
  82. Training Split = 7464
  83. Testing Split = 3198
  84. """
  85. target_names = ['Positive', 'Negative']
  86.  
  87. print ("Training Examples: {} | Training Labels: {}".format(len(train_x), len(train_y)))
  88. print ("Testing Examples : {} | Testing Labels : {}".format(len(test_x), len(test_y)))
  89.  
  90. print ("Applying Support Vector Machines...")
  91. kernels = ('linear', 'poly', 'rbf', 'sigmoid') # List of kernels
  92. accuracies = [] # List of accuracies
  93. reports = [] # List of reports
  94. for kernel in kernels:
  95. print ("\nUsing Kernel: {}".format(kernel))
  96. print ("\tStep 1/3: Creating Model")
  97. if (kernel=='poly'):
  98. model = SVC(C=10, kernel=kernel, gamma='scale', degree=2)
  99. elif (kernel=='linear'):
  100. model = SVC(C=10, kernel=kernel)
  101. else:
  102. model = SVC(C=10, kernel=kernel, gamma='scale')
  103. print ("\tStep 2/3: Training Model")
  104. model = train_model(model, train_x, train_y)
  105. print ("\tStep 3/3: Evaluating Model")
  106. accuracy, report = evaluate_model(model, test_x, test_y)
  107. accuracies.append(accuracy)
  108. reports.append(report)
  109.  
  110. export_results(kernels, accuracies, reports, len(test_x), plt)
  111. print ("\n" + "="*30 + " End " + "="*30 + "\n")
Add Comment
Please, Sign In to add comment