Advertisement
Kinrin

Untitled

May 24th, 2018
242
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.12 KB | None | 0 0
  1. import csv
  2. import pandas
  3. import sklearn
  4. import numpy as np
  5. from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
  6. from sklearn.naive_bayes import MultinomialNB, BernoulliNB
  7. from sklearn.svm import LinearSVC
  8. from sklearn.metrics import confusion_matrix,accuracy_score
  9. from sklearn.pipeline import make_pipeline
  10. from sklearn.tree import DecisionTreeClassifier
  11. from sklearn.feature_extraction.text import TfidfVectorizer
  12.  
  13. messages = pandas.read_csv('SMSSpamCollection.txt', sep='\t',
  14.                            names=["label", "data"])
  15. test = pandas.read_csv('testspam', sep='\t',
  16.                            names=["label", "data"])
  17. x_train = messages.data
  18. x_test = test.data
  19. y_train = messages.label
  20. y_test = test.label
  21.  
  22. cv=CountVectorizer(stop_words='english')
  23. cv.fit_transform(x_train)
  24. freq_term_matrix = cv.transform(x_train)
  25. tf=TfidfTransformer()
  26. tf.fit_transform(freq_term_matrix)
  27. tf_idf_matrix = tf.transform(freq_term_matrix)
  28. train_label=np.zeros(len(y_train))
  29. for i in range(len(y_train)):
  30.     if(y_train[i]=='spam'):
  31.         train_label[i]=1
  32.  
  33. cv_test=CountVectorizer(stop_words='english')
  34. cv_test.fit_transform(x_test)
  35. freq_term_matrix_test = cv.transform(x_test)
  36. tf_test=TfidfTransformer()
  37. tf_test.fit_transform(freq_term_matrix_test)
  38. tf_idf_matrix_test = tf_test.transform(freq_term_matrix_test)
  39. test_label=np.zeros(len(y_test))
  40. for j in range(len(y_test)):
  41.     if(y_test[j]=='spam'):
  42.         test_label[j]=1
  43.  
  44.  
  45. model1 = LinearSVC()
  46. model2 = MultinomialNB()
  47. model3 = BernoulliNB()
  48. model4 = DecisionTreeClassifier()
  49.  
  50. model1.fit(tf_idf_matrix.todense(),train_label)
  51. model2.fit(tf_idf_matrix.todense(),train_label)
  52. model3.fit(tf_idf_matrix.todense(),train_label)
  53. model4.fit(tf_idf_matrix.todense(),train_label)
  54.  
  55. result1 = model1.predict(tf_idf_matrix_test.todense() )
  56. result2 = model2.predict(tf_idf_matrix_test.todense() )
  57. result3 = model3.predict(tf_idf_matrix_test.todense() )
  58. result4 = model4.predict(tf_idf_matrix_test.todense() )
  59.  
  60. mat = confusion_matrix(test_label ,result1)
  61. mat1 = confusion_matrix(test_label ,result2)
  62. mat2 = confusion_matrix(test_label ,result3)
  63. mat3 = confusion_matrix(test_label ,result4)
  64.  
  65. x = [[0 for x in range(4)] for y in range(5)]
  66. x[0][0] = 'FD/M'
  67. x[1][0] = 'SVM'
  68. x[2][0] = 'MNB'
  69. x[3][0] = 'BNB'
  70. x[4][0] = 'DTC'
  71. x[0][1] = 'acc'
  72. x[0][2] = 'ham'
  73. x[0][3] = 'spam'
  74. x[1][1] = accuracy_score(test_label, result1)
  75. x[2][1] = accuracy_score(test_label, result2)
  76. x[3][1] = accuracy_score(test_label, result3)
  77. x[4][1] = accuracy_score(test_label, result4)
  78. x[1][2] = mat[0][0] / (mat[0][0] + mat[0][1])
  79. x[2][2] = mat1[0][0] / (mat1[0][0] + mat1[0][1])
  80. x[3][2] = mat2[0][0] / (mat2[0][0] + mat2[0][1])
  81. x[4][2] = mat3[0][0] / (mat3[0][0] + mat3[0][1])
  82. x[1][3] = mat[1][1] / (mat[1][0] + mat[1][1])
  83. x[2][3] = mat1[1][1] / (mat1[1][0] + mat1[1][1])
  84. x[3][3] = mat2[1][1] / (mat2[1][0] + mat2[1][1])
  85. x[4][3] = mat3[1][1] / (mat3[1][0] + mat3[1][1])
  86.  
  87. for i in range(len(x)):
  88.     for j in range(len(x[i])):
  89.         if ((i == 0) or (j == 0)):
  90.             print(x[i][j], end=' ')
  91.         else:
  92.             print(np.around(x[i][j], decimals=2), end=' ')
  93.     print()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement