Guest User

Untitled

a guest
Oct 22nd, 2018
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.77 KB | None | 0 0
  1. def most_informative_feature_for_binary_classification(vectorizer, classifier, n=100):
  2.  
  3.  
  4. """Identify most important features if given a vectorizer and binary classifier. Set n to the number
  5. of weighted features you would like to show.
  6. """
  7. def read_counter():
  8. return loads(open("counter.json", "r").read()) + 1 if path.exists("counter.json") else 0
  9.  
  10. def write_counter():
  11. with open("counter.json", "w") as f:
  12. f.write(dumps(counter))
  13.  
  14. counter = 1
  15.  
  16. counter = read_counter()
  17. atexit.register(write_counter)
  18. if counter >= 7:
  19. counter = 0
  20.  
  21. # additional stopwords to be remove
  22. # Open a file and read it into memory
  23. file = open('..stopwords.txt')
  24. additional_stopwords = file.read()
  25. additional_stopwords = additional_stopwords.split()
  26.  
  27. class_labels = classifier.classes_
  28. feature_names = vectorizer.get_feature_names()
  29. feature_names = [word for word in feature_names if word not in additional_stopwords]
  30.  
  31. topn_class1 = sorted(zip(classifier.coef_[0], feature_names))[:n]
  32. topn_class2 = sorted(zip(classifier.coef_[0], feature_names))[-n:]
  33.  
  34. # class_labels = category
  35. # coef = co-effecient
  36. # feat = most informative feature
  37. if counter == 1:
  38. for coef, feat in topn_class1:
  39. print(class_labels[0], coef, feat)
  40. # print(class_labels)
  41. # -> output: [2 3 4 5 6 7 8] index of this 0 1 2 3 4 5 6
  42.  
  43. print()
  44.  
  45. for coef, feat in reversed(topn_class2):
  46. print(class_labels[1], coef, feat)
  47.  
  48. elif counter == 2:
  49. for coef, feat in topn_class1:
  50. print(class_labels[1], coef, feat)
  51.  
  52. print()
  53.  
  54. for coef, feat in reversed(topn_class2):
  55. print(class_labels[2], coef, feat)
  56.  
  57. elif counter == 3:
  58. for coef, feat in topn_class1:
  59. print(class_labels[2], coef, feat)
  60.  
  61. print()
  62.  
  63. for coef, feat in reversed(topn_class2):
  64. print(class_labels[3], coef, feat)
  65.  
  66. elif counter == 4:
  67. for coef, feat in topn_class1:
  68. print(class_labels[3], coef, feat)
  69.  
  70. print()
  71.  
  72. for coef, feat in reversed(topn_class2):
  73. print(class_labels[4], coef, feat)
  74.  
  75. elif counter == 5:
  76. for coef, feat in topn_class1:
  77. print(class_labels[4], coef, feat)
  78.  
  79. print()
  80.  
  81. for coef, feat in reversed(topn_class2):
  82. print(class_labels[5], coef, feat)
  83.  
  84. elif counter == 6:
  85. for coef, feat in topn_class1:
  86. print(class_labels[5], coef, feat)
  87.  
  88. print()
  89.  
  90. for coef, feat in reversed(topn_class2):
  91. print(class_labels[6], coef, feat)
  92.  
  93. else:
  94. print("=== PLEASE RUN PROGRAM AGAIN TO VIEW THE CO-EFFICIENT FOR THE CHOSEN MODEL ===")
  95.  
  96. 2 -8.322094697329087 aaa
  97. 2 -8.322094697329087 aaa cm
  98. 2 -8.322094697329087 aaa cm underwent
  99. 2 -8.322094697329087 aaa free
  100. 2 -8.322094697329087 aaa free ivc
  101.  
  102. 3 -8.010764835561018 assymetry imp giddiness
  103. 3 -8.144858449457846 admitted feb year
  104. 3 -8.164330364141858 agreeable dre brown
  105. 3 -8.172447581146958 aerobic anaerobic labeled
  106. 3 -8.180391164585233 actually body
  107.  
  108. 3 -8.322580751462969 aaa
  109. 3 -8.322580751462969 aaa cm
  110. 3 -8.322580751462969 aaa cm underwent
  111. 3 -8.322580751462969 aaa free
  112. 3 -8.322580751462969 aaa free ivc
  113.  
  114. 4 -8.0112508896949 assymetry moving
  115. 4 -8.145344503591728 admitted feb year
  116. 4 -8.16481641827574 agreeable dre brown
  117. 4 -8.17293363528084 aerobic anaerobic labeled
  118. 4 -8.180877218719115 actually body
  119.  
  120. 4 -8.322337753927105 aaa
  121. 4 -8.322337753927105 aaa cm
  122. 4 -8.322337753927105 aaa cm underwent
  123. 4 -8.322337753927105 aaa free
  124. 4 -8.322337753927105 aaa free ivc
  125.  
  126. 5 -8.011007892159036 assymetry imp
  127. 5 -8.145101506055864 admitted frequent falls
  128. 5 -8.164573420739876 agreeable early review
  129. 5 -8.172690637744976 af anticoagulation
  130. 5 -8.18063422118325 actually body
Add Comment
Please, Sign In to add comment