Advertisement
Guest User

Untitled

a guest
Jun 20th, 2019
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.88 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "markdown",
  5. "metadata": {},
  6. "source": [
  7. "### Test the classifier on the entire test dataset"
  8. ]
  9. },
  10. {
  11. "cell_type": "code",
  12. "execution_count": 41,
  13. "metadata": {},
  14. "outputs": [],
  15. "source": [
  16. "test_counts = vectorizer.transform(test_corpus)\n",
  17. "test_tfidf = transformer.transform(test_counts)"
  18. ]
  19. },
  20. {
  21. "cell_type": "code",
  22. "execution_count": 42,
  23. "metadata": {},
  24. "outputs": [
  25. {
  26. "data": {
  27. "text/plain": [
  28. "0.944"
  29. ]
  30. },
  31. "execution_count": 42,
  32. "metadata": {},
  33. "output_type": "execute_result"
  34. }
  35. ],
  36. "source": [
  37. "classifier.score(test_tfidf, test_labels)"
  38. ]
  39. },
  40. {
  41. "cell_type": "markdown",
  42. "metadata": {},
  43. "source": [
  44. "### Find Informative Features"
  45. ]
  46. },
  47. {
  48. "cell_type": "code",
  49. "execution_count": 43,
  50. "metadata": {},
  51. "outputs": [],
  52. "source": [
  53. "n = 20\n",
  54. "feature_names = vectorizer.get_feature_names() # Array mapping from feature integer indices to feature name"
  55. ]
  56. },
  57. {
  58. "cell_type": "code",
  59. "execution_count": 44,
  60. "metadata": {
  61. "scrolled": false
  62. },
  63. "outputs": [
  64. {
  65. "name": "stdout",
  66. "output_type": "stream",
  67. "text": [
  68. "AaronPressman 's Top 20 features \n",
  69. "\n",
  70. "products -6.843221656677828\n",
  71. "commerce -6.810211392169265\n",
  72. "phone -6.810211392169265\n",
  73. "administration -6.759908463882919\n",
  74. "securities -6.72589537844437\n",
  75. "federal -6.669434280670851\n",
  76. "court -6.656228165206716\n",
  77. "computer -6.648670161880032\n",
  78. "credit -6.59269344152553\n",
  79. "new -6.466038750420433\n",
  80. "companies -6.421187855927672\n",
  81. "banks -6.282188052280851\n",
  82. "congress -6.1691369178693325\n",
  83. "'s -5.874179559976962\n",
  84. "internet -5.48040317700573\n",
  85. "said -5.33729863768842\n",
  86. "'' -5.271277381900798\n",
  87. "`` -5.260227520807157\n",
  88. ". -4.229822040487652\n",
  89. ", -4.030488993297016\n",
  90. "\n",
  91. "\n",
  92. "AlanCrosby 's Top 20 features \n",
  93. "\n",
  94. "billion -6.651115217637997\n",
  95. "elections -6.56600473481026\n",
  96. "analysts -6.555871417225831\n",
  97. "match -6.527893548285615\n",
  98. "coalition -6.512588487497869\n",
  99. "week -6.388919463309094\n",
  100. "points -6.371236393412705\n",
  101. "index -6.340860179321937\n",
  102. "senate -6.295387040925007\n",
  103. "round -6.170196671090114\n",
  104. "( -6.150627605586323\n",
  105. ") -6.150627605586323\n",
  106. "percent -6.077633643022997\n",
  107. "market -6.022847352516765\n",
  108. "`` -5.770879251421359\n",
  109. "'' -5.745310055668684\n",
  110. "said -5.60866477446716\n",
  111. "'s -5.504211244742248\n",
  112. ". -4.294466466066362\n",
  113. ", -3.931324802675596\n",
  114. "\n",
  115. "\n",
  116. "AlexanderSmith 's Top 20 features \n",
  117. "\n",
  118. "cable -6.5614732431634435\n",
  119. "year -6.529223847082791\n",
  120. "investment -6.478300823188887\n",
  121. "british -6.463011028791085\n",
  122. "; -6.443924532066207\n",
  123. "$ -6.437760325964971\n",
  124. ") -6.38363052460965\n",
  125. "company -6.38363052460965\n",
  126. "( -6.3761314468987695\n",
  127. "group -6.3761314468987695\n",
  128. "percent -6.318081205830778\n",
  129. "& -6.296556432207335\n",
  130. "amp -6.296556432207335\n",
  131. "million -6.283439795017349\n",
  132. "'s -5.456188187197981\n",
  133. "`` -5.351676730265064\n",
  134. "'' -5.338355645441447\n",
  135. "said -5.139780496405044\n",
  136. ". -4.199710987553583\n",
  137. ", -4.107476056200915\n",
  138. "\n",
  139. "\n",
  140. "BenjaminKangLim 's Top 20 features \n",
  141. "\n",
  142. "( -6.818371181790302\n",
  143. ") -6.818371181790302\n",
  144. "state -6.79976981870553\n",
  145. "links -6.764806000459541\n",
  146. "dan -6.724913852417953\n",
  147. "people -6.710306847324062\n",
  148. "years -6.6989899973268265\n",
  149. "sentence -6.6316354347067294\n",
  150. "... -6.582288783415159\n",
  151. "government -6.495999271657508\n",
  152. "court -6.265417549885052\n",
  153. "party -6.18776190871761\n",
  154. "'' -5.5625764634394805\n",
  155. "`` -5.551799132659087\n",
  156. "beijing -5.454730907587863\n",
  157. "china -5.256907777517354\n",
  158. "said -5.081614300033448\n",
  159. "'s -5.068306343512768\n",
  160. ". -4.181346344826704\n",
  161. ", -4.087015543226064\n",
  162. "\n",
  163. "\n",
  164. "BernardHickey 's Top 20 features \n",
  165. "\n",
  166. "australian -6.496200125113844\n",
  167. "analysts -6.4895887628902855\n",
  168. "bank -6.465451563584756\n",
  169. "australia -6.330408486415948\n",
  170. "( -6.294269155165459\n",
  171. ") -6.294269155165459\n",
  172. "corp -6.252254460517263\n",
  173. "million -5.955480959210123\n",
  174. "profit -5.933900175980197\n",
  175. "year -5.921578211989878\n",
  176. "murdoch -5.905439558869782\n",
  177. "percent -5.851842538640983\n",
  178. "news -5.846674401805445\n",
  179. "$ -5.483310978602539\n",
  180. "'s -5.355618305216927\n",
  181. "`` -5.330694412171886\n",
  182. "'' -5.315426649102102\n",
  183. "said -4.8155626174681165\n",
  184. ". -4.2475736088130445\n",
  185. ", -4.224915439286149\n",
  186. "\n",
  187. "\n"
  188. ]
  189. }
  190. ],
  191. "source": [
  192. "n = 20\n",
  193. "for i in range(5):\n",
  194. " for j in label_encoder.inverse_transform([i]):\n",
  195. " print(j, \"'s Top %s features \\n\" %n)\n",
  196. " topn = sorted(zip(classifier.coef_[i], feature_names))[-n: ]\n",
  197. " for coef, feature in topn:\n",
  198. " print(feature, coef)\n",
  199. " print(\"\\n\")"
  200. ]
  201. }
  202. ],
  203. "metadata": {
  204. "kernelspec": {
  205. "display_name": "Python 3",
  206. "language": "python",
  207. "name": "python3"
  208. },
  209. "language_info": {
  210. "codemirror_mode": {
  211. "name": "ipython",
  212. "version": 3
  213. },
  214. "file_extension": ".py",
  215. "mimetype": "text/x-python",
  216. "name": "python",
  217. "nbconvert_exporter": "python",
  218. "pygments_lexer": "ipython3",
  219. "version": "3.6.8"
  220. }
  221. },
  222. "nbformat": 4,
  223. "nbformat_minor": 2
  224. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement