Advertisement
Guest User

Untitled

a guest
Feb 27th, 2017
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.00 KB | None | 0 0
  1. x_train=train_df.data[0:7000]
  2. y_train=train_df.class_num[0:7000]
  3.  
  4. def my_tokenize(s):
  5. sbs =SnowballStemmer('english')
  6. return [sbs.stem(w) for w in wordpunct_tokenize(s)]
  7.  
  8.  
  9. vect = CountVectorizer(tokenizer=my_tokenize,stop_words='english',max_features=5000)
  10. # learn training data vocabulary, then use it to create a document-term matrix
  11. train_dtm =vect.fit_transform(x_train)
  12. print (train_dtm)
  13. #transform testing data (using fitted vocabulary) into a document-term matrix
  14. test_dtm=vect.transform(x_test)
  15. #instantiate a Multinomial Naive Bayes model
  16. nb = MultinomialNB(fit_prior=True)
  17. #train the model train_dtm
  18. nb.fit(train_dtm, y_train)
  19. # make class predictions for test_dtm
  20. y_test = nb.predict(test_dtm)
  21.  
  22. run1
  23. ....
  24. ....
  25. (6998, 2153) 1
  26. (6998, 3908) 1
  27. (6998, 183) 1
  28. (6998, 1369) 3
  29. (6998, 43) 1
  30. (6998, 185) 2
  31. (6998, 2389) 1
  32. (6998, 2137) 1
  33. (6998, 2757) 1
  34. (6998, 555) 1
  35. (6998, 847) 1
  36. (6998, 853) 1
  37. (6998, 2994) 1
  38. (6998, 3012) 1
  39. (6999, 3185) 1
  40. (6999, 2292) 1
  41. (6999, 402) 1
  42. (6999, 43) 1
  43. (6999, 4774) 1
  44. (6999, 4566) 1
  45. (6999, 2940) 1
  46. (6999, 555) 1
  47. (6999, 847) 1
  48. (6999, 1562) 1
  49. (6999, 1294) 1
  50.  
  51. run2
  52. ....
  53. ....
  54. (6997, 3889) 2
  55. (6997, 2971) 1
  56. (6997, 85) 2
  57. (6997, 55) 3
  58. (6997, 139) 9
  59. (6997, 3006) 2
  60. (6998, 2981) 1
  61. (6998, 3172) 1
  62. (6998, 43) 1
  63. (6998, 1) 1
  64. (6998, 2338) 1
  65. (6998, 4063) 1
  66. (6998, 3921) 1
  67. (6998, 545) 1
  68. (6998, 842) 1
  69. (6998, 2833) 1
  70. (6998, 3889) 1
  71. (6998, 139) 1
  72. (6999, 43) 1
  73. (6999, 1) 1
  74. (6999, 153) 1
  75. (6999, 545) 1
  76. (6999, 842) 1
  77. (6999, 4760) 1
  78. (6999, 3889) 1
  79.  
  80. run3
  81. ....
  82. ....
  83. (6994, 2977) 1
  84. (6994, 817) 2
  85. (6994, 132) 1
  86. (6994, 144) 1
  87. (6995, 3946) 1
  88. (6996, 2838) 1
  89. (6996, 4858) 1
  90. (6996, 2427) 1
  91. (6996, 2153) 1
  92. (6996, 3010) 1
  93. (6997, 4995) 1
  94. (6997, 4809) 1
  95. (6997, 1824) 1
  96. (6997, 4833) 1
  97. (6998, 1307) 1
  98. (6998, 4889) 1
  99. (6998, 144) 2
  100. (6999, 44) 1
  101. (6999, 162) 1
  102. (6999, 3428) 1
  103. (6999, 551) 1
  104. (6999, 836) 1
  105. (6999, 2928) 1
  106. (6999, 3288) 1
  107. (6999, 3909) 1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement