Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Training split: slice 0:7000 of the `data` and `class_num` columns.
# NOTE(review): train_df is defined outside this snippet; presumably a pandas
# DataFrame holding the raw texts and their numeric class labels -- confirm.
x_train, y_train = train_df.data[0:7000], train_df.class_num[0:7000]
# Build the stemmer once: my_tokenize is called for every document handed to
# the vectorizer, and the stemmer does not depend on the text being tokenized.
_STEMMER = SnowballStemmer('english')


def my_tokenize(s):
    """Split *s* with ``wordpunct_tokenize`` and return the stemmed tokens.

    Args:
        s: Text of a single document.

    Returns:
        A list with one English Snowball stem per token, in token order.
    """
    return [_STEMMER.stem(w) for w in wordpunct_tokenize(s)]
# Bag-of-words features built from stemmed tokens (my_tokenize), with the
# built-in English stop-word list and max_features=5000.
vect = CountVectorizer(
    tokenizer=my_tokenize,
    stop_words='english',
    max_features=5000,
)

# Learn the vocabulary from the training texts and, in the same pass, encode
# them as a document-term matrix.
train_dtm = vect.fit_transform(x_train)

# Dump the matrix; the run1/run2/run3 listings that follow this snippet are
# the tail of this output.
# NOTE(review): those listings show different column indices for the same row
# numbers across runs, i.e. the fitted vocabulary differs. Presumably x_train
# itself changes between runs (e.g. train_df is shuffled upstream) -- verify.
print(train_dtm)

# Encode the test texts with the vocabulary fitted above (no re-fitting).
# NOTE(review): x_test is defined outside this snippet.
test_dtm = vect.transform(x_test)

# Multinomial Naive Bayes classifier, trained on the training matrix.
nb = MultinomialNB(fit_prior=True)
nb.fit(train_dtm, y_train)

# Predicted class for every row of test_dtm.
# NOTE(review): despite the name, y_test holds model predictions, not
# ground-truth labels.
y_test = nb.predict(test_dtm)
- run1
- ....
- ....
- (6998, 2153) 1
- (6998, 3908) 1
- (6998, 183) 1
- (6998, 1369) 3
- (6998, 43) 1
- (6998, 185) 2
- (6998, 2389) 1
- (6998, 2137) 1
- (6998, 2757) 1
- (6998, 555) 1
- (6998, 847) 1
- (6998, 853) 1
- (6998, 2994) 1
- (6998, 3012) 1
- (6999, 3185) 1
- (6999, 2292) 1
- (6999, 402) 1
- (6999, 43) 1
- (6999, 4774) 1
- (6999, 4566) 1
- (6999, 2940) 1
- (6999, 555) 1
- (6999, 847) 1
- (6999, 1562) 1
- (6999, 1294) 1
- run2
- ....
- ....
- (6997, 3889) 2
- (6997, 2971) 1
- (6997, 85) 2
- (6997, 55) 3
- (6997, 139) 9
- (6997, 3006) 2
- (6998, 2981) 1
- (6998, 3172) 1
- (6998, 43) 1
- (6998, 1) 1
- (6998, 2338) 1
- (6998, 4063) 1
- (6998, 3921) 1
- (6998, 545) 1
- (6998, 842) 1
- (6998, 2833) 1
- (6998, 3889) 1
- (6998, 139) 1
- (6999, 43) 1
- (6999, 1) 1
- (6999, 153) 1
- (6999, 545) 1
- (6999, 842) 1
- (6999, 4760) 1
- (6999, 3889) 1
- run3
- ....
- ....
- (6994, 2977) 1
- (6994, 817) 2
- (6994, 132) 1
- (6994, 144) 1
- (6995, 3946) 1
- (6996, 2838) 1
- (6996, 4858) 1
- (6996, 2427) 1
- (6996, 2153) 1
- (6996, 3010) 1
- (6997, 4995) 1
- (6997, 4809) 1
- (6997, 1824) 1
- (6997, 4833) 1
- (6998, 1307) 1
- (6998, 4889) 1
- (6998, 144) 2
- (6999, 44) 1
- (6999, 162) 1
- (6999, 3428) 1
- (6999, 551) 1
- (6999, 836) 1
- (6999, 2928) 1
- (6999, 3288) 1
- (6999, 3909) 1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement