Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Compare Bernoulli and multinomial naive Bayes on bag-of-words text features.
#
# Reads "data.tsv", takes the last column of each row as the document text,
# labels a row 1 when its second column equals 'p' (0 otherwise), fits both
# classifiers on the full data and prints each one's accuracy.
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import MultinomialNB

count_vect = CountVectorizer()

# Each row becomes a plain Python list of that row's cell values.
tsv = pd.read_table("data.tsv").values.tolist()

# Last column -> text to vectorize; second column -> binary label.
# NOTE(review): presumably 'p' marks the positive class -- confirm against
# the data file's schema.
values = [row[-1] for row in tsv]
results = [1 if row[1] == 'p' else 0 for row in tsv]

# Token-count matrix built from the texts, plus the label vector.
X = count_vect.fit_transform(values)
y = np.array(results)

# Both scores below are computed on the same X, y the models were fitted on,
# so they are training-set accuracy, not held-out accuracy.
b_clf = BernoulliNB()
b_clf.fit(X, y)
print(b_clf.score(X, y))
#=> 0.7609423570921667  (output recorded in the original paste)

m_clf = MultinomialNB()
m_clf.fit(X, y)
print(m_clf.score(X, y))
#=> 0.7551450818115486  (output recorded in the original paste)
Add Comment
Please, Sign In to add comment