Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Train and evaluate a multinomial Naive Bayes text classifier that separates
# two 20 Newsgroups categories ('alt.atheism' vs 'sci.space') using raw
# word-count features, then print several evaluation metrics.
import numpy as np
from sklearn import metrics
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

try:
    # Current location of train_test_split (scikit-learn >= 0.18).
    from sklearn.model_selection import train_test_split
except ImportError:
    # Legacy location; this module was removed in scikit-learn 0.20.
    from sklearn.cross_validation import train_test_split

# Load only the two categories of interest from the 'train' subset.
cats = ['alt.atheism', 'sci.space']
news = fetch_20newsgroups(subset='train', categories=cats)

# Turn each document into a vector of token counts.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(news.data)
y = np.array(news.target)

# Obtain test and train matrices and vectors.
# random_state is fixed so the split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

# Classify
clf = MultinomialNB()
clf.fit(X_train, y_train)  # train using X_train, y_train
y_predicted = clf.predict(X_test)  # classify X_test and obtain y_predicted

# Evaluate performance = compare y_predicted and y_test

# Directly from clf: score() runs predict() internally on X_test.
print("Accuracy", clf.score(X_test, y_test))

# Using the metrics module on the predictions computed above.
print(metrics.classification_report(y_test, y_predicted))
print(metrics.confusion_matrix(y_test, y_predicted))
print(metrics.f1_score(y_test, y_predicted))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement