Advertisement
Guest User

Abou bosse comme un Senegalais

a guest
Nov 15th, 2018
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.01 KB | None | 0 0
  1. from sklearn.datasets import fetch_20newsgroups
  2. from sklearn.feature_extraction.text import CountVectorizer
  3. cats = ['alt.atheism', 'sci.space']
  4. news = fetch_20newsgroups(subset='train', categories=cats)
  5. vectorizer = CountVectorizer()
  6. X = vectorizer.fit_transform(news.data)
  7. y = np.array(news.target)
  8.  
  9. # Obtain test and train matrices and vectors
  10. from sklearn.cross_validation import train_test_split
  11.  
  12. X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.33, random_state = 42)
  13.  
  14. # Classify
  15. clf = MultinomialNB()
  16. clf.fit(X_train, y_train) # train using X_train, y_train
  17. y_predicted = clf.predict(X_test) # Classify X_test and obtain y_predicted
  18.  
  19. # Evaluate performance = compare y_predicted and y_test
  20. #Directly from clf
  21.  
  22. print("Accuracy", clf.score(X_test, y_test)) ## run predict under cover
  23.  
  24. # using metrics
  25. from sklearn import metrics
  26.  
  27. print(metrics.classification_report(y_test, y_predicted))
  28. print(metrics.confusion_matrix(y_test, y_predicted))
  29. print(metrics.f1_score(y_test, y_predicted))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement