Guest User

Untitled

a guest
Sep 29th, 2018
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.03 KB | None | 0 0
  1. from sklearn.feature_extraction.text import CountVectorizer
  2. from sklearn.linear_model import LogisticRegression
  3. from sklearn.metrics import accuracy_score
  4. from sklearn.model_selection import train_test_split
  5.  
  6. ngram_vectorizer = CountVectorizer(binary=True, ngram_range=(1, 2))
  7. ngram_vectorizer.fit(reviews_train_clean)
  8. X = ngram_vectorizer.transform(reviews_train_clean)
  9. X_test = ngram_vectorizer.transform(reviews_test_clean)
  10.  
  11. X_train, X_val, y_train, y_val = train_test_split(
  12. X, target, train_size = 0.75
  13. )
  14.  
  15. for c in [0.01, 0.05, 0.25, 0.5, 1]:
  16.  
  17. lr = LogisticRegression(C=c)
  18. lr.fit(X_train, y_train)
  19. print ("Accuracy for C=%s: %s"
  20. % (c, accuracy_score(y_val, lr.predict(X_val))))
  21.  
  22. # Accuracy for C=0.01: 0.88416
  23. # Accuracy for C=0.05: 0.892
  24. # Accuracy for C=0.25: 0.89424
  25. # Accuracy for C=0.5: 0.89456
  26. # Accuracy for C=1: 0.8944
  27.  
  28. final_model = LogisticRegression(C=0.5)
  29. final_model.fit(X, target)
  30. print ("Final Accuracy: %s"
  31. % accuracy_score(target, final_model.predict(X_test)))
  32.  
  33. # Final Accuracy: 0.898
Add Comment
Please, Sign In to add comment