Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Sentiment-classification walkthrough: unigram + bigram bag-of-words features
# fed to logistic regression, with a small sweep over the regularization
# parameter C followed by a final fit on all training documents.
#
# The script reads three names it does not define:
#   reviews_train_clean, reviews_test_clean -- the documents handed to the
#       vectorizer (presumably lists of cleaned review strings; confirm
#       against the preprocessing step that produces them).
#   target -- the labels, row-aligned with reviews_train_clean.
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# binary=True records presence/absence of a term instead of its count;
# ngram_range=(1, 2) extracts single words and adjacent word pairs.
ngram_vectorizer = CountVectorizer(binary=True, ngram_range=(1, 2))

# Learn the vocabulary from the training documents only, then reuse that same
# vocabulary for the test documents so both matrices share one feature space.
X = ngram_vectorizer.fit_transform(reviews_train_clean)
X_test = ngram_vectorizer.transform(reviews_test_clean)

# Hold out 25% of the training rows for validation. The seed is fixed so the
# split, and therefore the per-C accuracies printed below, are reproducible
# from run to run.
X_train, X_val, y_train, y_val = train_test_split(
    X, target, train_size=0.75, random_state=42
)

# Sweep the regularization parameter and report validation accuracy for each.
for c in [0.01, 0.05, 0.25, 0.5, 1]:
    lr = LogisticRegression(C=c)
    lr.fit(X_train, y_train)
    print("Accuracy for C=%s: %s"
          % (c, accuracy_score(y_val, lr.predict(X_val))))

# Output recorded from an earlier run that used an unseeded split; exact
# figures will differ with the seeded split above and across library versions.
#     Accuracy for C=0.01: 0.88416
#     Accuracy for C=0.05: 0.892
#     Accuracy for C=0.25: 0.89424
#     Accuracy for C=0.5: 0.89456
#     Accuracy for C=1: 0.8944

# C=0.5 had the highest validation accuracy in the recorded run above, so the
# final model uses it and is refit on every training row (train + validation).
final_model = LogisticRegression(C=0.5)
final_model.fit(X, target)

# NOTE(review): `target` (the training labels) is reused here as the ground
# truth for the test predictions. That is only correct if the test set has the
# same number of rows and the same label ordering as the training set --
# confirm, or pass a dedicated test-label array instead.
print("Final Accuracy: %s"
      % accuracy_score(target, final_model.predict(X_test)))
# Recorded output: Final Accuracy: 0.898
Add Comment
Please, Sign In to add comment