Advertisement
Guest User

Untitled

a guest
Sep 11th, 2019
112
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.33 KB | None | 0 0
  class SVMSentiment(Base):
      """Fine-grained sentiment classifier built on a scikit-learn pipeline.

      Raw text is turned into token counts, re-weighted with TF-IDF, and fed to
      a linear Support Vector Machine trained by stochastic gradient descent.
      """

      def __init__(self, model_file: str=None) -> None:
          """Assemble the (unfitted) vectorizer -> TF-IDF -> linear SVM pipeline.

          Args:
              model_file: Accepted as part of the constructor signature; this
                  constructor does not read it.
          """
          super().__init__()
          # scikit-learn is imported here rather than at module level, so it is
          # only required once an instance is actually created.
          from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
          from sklearn.linear_model import SGDClassifier
          from sklearn.pipeline import Pipeline

          token_counter = CountVectorizer()
          tfidf_weighter = TfidfTransformer()
          # Hinge loss with an L2 penalty is what makes SGDClassifier a linear SVM.
          # tol=None switches off the tolerance-based stopping rule, so training
          # runs for the full max_iter epochs; random_state pins the shuffling.
          linear_svm = SGDClassifier(
              loss='hinge',
              penalty='l2',
              alpha=1e-3,
              random_state=42,
              max_iter=100,
              learning_rate='optimal',
              tol=None,
          )

          stages = [
              ('vect', token_counter),
              ('tfidf', tfidf_weighter),
              ('clf', linear_svm),
          ]
          self.pipeline = Pipeline(stages)

      def predict(self, train_file: str, test_file: str, lower_case: bool=False) -> pd.DataFrame:
          """Fit the pipeline on the training file, then label the test file.

          Both files are loaded through ``self.read_data`` (not defined in this
          class; presumably provided by ``Base``). The loaded frames are indexed
          by a 'text' column, and the training frame also by a 'truth' column.

          Args:
              train_file: Path handed to ``read_data`` for the training set.
              test_file: Path handed to ``read_data`` for the evaluation set.
              lower_case: Forwarded unchanged to ``read_data`` for both files.

          Returns:
              The test frame with the predicted labels stored in a new 'pred'
              column.
          """
          # Training: load the labelled examples and fit every pipeline stage.
          training = self.read_data(train_file, lower_case)
          train_texts = training['text']
          train_labels = training['truth']
          fitted = self.pipeline.fit(train_texts, train_labels)

          # Inference: the test set is loaded only after fitting has finished.
          evaluation = self.read_data(test_file, lower_case)
          evaluation['pred'] = fitted.predict(evaluation['text'])
          return evaluation
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement