Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class SVMSentiment(Base):
    """Predict fine-grained sentiment scores using a sklearn
    linear Support Vector Machine (SVM) pipeline.

    The pipeline chains a bag-of-words ``CountVectorizer``, a
    ``TfidfTransformer`` and an ``SGDClassifier`` trained with hinge loss
    and an L2 penalty.
    """

    def __init__(self, model_file: str = None) -> None:
        """Build the (unfitted) vectorizer -> tf-idf -> classifier pipeline.

        Args:
            model_file: Accepted but not read anywhere in this class.
                NOTE(review): presumably kept so the constructor signature
                matches other ``Base`` subclasses -- confirm against callers.
        """
        super().__init__()
        # sklearn is imported inside the constructor rather than at module
        # level. NOTE(review): presumably so that sklearn is only required
        # when this classifier is actually instantiated -- confirm.
        from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
        from sklearn.linear_model import SGDClassifier
        from sklearn.pipeline import Pipeline

        self.pipeline = Pipeline(
            [
                ('vect', CountVectorizer()),
                ('tfidf', TfidfTransformer()),
                ('clf', SGDClassifier(
                    loss='hinge',
                    penalty='l2',
                    alpha=1e-3,
                    # Fixed seed so repeated runs give the same model.
                    random_state=42,
                    max_iter=100,
                    learning_rate='optimal',
                    # tol=None disables the early-stopping criterion, so
                    # training always runs for the full max_iter epochs.
                    tol=None,
                )),
            ]
        )

    def predict(self, train_file: str, test_file: str, lower_case: bool = False) -> pd.DataFrame:
        """Fit the pipeline on the training file and label the test file.

        Both files are loaded through ``self.read_data`` (not defined in this
        class; expected to be provided by ``Base``). The loaded training data
        must expose ``'text'`` and ``'truth'`` columns and the test data a
        ``'text'`` column, since those are the columns read here.

        Args:
            train_file: Path passed to ``read_data`` for the training set.
            test_file: Path passed to ``read_data`` for the test set.
            lower_case: Forwarded unchanged to ``read_data`` for both files.
                NOTE(review): presumably lower-cases the text -- confirm in
                ``read_data``.

        Returns:
            The test data with an added ``'pred'`` column holding the
            predicted class label for each row.
        """
        train_df = self.read_data(train_file, lower_case)
        # Pipeline.fit returns the fitted pipeline itself, so ``learner`` is
        # the same object as ``self.pipeline``; every call re-fits it.
        learner = self.pipeline.fit(train_df['text'], train_df['truth'])
        # Predict class labels using the learner and output DataFrame
        test_df = self.read_data(test_file, lower_case)
        test_df['pred'] = learner.predict(test_df['text'])
        return test_df
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement