Not a member of Pastebin yet?
Sign up — it unlocks many cool features!
class sentiment:
    """Train a linear-kernel SVM sentiment classifier on review text.

    Reads ``dump.csv`` (expected to contain ``reviews__body`` and
    ``reviews__label`` columns — TODO confirm schema against the data
    source), vectorizes the text with a bag-of-words model, and
    grid-searches the SVM regularization strength ``C``.
    """

    @staticmethod
    def sentiment():
        """Load the data, build the pipeline, and run the grid search.

        Side effects: reads ``dump.csv`` from the working directory and
        prints the fitted ``GridSearchCV`` object. Returns ``None``.
        """
        data = pd.read_csv("dump.csv")

        # Work on a copy restricted to the two columns actually used.
        # (The original also created 'reviews'/'sentiment' aliases, but
        # they were immediately discarded by this selection — removed.)
        duplicate_data = data.copy()
        duplicate_data = duplicate_data.loc[:, ['reviews__body', 'reviews__label']]

        # 80/20 split; fixed random_state for reproducibility.
        train, test = train_test_split(duplicate_data, test_size=0.2, random_state=1)
        X_train = train['reviews__body'].values
        X_test = test['reviews__body'].values   # held out for later evaluation
        y_train = train['reviews__label']
        y_test = test['reviews__label']         # held out for later evaluation

        def tokenize(text):
            # NLTK tokenizer, plugged into CountVectorizer below.
            return word_tokenize(text)

        get_stopwords = set(stopwords.words("english"))
        vectorizer = CountVectorizer(
            analyzer='word',
            tokenizer=tokenize,
            lowercase=True,
            ngram_range=(1, 1),
            stop_words=get_stopwords,
        )

        # Stratified CV keeps class proportions per fold.
        kfolds = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)
        np.random.seed(1)

        pipeline_svm = make_pipeline(
            vectorizer,
            SVC(probability=True, kernel="linear", class_weight="balanced"),
        )

        # BUG FIX: pipeline step parameters need a DOUBLE underscore
        # ('svc__C'); the original 'svc_C' makes GridSearchCV.fit raise
        # "ValueError: Invalid parameter svc_C for estimator Pipeline".
        grid_svm = GridSearchCV(
            pipeline_svm,
            param_grid={'svc__C': [0.01, 0.1, 1]},
            cv=kfolds,
            scoring="roc_auc",
            verbose=1,
            n_jobs=-1,
        )
        print(grid_svm.fit(X_train, y_train))
Add Comment
Please sign in to add a comment.