Guest User

Untitled

a guest
Mar 23rd, 2018
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.33 KB | None | 0 0
  1. class sentiment:
  2. def sentiment():
  3. data = pd.read_csv("dump.csv")
  4. duplicate_data = data.copy()
  5.  
  6. duplicate_data['reviews'] = duplicate_data['reviews__body']
  7. duplicate_data['sentiment'] = duplicate_data['reviews__label']
  8.  
  9. duplicate_data = duplicate_data.loc[:, ['reviews__body','reviews__label']]
  10.  
  11. train, test = train_test_split(duplicate_data, test_size=0.2, random_state = 1)
  12. X_train = train['reviews__body'].values
  13. X_test = test['reviews__body'].values
  14. y_train = train['reviews__label']
  15. y_test = test['reviews__label']
  16.  
  17. def tokenize(text):
  18. return word_tokenize(text)
  19.  
  20. def stem(doc):
  21. return (stemmer.stem(w) for w in analyzer(doc))
  22.  
  23. get_stopwords = set(stopwords.words("english"))
  24.  
  25. vectorizer = CountVectorizer(analyzer = 'word',tokenizer = tokenize,lowercase = True,ngram_range = (1,1),stop_words = get_stopwords)
  26.  
  27. kfolds = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)
  28.  
  29. np.random.seed(1)
  30.  
  31. pipeline_svm = make_pipeline(vectorizer, SVC(probability=True, kernel="linear", class_weight="balanced"))
  32.  
  33. grid_svm = GridSearchCV(pipeline_svm, param_grid = {'svc_C': [0.01, 0.1, 1]}, cv = kfolds, scoring="roc_auc", verbose=1, n_jobs=-1)
  34.  
  35. print(grid_svm.fit(X_train, y_train))
Add Comment
Please, Sign In to add comment