Advertisement
pqnysekackaya

Untitled

Jun 12th, 2022
1,086
0
Never
Not a member of Pastebin yet? Sign up; it unlocks many cool features!
Python 1.01 KB | None | 0 0
  1. import pandas as pd
  2. from sklearn.feature_extraction.text import TfidfVectorizer
  3. import nltk
  4. from nltk.corpus import stopwords as nltk_stopwords
  5. from sklearn.linear_model import LogisticRegression
  6.  
  7. try:
  8.     train = pd.read_csv('/tweets_lemm_train.csv')
  9.     test = pd.read_csv('/tweets_lemm_test.csv')
  10. except:
  11.     train = pd.read_csv('/datasets/tweets_lemm_train.csv')
  12.     test = pd.read_csv('/datasets/tweets_lemm_test.csv')
  13.  
  14. X_train = train['lemm_text'].values.astype('U')
  15. y_train = train['positive']
  16. X_test = test['lemm_text'].values.astype('U')
  17.  
  18. nltk.download('stopwords')
  19. stopwords = set(nltk_stopwords.words('russian'))
  20.  
  21. count_tf_idf = TfidfVectorizer(stop_words=stopwords)
  22. tf_idf_model = count_tf_idf.fit_transform(X_train)
  23. train_tfidf = count_tf_idf.transform(X_train)
  24. test_tfidf = count_tf_idf.transform(X_test)
  25.  
  26. model = LogisticRegression()
  27. model.fit(train_tfidf, y_train)
  28. predictions = pd.DataFrame(model.predict(test_tfidf))
  29. predictions.columns = ['positive']
  30.  
  31. predictions.to_csv('predictions')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement