Guest User

Untitled

a guest
Apr 23rd, 2018
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.44 KB | None | 0 0
  1. positive_train_data = pd.read_csv('train_pos.tsv',sep = 't')
  2. negative_train_data = pd.read_csv('train_neg.tsv',sep = 't')
  3. positive_test_data = pd.read_csv('test_pos.tsv',sep = 't')
  4. negative_test_data = pd.read_csv('test_neg.tsv',sep = 't')
  5.  
  6. positive_train_data = positive_train_data[['Text','Sentiment']]
  7. negative_train_data = negative_train_data[['Text','Sentiment']]
  8. positive_test_data = positive_test_data[['Text','Sentiment']]
  9. negative_test_data = negative_test_data[['Text','Sentiment']]
  10.  
  11. train_data = pd.concat([positive_train_data,negative_train_data],ignore_index = True)
  12. train_data = train_data.sample(frac=1).reset_index(drop=True)
  13.  
  14. test_data = pd.concat([positive_test_data,negative_test_data],ignore_index = True)
  15. test_data = test_data.sample(frac=1).reset_index(drop=True)
  16.  
  17. data = pd.concat([positive_train_data,negative_train_data,positive_test_data,negative_test_data],ignore_index = True)
  18. data.reset_index(drop=True,inplace=True)
  19. x = data.Text
  20. y = data.Sentiment
  21. SEED = 2000
  22.  
  23.  
  24. x_train, x_test, y_train1, y_test = train_test_split(x, y, test_size = 0.3, random_state = 2000)
  25. print( "Train set has total {0} entries with {1:.2f}% negative, {2:.2f}% positive".format(len(x_train),
  26. (len(x_train[y_train1 == 0]) / (len(x_train)*1.))*100,
  27. (len(x_train[y_train1 == 1]) / (len(x_train)*1.))*100))
  28.  
  29. print ("Test set has total {0} entries with {1:.2f}% negative, {2:.2f}% positive".format(len(x_test),
  30. (len(x_test[y_test == 0]) / (len(x_test)*1.))*100,
  31. (len(x_test[y_test == 1]) / (len(x_test)*1.))*100))
  32.  
  33. tvec1 = TfidfVectorizer(max_features=10000,ngram_range=(1, 2),use_idf=1,smooth_idf=1,sublinear_tf=1,stop_words = 'english')
  34. tvec1.fit(x_train)
  35.  
  36. x_train_tfidf = tvec1.transform(x_train)
  37. x_test_tfidf = tvec1.transform(x_test).toarray()
  38.  
  39.  
  40.  
  41. model = Sequential()
  42. model.add(Dense(500, activation='relu', input_dim=1000))
  43. model.add(Dropout(0.25))
  44. model.add(Dense(50,activation = 'relu'))
  45. model.add(Dense(1, activation='sigmoid'))
  46.  
  47. model.add(Embedding(5000,64,input_length = 1000))
  48. model.add(Conv1D(128,3,padding = 'same',))
  49. model.add(Flatten())
  50. model.add(Dropout(0.25))
  51. model.add(Dense(100,activation = 'relu'))
  52. model.add(Dropout(0.25))
  53. model.add(Dense(1,activation='sigmoid'))
Add Comment
Please, Sign In to add comment