Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Sentiment-classification script: loads four tab-separated splits, pools
# them, re-splits 70/30, builds TF-IDF features and defines the networks.
#
# The snippet carries no imports; it expects `pd`, `train_test_split`,
# `TfidfVectorizer`, `Sequential`, `Dense`, `Dropout`, `Embedding`, `Conv1D`
# and `Flatten` to already be in scope.

# Single seed shared by every random operation so runs are reproducible.
SEED = 2000


def _load_split(path):
    """Read one tab-separated split and keep only the Text/Sentiment columns."""
    # The separator must be a real tab; a bare 't' would split on the letter.
    return pd.read_csv(path, sep='\t')[['Text', 'Sentiment']]


def _report_balance(label, texts, labels):
    """Print the size of a split and its share of 0 / 1 labelled rows."""
    total = len(texts)
    negative_pct = (labels == 0).sum() / (total * 1.) * 100
    positive_pct = (labels == 1).sum() / (total * 1.) * 100
    print("{0} set has total {1} entries with {2:.2f}% negative, "
          "{3:.2f}% positive".format(label, total, negative_pct, positive_pct))


positive_train_data = _load_split('train_pos.tsv')
negative_train_data = _load_split('train_neg.tsv')
positive_test_data = _load_split('test_pos.tsv')
negative_test_data = _load_split('test_neg.tsv')

# Shuffled copies of the original train / test partitions.
train_data = pd.concat([positive_train_data, negative_train_data],
                       ignore_index=True)
train_data = train_data.sample(frac=1, random_state=SEED).reset_index(drop=True)
test_data = pd.concat([positive_test_data, negative_test_data],
                      ignore_index=True)
test_data = test_data.sample(frac=1, random_state=SEED).reset_index(drop=True)

# Pool every split; `ignore_index=True` already yields a fresh 0..n-1 index,
# so no further reset is needed before re-splitting.
data = pd.concat(
    [positive_train_data, negative_train_data,
     positive_test_data, negative_test_data],
    ignore_index=True,
)
x = data.Text
y = data.Sentiment

x_train, x_test, y_train1, y_test = train_test_split(
    x, y, test_size=0.3, random_state=SEED)

_report_balance("Train", x_train, y_train1)
_report_balance("Test", x_test, y_test)

# Uni- and bi-gram TF-IDF features, vocabulary capped at 10000 terms.
# The flags are real booleans: recent scikit-learn rejects ints here.
tvec1 = TfidfVectorizer(max_features=10000, ngram_range=(1, 2), use_idf=True,
                        smooth_idf=True, sublinear_tf=True,
                        stop_words='english')
tvec1.fit(x_train)
# NOTE(review): the train matrix stays sparse as in the original while the
# test matrix is densified; densify the train matrix too if the Keras version
# in use does not accept SciPy sparse input.
x_train_tfidf = tvec1.transform(x_train)
x_test_tfidf = tvec1.transform(x_test).toarray()

# Feed-forward classifier over the TF-IDF vectors. The input width is taken
# from the feature matrix so it always matches the fitted vocabulary size
# (the original hard-coded 1000, which disagrees with max_features=10000).
model = Sequential()
model.add(Dense(500, activation='relu', input_dim=x_train_tfidf.shape[1]))
model.add(Dropout(0.25))
model.add(Dense(50, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# NOTE(review): the original appended the layers below to `model` *after* its
# sigmoid output, which stacks an Embedding on a single probability. They are
# kept as a separate network here; confirm that two models were intended.
# This network takes length-1000 sequences of integer token ids drawn from a
# 5000-word vocabulary; no such input is prepared in this snippet.
cnn_model = Sequential()
cnn_model.add(Embedding(5000, 64, input_length=1000))
cnn_model.add(Conv1D(128, 3, padding='same'))
cnn_model.add(Flatten())
cnn_model.add(Dropout(0.25))
cnn_model.add(Dense(100, activation='relu'))
cnn_model.add(Dropout(0.25))
cnn_model.add(Dense(1, activation='sigmoid'))
Add Comment
Please, Sign In to add comment