Guest User

Untitled

a guest
Feb 26th, 2018
282
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.63 KB | None | 0 0
  1. import pandas as pd
  2.  
  3. from keras.preprocessing.text import Tokenizer
  4. from keras.preprocessing.sequence import pad_sequences
  5. import numpy as np
  6. from sklearn.preprocessing import LabelEncoder
  7.  
  8. from keras.models import Sequential
  9. from keras.layers import Embedding, Flatten, Dense
  10. from sklearn.preprocessing import LabelEncoder
  11.  
  12. df=pd.read_csv('../data/crowdflower_text_emotion.csv')
  13.  
  14. df.drop(['tweet_id','author'],axis=1,inplace=True)
  15.  
  16. df=df[~df['sentiment'].isin(['empty','enthusiasm','boredom','anger'])]
  17.  
  18. df = df.sample(frac=1).reset_index(drop=True)
  19.  
  20. labels = df['sentiment']
  21. texts = df['content']
  22.  
  23. tokenizer = Tokenizer(5000)
  24. tokenizer.fit_on_texts(texts)
  25.  
  26. sequences = tokenizer.texts_to_sequences(texts)
  27.  
  28. word_index = tokenizer.word_index
  29. print('Found %s unique tokens.' % len(word_index))
  30.  
  31. data = pad_sequences(sequences, maxlen=37)
  32.  
  33. encoder = LabelEncoder()
  34. encoder.fit(labels)
  35. encoded_Y = encoder.transform(labels)
  36.  
  37. from keras.utils import np_utils
  38. labels = np_utils.to_categorical(encoded_Y)
  39.  
  40. print('Shape of data tensor:', data.shape)
  41. print('Shape of label tensor:', labels.shape)
  42.  
  43. indices = np.arange(data.shape[0])
  44. np.random.shuffle(indices)
  45. data = data[indices]
  46. labels = labels[indices]
  47. print(labels.shape)
  48.  
  49. model = Sequential()
  50. model.add(Embedding(5000, 30, input_length=37))
  51. model.add(Flatten())
  52. model.add(Dense(100,activation='relu'))
  53. model.add(Dense(50, activation='relu'))
  54. model.add(Dense(labels.shape[1], activation='softmax'))
  55. model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
  56.  
  57. model.fit(data, labels, validation_split=0.2, epochs=10, batch_size=100)
Add Comment
Please, Sign In to add comment