Not a member of Pastebin yet?
Sign Up —
it unlocks many cool features!
- from __future__ import print_function
- from keras.preprocessing import sequence
- from keras.models import Sequential
- from keras.layers import Dense, Embedding, Dropout
- from keras.layers import LSTM, TimeDistributed, Flatten
- from keras.datasets import imdb
- from keras.callbacks import EarlyStopping, ModelCheckpoint
- import numpy as np
# Hyperparameters for the IMDB sentiment model.
max_features = 20000  # vocabulary size: keep only the 20k most frequent words
maxlen = 80 # cut texts after this number of words (among top max_features most common words)
batch_size = 32  # used only by the final evaluate() call; the generators use their own size
embedding_dim = 100  # width of each word-embedding vector
def generate_batch(batchsize):
    """Yield (padded_sequences, labels) minibatches from the IMDB train split, forever.

    Loads the training split once, then cycles over it endlessly in
    contiguous slices of `batchsize` (the final slice of a pass may be
    smaller when the split size is not a multiple of `batchsize`).
    Sequences are zero-padded/truncated to `maxlen` at the end ('post').
    """
    (train_x, train_y), _ = imdb.load_data(num_words=max_features)
    print("train_size", train_x.shape)
    while True:
        for offset in range(0, len(train_x), batchsize):
            chunk = slice(offset, offset + batchsize)
            padded = sequence.pad_sequences(train_x[chunk], maxlen=maxlen, padding='post')
            yield padded, train_y[chunk]
def generate_val(valsize):
    """Yield (padded_sequences, labels) minibatches from the IMDB test split, forever.

    Mirrors generate_batch() but draws from the held-out test split:
    loads it once, then cycles endlessly in contiguous slices of
    `valsize`, padding each sequence to `maxlen` at the end ('post').
    """
    _, (test_x, test_y) = imdb.load_data(num_words=max_features)
    print("test_size", test_x.shape)
    while True:
        for offset in range(0, len(test_x), valsize):
            chunk = slice(offset, offset + valsize)
            padded = sequence.pad_sequences(test_x[chunk], maxlen=maxlen, padding='post')
            yield padded, test_y[chunk]
print('Build model...')
# Embedding -> per-timestep Dense projection -> LSTM -> 2-way softmax.
primary_model = Sequential()
# Weights start from an identity-like (eye) matrix; trainable=True lets them
# adapt. mask_zero=True makes index 0 (the pad value) masked downstream.
primary_model.add(Embedding(input_dim=max_features,
                            output_dim=embedding_dim,
                            trainable=True,
                            weights=[np.eye(max_features, embedding_dim)],
                            mask_zero=True))
primary_model.add(TimeDistributed(Dense(150, use_bias=False)))
primary_model.add(LSTM(128))
primary_model.add(Dense(2, activation='softmax'))
primary_model.summary()
# Labels are integer class ids (0/1), hence sparse_categorical_crossentropy.
primary_model.compile(loss='sparse_categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])

print('Train...')
# NOTE(review): newer Keras versions log 'val_accuracy', not 'val_acc' — confirm
# this template key matches the installed version or filename formatting fails.
filepath = "primeweights-{epoch:02d}-{val_acc:.2f}.hdf5"
checkpoint = ModelCheckpoint(filepath,
                             verbose=1,
                             save_best_only=True)
early_stopping_monitor = EarlyStopping(patience=2)
# Bug fix: `checkpoint` was created but never passed to callbacks, so no
# checkpoint file was ever written. It now runs alongside early stopping.
primary_model.fit_generator(generate_batch(25),
                            steps_per_epoch=1000,
                            epochs=2,
                            callbacks=[early_stopping_monitor, checkpoint],
                            validation_data=generate_val(25),
                            validation_steps=1000)

# Final held-out evaluation on the padded test split, then persist the model.
(_, _), (x_test, y_test) = imdb.load_data(num_words=max_features)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen, padding='post')
score, acc = primary_model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
primary_model.save('primary_model_imdb.h5')
Add Comment
Please, Sign In to add comment