import os
import pickle
import numpy as np

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
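# Cap this process at 40% of the GPU's memory so other jobs can share the card.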
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.4
set_session(tf.Session(config=config))

from keras.models import Sequential, load_model
from keras.layers import Dense, LSTM, Dropout
from keras.utils import to_categorical
from keras import optimizers

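# Toy character-level LSTM: each word in WORDS is fed in as <start> + all of its
# characters except the last + <end>, one-hot encoded and padded to a fixed length,
# and the network is trained to predict the word's final character.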
class StringEmbeddingsScript(object):
    # Sentinel characters: padding, start-of-word, end-of-word.
    CHAR_NONE = '\x00'
    CHAR_START = '\x01'
    CHAR_END = '\x02'

    WORDS = ['India', 'keras', 'stackoverflow', 'tensorflow']

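    # The model: one-hot character sequences of shape (word_len, num_unique_chars)
    # feed an LSTM whose final output goes through a softmax over the character set.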
    def create_model(self, num_units, word_len, num_unique_chars):
        input_shape = (word_len, num_unique_chars)

        model = Sequential()
        model.add(LSTM(num_units, input_shape=input_shape, unroll=True))
        model.add(Dense(num_unique_chars, activation='softmax'))

        model.compile(optimizer=optimizers.Adam(lr=0.003),
                      loss='categorical_crossentropy',
                      metrics=['mse'])
        return model

    def get_char_to_int(self):
        words = self.WORDS
        max_len = max(len(w) for w in words) + 2  # adding 2 for start and end chars
        nwords = len(words)

        # Character vocabulary: the three sentinels first, then the sorted distinct letters.
        chars = sorted(set(''.join(words)))
        chars = [self.CHAR_NONE, self.CHAR_START, self.CHAR_END] + chars
        charmap = {c: i for i, c in enumerate(chars)}
        nchars = len(chars)

        return max_len, nchars, nwords, words, charmap

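    # With the four WORDS above this works out to max_len = 15 (len('stackoverflow') + 2)
    # and nchars = 19 (the 3 sentinels plus 16 distinct letters); the generator below
    # one-hot encodes every word against that vocabulary.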
    def generator(self, max_len, nchars, nwords, words, charmap, b_size):
        while 1:
            char_none = to_categorical(charmap[self.CHAR_NONE], num_classes=nchars)
            # Ceil division so an exact multiple of b_size doesn't yield an empty extra batch.
            num_batches = int(np.ceil(nwords / float(b_size)))

            for i in range(num_batches):
                start = b_size * i
                end = b_size * (i + 1)

                split_words = words[start:end]
                n = len(split_words)

                data = np.zeros(shape=(n, max_len, nchars), dtype=np.float32)
                labels = np.zeros(shape=(n, nchars), dtype=np.float32)

                for j in range(n):
                    # Input: <start> + word minus its last character + <end>, padded with <none>.
                    # Target: the word's last character.
                    w = split_words[j][:-1]
                    last_char = split_words[j][-1]
                    w = '%s%s%s' % (self.CHAR_START, w, self.CHAR_END)
                    w = [to_categorical(charmap[x], num_classes=nchars) for x in w]
                    w = w + ([char_none] * (max_len - len(w)))
                    data[j] = w
                    labels[j] = to_categorical(charmap[last_char], num_classes=nchars)

                yield data, labels

    def run(self):
        neurons = 128
        b_size = 2
        max_len, nchars, nwords, words, charmap = self.get_char_to_int()

        model = self.create_model(neurons, max_len, nchars)

        generator = self.generator(max_len, nchars, nwords, words, charmap, b_size)
        steps = int(np.ceil(nwords / float(b_size)))
        model.fit_generator(generator, steps_per_epoch=steps, epochs=10)


if __name__ == '__main__':
    StringEmbeddingsScript().run()
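
# --- Sketch, not part of the original paste: a quick sanity check of the generator. ---
# Running these lines instead of run() pulls one batch and confirms the one-hot tensors
# have the shapes the LSTM expects: (b_size, max_len, nchars) inputs and (b_size, nchars)
# targets; for the WORDS above that is (2, 15, 19) and (2, 19).
#
# script = StringEmbeddingsScript()
# max_len, nchars, nwords, words, charmap = script.get_char_to_int()
# data, labels = next(script.generator(max_len, nchars, nwords, words, charmap, 2))
# print(data.shape, labels.shape)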