Advertisement
Guest User

Untitled

a guest
Mar 23rd, 2019
54
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.54 KB | None | 0 0
  1. from keras.preprocessing.text import Tokenizer
  2.  
  3. def vocab_creater(text_lists, VOCAB_SIZE):
  4.  
  5. tokenizer = Tokenizer(num_words=VOCAB_SIZE)
  6. tokenizer.fit_on_texts(text_lists)
  7. dictionary = tokenizer.word_index
  8.  
  9. word2idx = {}
  10. idx2word = {}
  11. for k, v in dictionary.items():
  12. if v < VOCAB_SIZE:
  13. word2idx[k] = v
  14. index2word[v] = k
  15. if v >= VOCAB_SIZE-1:
  16. continue
  17.  
  18. return word2idx, idx2word
  19.  
  20. word2idx, idx2word = vocab_creater(text_lists=encoder_input_text+decoder_input_text, VOCAB_SIZE=14999)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement