Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from keras.preprocessing.text import Tokenizer
def vocab_creater(text_lists, VOCAB_SIZE):
    """Build word<->index lookup tables from texts, keeping only the most frequent words.

    Parameters
    ----------
    text_lists : list of str
        Texts the tokenizer is fitted on.
    VOCAB_SIZE : int
        Exclusive upper bound on the word indices kept in the mappings
        (also passed to the tokenizer as ``num_words``).

    Returns
    -------
    tuple of (dict, dict)
        ``(word2idx, idx2word)`` — word->index and index->word mappings.
    """
    tokenizer = Tokenizer(num_words=VOCAB_SIZE)
    tokenizer.fit_on_texts(text_lists)
    # word_index maps word -> frequency rank (1-based, most frequent first)
    dictionary = tokenizer.word_index

    word2idx = {}
    idx2word = {}
    for word, index in dictionary.items():
        if index < VOCAB_SIZE:
            word2idx[word] = index
            # BUG FIX: the original assigned to the undefined name `index2word`,
            # raising NameError on the first kept word; the dict that is
            # initialized and returned is `idx2word`.
            idx2word[index] = word
    # NOTE(review): the original's dead-code tail `if v >= VOCAB_SIZE-1: continue`
    # was removed, and `return` is placed after the loop so that every
    # in-vocabulary word is included before returning.
    return word2idx, idx2word
# Build the shared vocabulary over the combined encoder and decoder texts.
# NOTE(review): VOCAB_SIZE=14999 looks like an off-by-one for an intended
# 15000-word vocabulary — confirm against the training configuration.
word2idx, idx2word = vocab_creater(
    text_lists=encoder_input_text + decoder_input_text,
    VOCAB_SIZE=14999,
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement