SHARE
TWEET

Untitled

a guest May 25th, 2019 58 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. """vocabulary class for an image-to-text model"""
  2. from __future__ import division
  3. from __future__ import absolute_import
  4. from __future__ import print_function
  5.  
  6. import tensorflow as tf
  7.  
  8.  
  9. class Vocabulary(object):
  10.     """vocabulary class for an image-to-text model"""
  11.  
  12.     def __init__(self,
  13.                  vocab_file,
  14.                  start_word="<S>",
  15.                  end_word="</S>",
  16.                  unk_word="<UNK>"):
  17.         """initializes the vocabulary
  18.         vocab_file:file containing the vocabulary,where the words are the first
  19.         whitespace-separated token on each file(other tokens are ignored) and the
  20.         word ids are the corresponding line numbers
  21.         start_word:special note denoting sentence start
  22.         end_word:special note denoting sentence end
  23.         unk_word: Special word denoting unknown words.
  24.         """
  25.         if not tf.gfile.Exists(vocab_file):
  26.             tf.logging.fatal("vocab file %s not found", vocab_file)
  27.         tf.logging.info("initializing vocabulary from file %s", vocab_file)
  28.         with tf.gfile.GFile(vocab_file, mode="r") as f:
  29.             reverse_vocab = list(f.readlines())
  30.         reverse_vocab = [line.split()[0] for line in reverse_vocab]
  31.         assert start_word in reverse_vocab
  32.         assert end_word in reverse_vocab
  33.         if unk_word not in reverse_vocab:
  34.             reverse_vocab.append(unk_word)
  35.         vocab = dict([(x, y) for (y, x) in enumerate(reverse_vocab)])
  36.         tf.logging.info("created vocabulary with %d words" % len(vocab))
  37.  
  38.         self.vocab = vocab  # vocab[word]=id
  39.         self.reverse_vocab = reverse_vocab  # reverse_vocab[id] =word
  40.         # save special word ids.
  41.         self.start_id = vocab[start_word]
  42.         self.end_id = vocab[end_word]
  43.         self.unk_id = vocab[unk_word]
  44.  
  45.     def word_to_id(self, word):
  46.         """returns the integer word id of a word string"""
  47.         if word in self.vocab:
  48.             return self.vocab[word]
  49.         else:
  50.             return self.unk_id
  51.  
  52.     def id_to_word(self, word_id):
  53.         """returns the word strings of an integer word id"""
  54.         if word_id >= len(self.reverse_vocab):
  55.             return self.reverse_vocab[self.unk_id]
  56.         else:
  57.             return self.reverse_vocab[word_id]
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top