Advertisement
Guest User

Untitled

a guest
May 25th, 2019
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.20 KB | None | 0 0
  1. """vocabulary class for an image-to-text model"""
  2. from __future__ import division
  3. from __future__ import absolute_import
  4. from __future__ import print_function
  5.  
  6. import tensorflow as tf
  7.  
  8.  
  9. class Vocabulary(object):
  10. """vocabulary class for an image-to-text model"""
  11.  
  12. def __init__(self,
  13. vocab_file,
  14. start_word="<S>",
  15. end_word="</S>",
  16. unk_word="<UNK>"):
  17. """initializes the vocabulary
  18. vocab_file:file containing the vocabulary,where the words are the first
  19. whitespace-separated token on each file(other tokens are ignored) and the
  20. word ids are the corresponding line numbers
  21. start_word:special note denoting sentence start
  22. end_word:special note denoting sentence end
  23. unk_word: Special word denoting unknown words.
  24. """
  25. if not tf.gfile.Exists(vocab_file):
  26. tf.logging.fatal("vocab file %s not found", vocab_file)
  27. tf.logging.info("initializing vocabulary from file %s", vocab_file)
  28. with tf.gfile.GFile(vocab_file, mode="r") as f:
  29. reverse_vocab = list(f.readlines())
  30. reverse_vocab = [line.split()[0] for line in reverse_vocab]
  31. assert start_word in reverse_vocab
  32. assert end_word in reverse_vocab
  33. if unk_word not in reverse_vocab:
  34. reverse_vocab.append(unk_word)
  35. vocab = dict([(x, y) for (y, x) in enumerate(reverse_vocab)])
  36. tf.logging.info("created vocabulary with %d words" % len(vocab))
  37.  
  38. self.vocab = vocab # vocab[word]=id
  39. self.reverse_vocab = reverse_vocab # reverse_vocab[id] =word
  40. # save special word ids.
  41. self.start_id = vocab[start_word]
  42. self.end_id = vocab[end_word]
  43. self.unk_id = vocab[unk_word]
  44.  
  45. def word_to_id(self, word):
  46. """returns the integer word id of a word string"""
  47. if word in self.vocab:
  48. return self.vocab[word]
  49. else:
  50. return self.unk_id
  51.  
  52. def id_to_word(self, word_id):
  53. """returns the word strings of an integer word id"""
  54. if word_id >= len(self.reverse_vocab):
  55. return self.reverse_vocab[self.unk_id]
  56. else:
  57. return self.reverse_vocab[word_id]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement