Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """unit test for CaptionGenerator"""
- import math
- import numpy as np
- import tensorflow as tf
- from im2txt.inference_utils import caption_generator
- class FakeVocab(object):
- """fake vocabulary for testing purposes"""
- def __init__(self):
- self.start_id = 0 # word id denoting sentence start
- self.end_id = 1 # word id denoting snetence end
- class FakeModel(object):
- def __init__(self):
- # number of words in the vocab
- self._vocab_size = 12
- # dimensionality of the nominal model state
- self._state_size = 1
- # map of previous word to the probability distribution of the next word
- self._probabilities = {
- 0: {1: 0.1,
- 2: 0.2,
- 3: 0.3,
- 4: 0.4},
- 2: {5: 0.1,
- 6: 0.9},
- 3: {1: 0.1,
- 7: 0.4,
- 8: 0.5},
- 4: {1: 0.3,
- 9: 0.3,
- 10: 0.4},
- 5: {1: 1.0},
- 6: {1: 1.0},
- 7: {1: 1.0},
- 8: {1: 1.0},
- 9: {1: 0.5,
- 11: 0.5},
- 10: {1: 1.0},
- 11: {1: 1.0},
- }
- # pylint:disable = unused - argument
- def feed_image(self, sess, encoded_image):
- # return a nominal model state
- return np.zeros([1, self._state_size])
- def inference_step(self, sess, input_feed, state_feed):
- # compute the matrix of softmax distributions for the next batch of words
- batch_size = input_feed.shape[0]
- softmax_output = np.zeros([batch_size, self._vocab_size])
- for batch_index, word_id in enumerate(input_feed):
- for next_word, probability in self._probabilities[word_id].items():
- softmax_output[batch_index, next_word] = probability
- # nominal state and metadata
- new_state = np.zeros([batch_size, self._state_size])
- metadata = None
- return softmax_output, new_state, metadata
- # pylint: enable=unused - argument
- class CaptionGeneratorTest(tf.test.TestCase):
- def _assertExpectedCaptions(self, expected_captions, beam_size=3,
- max_caption_length=20,
- length_normalization_factor=0):
- """tests that beam search generates the expected captions
- expected_captions:a sequence of pairs(sentence,probability),where sentence is a list of integer
- ids and probability is a float in [0,1]
- beam_size:parameter passed to the beam_search()
- max_caption_length:parameter passed to the beam_search()
- length_nomalization_factor:parameter passed to the beam_search()
- """
- expected_sentences = [c[0] for c in expected_captions]
- expected_probabilities = [c[1] for c in expected_captions]
- # generate captions
- generator = caption_generator.CaptionGenerator(
- model=FakeModel(),
- vocab=FakeVocab(),
- beam_size=beam_size,
- max_caption_length=max_caption_length,
- length_normalization_factor=length_normalization_factor)
- actual_captions = generator.beam_search(sess=None, encoded_image=None)
- actual_sentences = [c.sentence for c in actual_captions]
- actual_probabilities = [math.exp(c.logprob) for c in actual_captions]
- self.assertEqual(expected_sentences, actual_sentences)
- self.assertAllClose(expected_probabilities, actual_probabilities)
- def testBeamSize(self):
- # beam size =1
- expected = [([0, 4, 10, 1], 0.16)]
- self._assertExpectedCaptions(expected, beam_size=1)
- # beam size =2
- expected = [([0, 4, 10, 1], 0.16), ([0, 3, 8, 1], 0.15)]
- self._assertExpectedCaptions(expected, beam_size=2)
- # beam size=3
- expected = [([0, 2, 6, 1], 0.18), ([0, 4, 10, 1], 0.16), ([0, 3, 8, 1], 0.15)]
- self._assertExpectedCaptions(expected, beam_size=3)
- def testMaxLength(self):
- # max length =1
- expected = [([0], 1.0)]
- self._assertExpectedCaptions(expected, max_caption_length=1)
- # max length =2
- # there are no complete sentences,so partial sentences are returned
- expected = [([0, 4], 0.4), ([0, 3], 0.3), ([0, 2], 0.2)]
- self._assertExpectedCaptions(expected, max_caption_length=2)
- # max length =3
- # there is at least one sentence,so only complete sentences are returned
- expected = [([0, 4, 1], 0.12), ([0, 3, 1], 0.03)]
- self._assertExpectedCaptions(expected, max_caption_length=3)
- # max length =4
- expected = [([0, 2, 6, 1], 0.18), ([0, 4, 10, 1], 0.16), ([0, 3, 8, 1], 0.15)]
- self._assertExpectedCaptions(expected, max_caption_length=4)
- def testLengthNormalization(self):
- # length normalization factor =3
- # the longest caption is returned first,despite having low probability,
- # because it has the highest log(probability)/length**3
- expected = [
- ([0, 4, 9, 11, 1], 0.06),
- ([0, 2, 6, 1], 0.18),
- ([0, 4, 10, 1], 0.16),
- ([0, 3, 8, 1], 0.15),
- ]
- self._assertExpectedCaptions(expected, beam_size=4, length_normalization_factor=3)
- if __name__ == '__main__':
- tf.test.main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement