Untitled

# Generate a batch by randomly sampling images and, for each image, one of its captions.
# The `max_len` parameter controls the length of the captions (long captions are truncated).
def generate_batch(images_embeddings, indexed_captions, batch_size, max_len=None):
    """
    Build a random training batch of image embeddings and matching captions.

    `images_embeddings` is a np.array of shape [number of images, IMG_EMBED_SIZE].
    `indexed_captions` holds the vocabulary-indexed captions for each image
    (typically 5 per image, but any non-zero count is supported):
    [
        [
            [vocab[START], vocab["image1"], vocab["caption1"], vocab[END]],
            [vocab[START], vocab["image1"], vocab["caption2"], vocab[END]],
            ...
        ],
        ...
    ]
    For each of the `batch_size` samples, a random image is drawn (with
    replacement) and then one of that image's captions is drawn uniformly.
    The chosen captions are passed to `batch_captions_to_matrix` together with
    `pad_idx` and `max_len`, which handles padding and the `max_len` limit.

    NOTE(review): `decoder`, `pad_idx` and `batch_captions_to_matrix` are not
    defined in this function; they are expected to exist at module level.

    Return feed dict {decoder.img_embeds: ..., decoder.sentences: ...}, where
    the image embeddings are a list of rows of `images_embeddings` and the
    sentences are the result of `batch_captions_to_matrix`.
    """
    n_images = images_embeddings.shape[0]
    batch_image_embeddings = []
    batch_captions = []
    for _ in range(batch_size):
        # Draw the image first, then the caption, so that the sequence of
        # random draws matches the original implementation under a fixed seed.
        img_idx = np.random.choice(n_images)
        captions = indexed_captions[img_idx]
        # Sample among the captions this image actually has instead of
        # assuming exactly 5: avoids IndexError for images with fewer captions
        # and never ignores captions beyond the fifth.
        cap_idx = np.random.choice(len(captions))
        batch_image_embeddings.append(images_embeddings[img_idx])
        batch_captions.append(captions[cap_idx])

    batch_captions_matrix = batch_captions_to_matrix(batch_captions, pad_idx, max_len)

    return {decoder.img_embeds: batch_image_embeddings,
            decoder.sentences: batch_captions_matrix}