Not a member of Pastebin yet? Sign up — it unlocks many cool features!
# Pull a single batch from the torchtext iterator and print its pieces so the
# tensor layout can be inspected; the trailing `break` stops after one batch.
for batch in train_iter:
    # batch is a torchtext.data.batch.Batch object with two fields: text and
    # label.  Because the Field was created with include_lengths=True,
    # batch.text is a pair:
    #   batch.text[0] — word-index tensor, [len(longest sentence) x batch_size]
    #   batch.text[1] — length of each sentence, [batch_size]
    # batch.label holds the prediction target for each sentence, [batch_size].
    print("batch:")
    print(batch, "\n")

    # The prediction targets for every sentence in this batch.
    print("batch.label:")
    print(batch.label, "\n")

    # The word-index matrix itself (first component of batch.text).
    print("batch.text[0]:")
    print(batch.text[0], "\n")

    # First row of the matrix = the first token of every sentence.
    # All values are 2 because each sentence begins with <SOS>, and the
    # index of <SOS> in TEXT.vocab.stoi is 2.
    print("batch.text[0][0]:")
    print(batch.text[0][0], "\n")

    # Reconstruct one sentence by mapping each word index in column 5
    # (zero-based — the tutorial calls it the "fifth" sentence) back to its
    # word via the `lookup` dictionary.  torchtext pads shorter sentences
    # with <PAD> after <EOS>, so every column in the batch has equal length.
    print("Fifth sentence: ")
    for row in range(batch.text[0].size()[0]):
        print(lookup[batch.text[0][row].tolist()[5]], end=" ")

    # Only inspect the first batch.
    break
Add Comment
Please sign in to add a comment.