Not a member of Pastebin yet? Sign up — it unlocks many cool features!
# Pull a single batch from the torchtext iterator and print its pieces so the
# tensor layout can be inspected; the trailing `break` stops after one batch.
for batch in train_iter:
    # batch is a torchtext.data.batch.Batch object with two fields: text and
    # label.  Because the Field was created with include_lengths=True,
    # batch.text is a pair:
    #   batch.text[0] — word-index tensor, [len(longest sentence) x batch_size]
    #   batch.text[1] — length of each sentence, [batch_size]
    # batch.label holds the prediction target for each sentence, [batch_size].
    print("batch:")
    print(batch, "\n")

    # The prediction targets for every sentence in this batch.
    print("batch.label:")
    print(batch.label, "\n")

    # The word-index matrix itself (first component of batch.text).
    print("batch.text[0]:")
    print(batch.text[0], "\n")

    # First row of the matrix = the first token of every sentence.
    # All values are 2 because each sentence begins with <SOS>, and the
    # index of <SOS> in TEXT.vocab.stoi is 2.
    print("batch.text[0][0]:")
    print(batch.text[0][0], "\n")

    # Reconstruct one sentence by mapping each word index in column 5
    # (zero-based — the tutorial calls it the "fifth" sentence) back to its
    # word via the `lookup` dictionary.  torchtext pads shorter sentences
    # with <PAD> after <EOS>, so every column in the batch has equal length.
    print("Fifth sentence: ")
    for row in range(batch.text[0].size()[0]):
        print(lookup[batch.text[0][row].tolist()[5]], end=" ")

    # Only inspect the first batch.
    break
Add Comment
Please sign in to add a comment.