Advertisement
Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
# --- Build the training data -------------------------------------------------
# Turn each (tokenized_pattern, tag) pair in `documents` into a fixed-size
# bag-of-words input vector over the vocabulary `words`, plus a one-hot
# output vector over `classes`.
# NOTE(review): relies on module-level `documents`, `words`, `classes`, and
# `stemmer` being defined earlier in the file — presumably words/classes are
# already stemmed/sorted by the preprocessing step; confirm against that code.

# create our training data
training = []
output = []
# template one-hot row: one slot per class, all zeros
output_empty = [0] * len(classes)

# training set: bag of words for each sentence
for doc in documents:
    # list of tokenized words for the pattern
    pattern_words = doc[0]
    # stem + lowercase so tokens match the stemmed vocabulary in `words`
    pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]

    # Build the bag as a comprehension instead of the original
    # `bag.append(1) if w in pattern_words else bag.append(0)`, which used a
    # conditional *expression* purely for its append side effect.
    # A set makes each membership test O(1) instead of an O(n) list scan.
    pattern_set = set(pattern_words)
    bag = [1 if w in pattern_set else 0 for w in words]
    training.append(bag)

    # output is a '0' for each tag and '1' for the current tag
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1
    output.append(output_row)

# sample training/output — sanity-check the first document
i = 0
w = documents[i][0]
print([stemmer.stem(word.lower()) for word in w])
print(training[i])
print(output[i])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement