Advertisement
Guest User

Untitled

a guest
Jan 24th, 2017
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.84 KB | None | 0 0
  # Build the training data: for every document, one bag-of-words vector
  # (appended to `training`) and one one-hot class vector (appended to
  # `output`).
  # Relies on names defined elsewhere in the script: `documents`, `words`,
  # `classes` and `stemmer`. Each document is indexed as doc[0] (its list of
  # tokenized words) and doc[1] (its tag, which must appear in `classes`).
  training = []
  output = []
  # template row of zeros, one slot per class; copied for each document
  output_empty = [0] * len(classes)

  for doc in documents:
      # stem and lowercase every token of the current pattern
      pattern_words = [stemmer.stem(word.lower()) for word in doc[0]]
      # a set gives constant-time membership tests in the loop over `words`
      pattern_stems = set(pattern_words)

      # bag of words: 1 where the vocabulary word occurs in the pattern, else 0
      bag = [1 if vocab_word in pattern_stems else 0 for vocab_word in words]
      training.append(bag)

      # output is 0 for each tag and 1 for the current tag
      output_row = list(output_empty)
      output_row[classes.index(doc[1])] = 1
      output.append(output_row)

  # sample training/output: show the first document's stems and its vectors
  i = 0
  w = documents[i][0]
  print([stemmer.stem(word.lower()) for word in w])
  print(training[i])
  print(output[i])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement