Advertisement
Guest User

Untitled

a guest
Jun 25th, 2017
56
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.80 KB | None | 0 0
  1. # import Brown corpus from NLTK
  2. import nltk
  3. from nltk.corpus import brown
  4.  
  5. # ask the user for the sequence of parts of speech
  6. pattern = raw_input("enter your pattern(pos1+pos2+pos3): ")
  7.  
  8. # split the pattern into three tags: p1, p2 and p3
  9. elt_pattern = pattern.split("+")
  10.  
  11. # choose the first document of brown corpus
  12. # find all the tagged words and put them into a list named corpus
  13. doc = brown.fileids()
  14. corpus = brown.tagged_words(doc[0])
  15.  
  16. # allocate the three tags to p1, p2 and p3
  17. p1 = elt_pattern[0]
  18. p2 = elt_pattern[1]
  19. p3 = elt_pattern[2]
  20.  
  21. # create a for loop to match words' tags with the three tags
  22. size = len(corpus)-2
  23. for i in range(size):
  24.   if corpus[i][1]==p1 and corpus[i+1][1]==p2 and corpus[i+2][1]==p3:
  25.   print str(corpus[i]) + " " + str(corpus[i+1])+ " " + str(corpus[i+2]) + "\n"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement