Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # import Brown corpus from NLTK
- import nltk
- from nltk.corpus import brown
def parse_pattern(pattern):
    """Split a ``pos1+pos2+...`` string into a list of part-of-speech tags.

    Whitespace around each tag is ignored, so ``"AT + NN"`` and ``"AT+NN"``
    are equivalent.

    Args:
        pattern: The raw pattern text, tags separated by ``+``.

    Returns:
        The tags in the order they were written.

    Raises:
        ValueError: If any tag is empty (e.g. empty input or ``"AT++NN"``).
    """
    tags = [tag.strip() for tag in pattern.split("+")]
    if not all(tags):
        raise ValueError(
            "pattern must be non-empty tags separated by '+', got %r" % (pattern,)
        )
    return tags


def find_tag_sequences(tagged_words, tags):
    """Yield every run of consecutive words whose tags equal ``tags``.

    Args:
        tagged_words: An iterable of ``(word, tag)`` pairs.
        tags: The sequence of tags to look for; it may have any length.

    Yields:
        A tuple of ``(word, tag)`` pairs, one per tag in ``tags``, for each
        position where the tags match. Overlapping matches are all reported.
    """
    # Materialise once so any iterable is accepted and every lookup below is
    # a plain list index rather than a repeated access on the caller's object.
    words = list(tagged_words)
    tags = list(tags)
    width = len(tags)
    if width == 0:
        return
    # When the corpus is shorter than the pattern the range is empty.
    for start in range(len(words) - width + 1):
        if all(words[start + offset][1] == tag
               for offset, tag in enumerate(tags)):
            yield tuple(words[start:start + width])


def main():
    """Prompt for a tag pattern and print its matches in the first Brown file."""
    # Imported here so the helpers above remain usable without NLTK installed.
    from nltk.corpus import brown

    # ``raw_input`` exists only on Python 2; Python 3 renamed it to ``input``.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # ask the user for the sequence of parts of speech
    pattern = read_line("enter your pattern(pos1+pos2+pos3): ")
    try:
        tags = parse_pattern(pattern)
    except ValueError as err:
        # Exit with a readable message instead of a traceback.
        raise SystemExit(str(err))

    # choose the first document of brown corpus and read its tagged words
    first_doc = brown.fileids()[0]
    corpus = brown.tagged_words(first_doc)

    for match in find_tag_sequences(corpus, tags):
        # The trailing "\n" keeps the original output format: each match is
        # followed by a blank line.
        print(" ".join(str(pair) for pair in match) + "\n")


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement