SHOW:
|
|
- or go back to the newest paste.
1 | # nurble.py by http://twitter.com/johnradke | |
2 | import nltk | |
3 | ||
# Part-of-speech tags whose words nurbword() keeps instead of replacing.
ok = ['NN', 'NNP', 'NNS']

# Punctuation strings. nurbword() compares these against a token's tag, and
# untok() compares them against the token text to decide whether to insert a
# leading space.
punc = [',', '.', '!', '?']
6 | ||
def nurbword(taggedWord):
    """Return the output text for one (word, tag) pair.

    taggedWord is indexed as taggedWord[0] (the word) and taggedWord[1]
    (its tag). When the tag appears in the module-level ``ok`` or ``punc``
    lists the word is returned upper-cased; every other word is replaced
    by the literal string 'nurble'.
    """
    tag = taggedWord[1]
    # Two membership tests instead of ``ok + punc`` so that no throwaway
    # list is built on every call.
    if tag in ok or tag in punc:
        return taggedWord[0].upper()
    return 'nurble'
10 | ||
def untok(words):
    """Join tokens back into a single string.

    The first token is emitted as-is. Each later token is preceded by one
    space, unless it is one of the ``punc`` strings, in which case it is
    attached directly to the preceding text. An empty input yields ''.
    """
    # Materialise once so tuples and generators work as well as lists.
    tokens = list(words)
    spaced = [w if w in punc else " " + w for w in tokens[1:]]
    return "".join(tokens[0:1] + spaced)
13 | ||
14 | ############################# | |
15 | ||
# Read the whole input text; the with-block closes the file even if read()
# raises.
with open('sotu.txt') as f:
    sotu = f.read()

# Split the text into sentences, then tokenize and POS-tag each sentence.
sentences = nltk.tokenize.sent_tokenize(sotu)

taggedSentences = [nltk.pos_tag(nltk.word_tokenize(s)) for s in sentences]

# Convert every tagged sentence with nurbword()/untok() and write the results
# separated by single spaces. The output handle was previously never closed;
# the with-block guarantees it is flushed and closed.
with open('nurbled.txt', 'w') as nurbled:
    nurbled.write(" ".join(untok([nurbword(w) for w in s]) for s in taggedSentences))