View difference between Paste ID: skNcPwYL and eN2vw58d
SHOW: | | - or go back to the newest paste.
1
# nurble.py by http://twitter.com/johnradke
2
import nltk
3
4
# Part-of-speech tags whose words are kept instead of being replaced by
# 'nurble': the Penn Treebank noun tags (NN singular/mass noun, NNP proper
# noun, NNS plural noun) as produced by nltk.pos_tag.
# Kept as a list because nurbword concatenates it with `punc`.
ok = ['NN', 'NNP', 'NNS']

# Punctuation marks. Used two ways: nurbword compares a token's POS *tag*
# against these (so punctuation survives nurbling), and untok compares the
# *token text* against these (so no space is inserted before punctuation).
punc = [',', '.', '!', '?']
6
7
def nurbword(taggedWord):
  """Return the text to emit for one POS-tagged token.

  taggedWord is an indexable pair whose first item is the token text and
  whose second item is its part-of-speech tag (the script feeds it the
  pairs returned by nltk.pos_tag).

  If the tag is listed in `ok` or in `punc`, the token is kept and returned
  upper-cased; every other token is replaced by the literal 'nurble'.

  NOTE: the scraped diff carried two revisions of the "keep" branch (with
  and without .upper()); this follows the newer revision, which upper-cases.
  """
  word, tag = taggedWord[0], taggedWord[1]
  # Two membership tests instead of `ok + punc`, which would build a
  # throwaway list on every call.
  if tag in ok or tag in punc:
    return word.upper()
  return 'nurble'
10
11
def untok(words):
  """Join a sequence of token strings back into one sentence string.

  The first token is emitted as-is. Each later token is preceded by a single
  space unless the token itself is listed in `punc`, in which case it is
  attached directly to the preceding text. An empty sequence yields "".
  """
  return "".join(
    w if i == 0 or w in punc else " " + w
    for i, w in enumerate(words)
  )
13
14
#############################
15
16
# Read the whole input text. The context manager closes the file even if
# read() raises.
with open('sotu.txt') as f:
  sotu = f.read()

# Split the text into sentences, then tokenize and POS-tag each sentence;
# every element of taggedSentences is a list of (token, tag) pairs.
sentences = nltk.tokenize.sent_tokenize(sotu)

taggedSentences = [nltk.pos_tag(nltk.word_tokenize(s)) for s in sentences]

# Nurble each sentence token by token, re-join its tokens with untok, and
# write all sentences separated by single spaces. Using `with` guarantees
# the output is flushed and closed (the handle was previously never closed).
with open('nurbled.txt', 'w') as nurbled:
  nurbled.write(" ".join(untok([nurbword(w) for w in s]) for s in taggedSentences))