Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python2 -W ignore::FutureWarning
- # put this in your path, rename to just 'Occur'
- # Occur import <- call with argument to count one word
- # or call no arguments to list frequencies of all words
- # -- Uses the default Tokenizer settings
- # which obliterate all punctuation
- # and make everything lower case.
- import os
- import subprocess
- import sys
- from keras.preprocessing.text import Tokenizer
def count_for(word_counts, word):
    """Return how many times *word* occurs according to *word_counts*.

    *word_counts* is the word -> count mapping built by the Tokenizer.
    The key is lower-cased before the lookup because the default
    Tokenizer settings lower-case every token, so a mixed-case argument
    could otherwise never match.  A word that does not occur at all
    yields 0 instead of raising KeyError.
    """
    return word_counts.get(word.lower(), 0)


def sorted_frequencies(word_counts):
    """Return (count, word) pairs sorted by ascending count, then word."""
    return sorted((count, word) for word, count in word_counts.items())


def read_window_body(winid):
    """Return the body text of acme window *winid*, read via `9p read`.

    Exits with a message when the 9p command reports failure, rather
    than treating the failure as an empty window.
    """
    proc = subprocess.Popen(
        ["9p", "read", "acme/%s/body" % winid],
        stdout=subprocess.PIPE)
    # stderr is not piped, so the second element is always None.
    body, _ = proc.communicate()
    if proc.returncode != 0:
        sys.exit("Occur: 9p read failed with status %d" % proc.returncode)
    # On Python 2 the pipe already yields str and this is a no-op; on
    # Python 3 it yields bytes, which must be decoded before tokenizing.
    if not isinstance(body, str):
        body = body.decode("utf-8", "replace")
    return body


def main(argv):
    """Print one word's count (argv[1]) or the frequency of every word.

    The window to read is named by the `winid` environment variable,
    which acme sets for the commands it runs.
    """
    winid = os.environ.get("winid")
    if winid is None:
        sys.exit("Occur: $winid is not set; run this command from inside acme")

    tokenizer = Tokenizer()
    tokenizer.fit_on_texts([read_window_body(winid)])

    if len(argv) > 1:
        print(count_for(tokenizer.word_counts, argv[1]))
    else:
        for pair in sorted_frequencies(tokenizer.word_counts):
            print("%05d %s" % pair)


if __name__ == "__main__":
    main(sys.argv)
Add Comment
Please, Sign In to add comment