Guest User

Untitled

a guest
Feb 22nd, 2018
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.79 KB | None | 0 0
  1. #!/usr/bin/env python2 -W ignore::FutureWarning
  2. # put this in your path, rename to just 'Occur'
  3. # Occur import <- call with argument to count one word
  4. # or call no arguments to list frequencies of all words
  5. # -- Uses the default Tokenizer settings
  6. # which obliterate all punctuation
  7. # and make everything lower case.
  8. import os
  9. import subprocess
  10. import sys
  11. from keras.preprocessing.text import Tokenizer
  12.  
  13. winid = os.environ['winid']
  14. px = subprocess.Popen(
  15. ("9p read acme/%s/body" % winid).split(),
  16. stdout=subprocess.PIPE)
  17. body, err = px.communicate()
  18.  
  19. T = Tokenizer()
  20. T.fit_on_texts([body])
  21.  
  22. #print(T.word_counts)
  23.  
  24. if len(sys.argv) > 1:
  25. print(T.word_counts[sys.argv[1]])
  26.  
  27. else:
  28. freqs = [(v,k) for k,v in T.word_counts.items()]
  29. freqs.sort()
  30. for pair in freqs:
  31. print("%05d %s" % pair)
Add Comment
Please, Sign In to add comment