Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import re

import enchant  # https://pythonhosted.org/pyenchant/

# Collect the en_US dictionary words that appear in the raw bytes of FILENAME.
# NOTE(review): this scans the file's raw bytes, not text extracted from the
# PDF structure -- confirm that is the intended input.
FILENAME = 'file.pdf'
d = enchant.Dict("en_US")

with open(FILENAME, 'rb') as f:
    menu = f.read()

# The file is read as bytes, and Python 3's re refuses to apply a str pattern
# to bytes input, so decode first. latin-1 maps every byte to exactly one
# character: decoding cannot fail and ASCII letters are left unchanged.
# Every run of non-letters is then collapsed into a single space.
m = re.sub(r'[^a-zA-Z]+', ' ', menu.decode('latin-1'))

# Keep tokens longer than one character that the dictionary accepts. The
# length test runs first, so empty strings produced by the split never reach
# d.check(). Built as a list so the result can be indexed and reused.
words = [w for w in m.split(' ') if len(w) > 1 and d.check(w)]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement