Advertisement
Guest User

Untitled

a guest
Sep 15th, 2019
116
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.56 KB | None | 0 0
  1. import PyPDF2
  2. import re
  3. import glob
  4.  
  5.  
  6. for file in glob.glob("pdf/*.pdf"):
  7. print(file)
  8. if file.endswith('.pdf'):
  9. fileReader = PyPDF2.PdfFileReader(open(file, "rb"))
  10. count = 0
  11. count = fileReader.numPages
  12. while count >= 0:
  13. count -= 1
  14. pageObj = fileReader.getPage(count)
  15. text = pageObj.extractText()
  16. resul = re.sub('[^a-zA-Z \n\.]',' ',text)
  17. print(resul)
  18. teste = resul
  19. teste.lower()
  20. print(teste)
  21. else:
  22. print("Não tem esse formato!")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement