Advertisement
Guest User

Untitled

a guest
Jun 25th, 2019
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.88 KB | None | 0 0
  1. import io
  2.  
  3. from pdfminer.converter import TextConverter
  4. from pdfminer.pdfinterp import PDFPageInterpreter
  5. from pdfminer.pdfinterp import PDFResourceManager
  6. from pdfminer.pdfpage import PDFPage
  7.  
  8. def extract_text_from_pdf(pdf_path):
  9. resource_manager = PDFResourceManager()
  10. fake_file_handle = io.StringIO()
  11. converter = TextConverter(resource_manager, fake_file_handle)
  12. page_interpreter = PDFPageInterpreter(resource_manager, converter)
  13.  
  14. with open(pdf_path, 'rb') as fh:
  15. for page in PDFPage.get_pages(fh,
  16. caching=True,
  17. check_extractable=True):
  18. page_interpreter.process_page(page)
  19.  
  20. text = fake_file_handle.getvalue()
  21.  
  22. # close open handles
  23. converter.close()
  24. fake_file_handle.close()
  25.  
  26. if text:
  27. return text
  28.  
  29. if __name__ == '__main__':
  30. print(extract_text_from_pdf('test.pdf'))
  31.  
  32. ---------------------------------------------------------------------------
  33. TypeError Traceback (most recent call last)
  34. <ipython-input-7-cbf464387547> in <module>
  35. 28
  36. 29 if __name__ == '__main__':
  37. ---> 30 print(extract_text_from_pdf('test.pdf'))
  38.  
  39. <ipython-input-7-cbf464387547> in extract_text_from_pdf(pdf_path)
  40. 16 caching=True,
  41. 17 check_extractable=True):
  42. ---> 18 page_interpreter.process_page(page)
  43. 19
  44. 20 text = fake_file_handle.getvalue()
  45.  
  46. ~/anaconda3/lib/python3.7/site-packages/pdfminer/pdfinterp.py in process_page(self, page)
  47. 840 def process_page(self, page):
  48. 841 log.info('Processing page: %r', page)
  49. --> 842 (x0, y0, x1, y1) = page.mediabox
  50. 843 if page.rotate == 90:
  51. 844 ctm = (0, -1, 1, 0, -y0, x1)
  52.  
  53. TypeError: cannot unpack non-iterable NoneType object
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement