Guest User

Untitled

a guest
Jan 18th, 2019
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.13 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. '''
  3. pip install pdfminer3k
  4. '''
  5. from pdfminer.pdfparser import PDFParser, PDFDocument
  6. from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
  7. from pdfminer.layout import LAParams, LTTextContainer
  8. from pdfminer.converter import PDFPageAggregator
  9. from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
  10.  
  11.  
  12. def search(pdf_path, key):
  13. fp = open(pdf_path, 'rb')
  14. parser = PDFParser(fp)
  15. doc = PDFDocument()
  16. parser.set_document(doc)
  17. doc.set_parser(parser)
  18. doc.initialize()
  19.  
  20. if not doc.is_extractable:
  21. raise PDFTextExtractionNotAllowed
  22. else:
  23. rsrcmgr = PDFResourceManager()
  24. laparams = LAParams()
  25. device = PDFPageAggregator(rsrcmgr, laparams=laparams)
  26. interpreter = PDFPageInterpreter(rsrcmgr, device)
  27. text = ''
  28. for page in doc.get_pages():
  29. interpreter.process_page(page)
  30. layout = device.get_result()
  31. for x in layout:
  32. if isinstance(x, LTTextContainer):
  33. text += x.get_text().strip()
  34. if key in text:
  35. return True
  36. return False
Add Comment
Please, Sign In to add comment