Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- '''
- pip install pdfminer3k
- '''
- from pdfminer.pdfparser import PDFParser, PDFDocument
- from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
- from pdfminer.layout import LAParams, LTTextContainer
- from pdfminer.converter import PDFPageAggregator
- from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
def search(pdf_path, key):
    """Return True if *key* occurs in the text extracted from a PDF.

    The stripped text of every ``LTTextContainer`` layout element is treated
    as one continuous string, in page and layout order, with nothing inserted
    between elements. A match may therefore span two adjacent elements. The
    search stops at the first element that completes a match.

    Args:
        pdf_path: Path of the PDF file; it is opened in binary mode.
        key: Substring to look for (a ``str``).

    Returns:
        True as soon as *key* is found, otherwise False once every page has
        been processed.

    Raises:
        PDFTextExtractionNotAllowed: If the document reports that it is not
            extractable.
    """
    # The context manager closes the file on every exit path: early
    # ``return True``, the final ``return False``, and any exception.
    with open(pdf_path, 'rb') as fp:
        parser = PDFParser(fp)
        doc = PDFDocument()
        # Parser and document have to be linked in both directions before
        # the document is initialized.
        parser.set_document(doc)
        doc.set_parser(parser)
        doc.initialize()
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed

        rsrcmgr = PDFResourceManager()
        device = PDFPageAggregator(rsrcmgr, laparams=LAParams())
        interpreter = PDFPageInterpreter(rsrcmgr, device)

        # A match that was not present before the current element must end
        # inside it, so at most len(key) - 1 earlier characters can take
        # part. Keeping only that tail gives the same answer as searching
        # the whole accumulated text, without re-scanning it every time.
        overlap = max(len(key) - 1, 0)
        tail = ''
        for page in doc.get_pages():
            interpreter.process_page(page)
            for element in device.get_result():
                if not isinstance(element, LTTextContainer):
                    continue
                window = tail + element.get_text().strip()
                if key in window:
                    return True
                # ``window[-0:]`` would be the whole string, hence the guard.
                tail = window[-overlap:] if overlap else ''
    return False
Add Comment
Please, Sign In to add comment