Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #grab_Highlighted_texts_and_its_page_Number_from_PDF
- #python3
- #python3 highlight.py example.pdf
- #skip420@skip420:~/Desktop/pdfhighlight$ python3 highlight.py example.pdf
- challenge 11
- (nutritional 11
- legumes, 16
- Pritikin 17
- 4 annotation(s) found
- #Author:Skip420
- # python3 test5.py example.pdf
- #Note: Replace "example.pdf" with the name (or path) of the PDF file you want to scan.
- import popplerqt5 # sudo apt-get install -y python3-poppler-qt5
- import sys
- import PyQt5
- import urllib
- import os
- import PyPDF2
def main():
    """Print the text under every highlight annotation of a PDF.

    The PDF path is taken from ``sys.argv[1]``. For each highlight
    annotation one line is printed: the text found under the highlight's
    quads, followed by the zero-based index of the page it is on. A final
    line reports how many annotations (of any type) were seen, or that
    none were found.

    Raises:
        SystemExit: if no path was given on the command line, or the
            document could not be loaded.
    """
    # Fail with a usage message instead of a bare IndexError.
    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else "highlight.py"
        sys.exit("usage: {} FILE.pdf".format(prog))

    path = sys.argv[1]
    doc = popplerqt5.Poppler.Document.load(path)
    # A failed load yields None; stop here rather than crash on doc.numPages().
    if doc is None:
        sys.exit("could not open PDF: {}".format(path))

    poppler = popplerqt5.Poppler
    total_annotations = 0
    for x in range(doc.numPages()):
        page = doc.page(x)
        size = page.pageSize()
        pwidth, pheight = size.width(), size.height()

        for annotation in page.annotations():
            if not isinstance(annotation, poppler.Annotation):
                continue
            # Every annotation is counted, not only highlights.
            total_annotations += 1
            if not isinstance(annotation, poppler.HighlightAnnotation):
                continue

            pieces = []
            for quad in annotation.highlightQuads():
                # Corners 0 and 2 of the quad are scaled by the page size to
                # form the rectangle whose text is extracted.
                bdy = PyQt5.QtCore.QRectF()
                bdy.setCoords(
                    quad.points[0].x() * pwidth,
                    quad.points[0].y() * pheight,
                    quad.points[2].x() * pwidth,
                    quad.points[2].y() * pheight,
                )
                pieces.append(str(page.text(bdy)) + " ")

            # x is the zero-based page index used by doc.page().
            print("".join(pieces), x)

    if total_annotations > 0:
        print(str(total_annotations) + " annotation(s) found")
    else:
        print("no annotations found")
# Run the extractor only when this file is executed as a script.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement