Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Works both on a web server and on localhost.
- # Working example: browse for a file and upload it.
- #upload.html
<!-- Upload page: sends one file in the "filename" field to api.py as a multipart/form-data POST. -->
<html>
  <body>
    <form action="api.py" method="post" enctype="multipart/form-data">
      <p>File: <input type="file" name="filename" /></p>
      <p><input type="submit" name="action" value="Upload" /></p>
    </form>
  </body>
</html>
- ============================================================================================================================
- #API
- #api.py
- #python3 api.py
- import cgi
- import os
- import sys
- import popplerqt5
- import PyQt5
- import PyPDF2
def ExtractHighlights(InFile):
    """Collect the text covered by every highlight annotation in a PDF.

    The document at ``InFile`` is loaded with poppler-qt5 and each page's
    annotations are visited in page order. For every highlight annotation
    the page text inside each of its quads is concatenated (space
    separated) and stripped of surrounding whitespace.

    Args:
        InFile: Path of the PDF file to open.

    Returns:
        A list of ``"<text>,<page index>"`` strings, where the page index
        is the zero-based loop index of the page, or the string ``'0'``
        when the document contains no annotations at all.
    """
    poppler = popplerqt5.Poppler
    collected = []
    annotation_count = 0
    document = poppler.Document.load(InFile)

    for page_index in range(document.numPages()):
        current_page = document.page(page_index)
        page_annotations = current_page.annotations()
        page_size = current_page.pageSize()
        page_width = page_size.width()
        page_height = page_size.height()

        for note in page_annotations:
            if not isinstance(note, poppler.Annotation):
                continue
            # Every annotation counts towards the total, highlight or not.
            annotation_count += 1
            if not isinstance(note, poppler.HighlightAnnotation):
                continue

            pieces = []
            for quad in note.highlightQuads():
                first_corner = quad.points[0]
                second_corner = quad.points[2]
                # Quad coordinates are scaled by the page dimensions before
                # use -- presumably they are normalised (0..1); TODO confirm.
                bounds = PyQt5.QtCore.QRectF()
                bounds.setCoords(first_corner.x() * page_width,
                                 first_corner.y() * page_height,
                                 second_corner.x() * page_width,
                                 second_corner.y() * page_height)
                pieces.append(str(current_page.text(bounds)) + ' ')

            # Trim the whitespace left around the joined quad texts.
            highlighted = ''.join(pieces).strip()
            collected.append(highlighted + ',' + str(page_index))

    if annotation_count < 1:
        return '0'
    return collected
def BuildReport(Data):
    """Build a report of highlighted texts, each listed exactly once.

    Every record in ``Data`` has the form ``"<text>,<page>"``. Records
    sharing the same text are merged into a single entry that lists the
    text once followed by all of its page numbers, in the order they
    were encountered. Entries appear in first-occurrence order.

    Args:
        Data: A list of ``"<text>,<page>"`` strings, or the plain string
            that ExtractHighlights returns when a document has no
            annotations (``'0'``).

    Returns:
        A string of ``"<text>,<page>[,<page>...]"`` entries joined by
        ``'-'``; an empty string for an empty list. A plain string
        passed as ``Data`` is returned unchanged.
    """
    # ExtractHighlights reports "no annotations" with a bare string rather
    # than a list; indexing into it used to raise IndexError, so hand the
    # sentinel back to the caller untouched.
    if isinstance(Data, str):
        return Data

    # Plain dicts keep insertion order, which gives first-occurrence order.
    pages_by_text = {}
    for record in Data:
        # The page number follows the LAST comma; the highlighted text
        # itself may contain commas and must not be cut at the first one.
        text, separator, page = record.rpartition(',')
        if not separator:
            # Record without a page part: keep the text, list no page.
            text, page = record, None
        pages = pages_by_text.setdefault(text, [])
        if page is not None:
            pages.append(page)

    entries = [text + ',' + ','.join(pages)
               for text, pages in pages_by_text.items()]
    return '-'.join(entries)
# CGI entry point: store the uploaded file under TempPath, then print the
# highlight report for it (or 'Error' when no usable file was uploaded).
form = cgi.FieldStorage()
TempPath = '/var/www/html/PDFtool/tmp/'

# A request without the 'filename' field made form['filename'] raise
# KeyError; treat it like an empty upload so the 'Error' reply is sent.
fileitem = form['filename'] if 'filename' in form else None

# basename() drops any directory part sent by the client. The result can be
# empty (no file chosen, or a name ending in a path separator), in which
# case there is nothing to write.
fn = os.path.basename(getattr(fileitem, 'filename', None) or '')

if fn:
    # Close the file before poppler opens it so the content is fully flushed.
    with open(TempPath + fn, 'wb') as upload:
        upload.write(fileitem.file.read())
    # NOTE(review): the stored upload is never removed, and the report is
    # printed as text/html without escaping -- confirm both are intended.
    print('Content-type:text/html\r\n')
    print(BuildReport(ExtractHighlights(TempPath + fn)))
else:
    print('Content-type:text/html\r\n')
    print('Error')
Add Comment
Please, Sign In to add comment