Norod78

macOS-pdf2txt.py

Jun 29th, 2021 (edited)
255
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.13 KB | None | 0 0
  1. #!/usr/bin/python
  2. # coding: utf-8
  3.  
  4. ########## Install dependencies
  5. #sudo -H /usr/bin/pip3 install pyobjc-core pyobjc-framework-Cocoa pyobjc-framework-Quartz
  6.  
  7. ########## Usage example
  8. #/usr/bin/python3 ./macOS-pdf2txt.py ./input.pdf
  9.  
  10. import os, sys
  11. from Quartz import PDFDocument
  12. from CoreFoundation import (NSURL, NSString)
  13. NSUTF8StringEncoding = 4
  14.  
  15. def pdf2txt():
  16.     for filename in sys.argv[1:]:  
  17.         #inputfile =filename.decode('utf-8') #python2
  18.         inputfile = filename #pyhton3
  19.         shortName = os.path.splitext(filename)[0]        
  20.         pdfURL = NSURL.fileURLWithPath_(inputfile)
  21.         pdfDoc = PDFDocument.alloc().initWithURL_(pdfURL)
  22.         if pdfDoc :
  23.             pdfString = NSString.stringWithString_(pdfDoc.string())
  24.             if len(pdfString) < 4096:
  25.                 outputfile = str(shortName) + str("-ERR.txt")
  26.             else:
  27.                 outputfile = str(shortName) + str("-OK.txt")
  28.             print("Writing " + str(outputfile))
  29.             pdfString.writeToFile_atomically_encoding_error_(outputfile, True, NSUTF8StringEncoding, None)
  30.  
  31. if __name__ == "__main__":
  32.    pdf2txt()
Add Comment
Please, Sign In to add comment