Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # coding: utf-8
- ########## Install dependencies
- #sudo -H /usr/bin/pip3 install pyobjc-core pyobjc-framework-Cocoa pyobjc-framework-Quartz
- ########## Usage example
- #/usr/bin/python3 ./macOS-pdf2txt.py ./input.pdf
- import os, sys
- from Quartz import PDFDocument
- from CoreFoundation import (NSURL, NSString)
# Numeric value of Foundation's NSUTF8StringEncoding; passed as the `encoding`
# argument when the extracted text is written to disk.
NSUTF8StringEncoding = 4
def pdf2txt(filenames=None, min_length=4096):
    """Extract the text of each PDF via PDFKit and write it to a text file.

    For an input path ``<name>.pdf`` the extracted text is written to
    ``<name>-OK.txt`` when it contains at least ``min_length`` characters,
    and to ``<name>-ERR.txt`` when it is shorter (including empty).

    Args:
        filenames: Iterable of PDF paths to convert. Defaults to the
            command-line arguments (``sys.argv[1:]``).
        min_length: Minimum number of extracted characters required for the
            output to get the ``-OK`` suffix.

    Returns:
        None. Files that cannot be opened or written are reported on stderr
        and the remaining files are still processed.
    """
    if filenames is None:
        filenames = sys.argv[1:]

    for filename in filenames:
        pdf_url = NSURL.fileURLWithPath_(filename)
        pdf_doc = PDFDocument.alloc().initWithURL_(pdf_url)
        if not pdf_doc:
            # initWithURL_ yields a falsy result when the file is missing or
            # is not a readable PDF; report it instead of skipping silently.
            sys.stderr.write("Cannot open PDF: " + str(filename) + "\n")
            continue

        # string() can be None when the document has no text layer (e.g. a
        # scanned PDF); stringWithString_ does not accept None, so fall back
        # to an empty string and let the length check route it to "-ERR".
        pdf_string = NSString.stringWithString_(pdf_doc.string() or "")

        suffix = "-OK.txt" if len(pdf_string) >= min_length else "-ERR.txt"
        outputfile = os.path.splitext(filename)[0] + suffix

        print("Writing " + str(outputfile))
        # PyObjC returns the NSError** out-parameter as a second result.
        ok, error = pdf_string.writeToFile_atomically_encoding_error_(
            outputfile, True, NSUTF8StringEncoding, None
        )
        if not ok:
            sys.stderr.write(
                "Failed to write " + str(outputfile) + ": " + str(error) + "\n"
            )
# Convert the PDFs named on the command line only when run as a script,
# not when this module is imported.
if __name__ == "__main__":
    pdf2txt()
Add Comment
Please, Sign In to add comment