Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import argparse as ap
- import os, os.path
- from xml.dom.minidom import Document as XmlDocument
- import time
# Version of this importer script; interpolated into the start-up banner.
VERSION = "1.0"

# Banner printed once when the script starts.
GREETING = "CintaNotes TXT folder importer V%s.\n" % (VERSION,)
def main():
    """Run the importer from the command line.

    Prints the banner and the usage text, parses the command-line
    arguments, converts the requested folder and reports how many files
    were converted.
    """
    print(GREETING)

    parser = createArgsParser()
    # The usage text is shown on every run, before the arguments are parsed.
    parser.print_help()
    options = parser.parse_args()

    print('Processing..')
    converted = convert(
        options.inputFolder,
        options.outputXML,
        options.encoding,
        options.recurse,
    )
    print('\n-> Converted %d file(s).' % converted)
def createArgsParser():
    """Build the argparse parser used by the importer.

    Positional arguments:
        inputFolder -- folder holding the TXT files to convert.
        outputXML   -- name of the XML file to produce.

    Options:
        -e / --encoding -- encoding of the TXT files (default 'utf-8').
        -r / --recurse  -- flag; when given, subdirectories are walked too.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = ap.ArgumentParser(
        description="Converts folders with text files to importable XML. Only files with .txt extension are read.",
    )

    # Both positionals are plain strings; register them from a small table.
    positionals = (
        ("inputFolder", 'Folder with TXT files to be converted.'),
        ("outputXML", 'Resulting XML file name'),
    )
    for argName, helpText in positionals:
        parser.add_argument(argName, help=helpText, type=str)

    parser.add_argument(
        "-e", "--encoding",
        dest="encoding",
        help='Encoding of TXT files: utf-8 (default) or utf-16',
        type=str,
        default='utf-8',
    )
    parser.add_argument(
        "-r", "--recurse",
        dest="recurse",
        help='Recurse into subdirectories',
        action='store_true',
        default=False,
    )
    return parser
def convert(inputFolder, outputXML, encoding, recurse):
    """Convert the TXT files in a folder into one notebook XML file.

    Creates an XML document whose root is ``<notebook version="1600">``,
    lets ``convertFiles`` add one note per text file, then writes the
    pretty-printed document to ``outputXML``.

    Args:
        inputFolder: Folder to scan for ``.txt`` files.
        outputXML: Path of the XML file to write (overwritten if present).
        encoding: Encoding used to read the TXT files.
        recurse: When true, subdirectories are scanned as well.

    Returns:
        The number of text files converted.
    """
    xml = XmlDocument()
    root = xml.createElement('notebook')
    root.setAttribute('version', '1600')
    xml.appendChild(root)

    count = convertFiles(inputFolder, xml, encoding, recurse)

    # The context manager guarantees the output is flushed and closed, even
    # if serialization or writing raises. Previously the handle was never
    # closed, so a complete file on disk depended on garbage collection.
    # NOTE(review): the file is written as UTF-16LE while toprettyxml() is
    # called without an encoding argument -- confirm the consumer accepts
    # the resulting XML declaration.
    with open(outputXML, 'w', encoding='utf-16le') as output:
        output.write(xml.toprettyxml())
    return count
def convertFiles(inputFolder, xmlDocument, encoding, recurse):
    """Add every ``.txt`` file found under ``inputFolder`` to the document.

    Args:
        inputFolder: Folder to scan.
        xmlDocument: Document that receives one note per text file.
        encoding: Encoding passed on when each text file is read.
        recurse: When false, only ``inputFolder`` itself is scanned; when
            true, all subdirectories are scanned too.

    Returns:
        The number of files that were added.
    """
    converted = 0
    for folder, _subfolders, fileNames in os.walk(inputFolder):
        for fileName in fileNames:
            # The extension test is case-sensitive: '.TXT' does not match.
            if os.path.splitext(fileName)[1] != '.txt':
                continue
            addTextFileToXml(os.path.join(folder, fileName), xmlDocument, encoding)
            converted += 1
        # os.walk yields the top folder first, so stopping after the first
        # iteration limits the scan to the top-level folder.
        if not recurse:
            break
    return converted
def addTextFileToXml(filePath, xmlDocument, encoding):
    """Append one text file to the document as a ``<note>`` element.

    The note's title is the file name without its extension, its body is
    the file's text (leading BOM removed) wrapped in a CDATA section, and
    its ``link``/``source``/``created``/``modified`` attributes are derived
    from the file's absolute path and timestamps.

    Args:
        filePath: Path of the text file; converted to an absolute path.
        xmlDocument: Document whose root element receives the new note.
        encoding: Encoding used to decode the file.
    """
    filePath = os.path.abspath(filePath)

    # Close the handle as soon as the text is read. The original left every
    # file open, leaking one descriptor per converted file.
    with open(filePath, encoding=encoding) as file:
        body = removeBOM(file.read())

    title = os.path.splitext(os.path.basename(filePath))[0]

    note = xmlDocument.createElement('note')
    note.setAttribute('title', title)
    note.setAttribute('link', 'file://' + filePath)
    note.setAttribute('tags', '')
    note.setAttribute('source', filePath)
    note.setAttribute('created', getCreatedTime(filePath))
    note.setAttribute('modified', getModifiedTime(filePath))

    # CDATA keeps the note text verbatim, without entity escaping.
    note.appendChild(xmlDocument.createCDATASection(body))
    xmlDocument.documentElement.appendChild(note)
def getCreatedTime(filePath):
    """Return the file's ``os.path.getctime`` value as a timestamp string.

    NOTE(review): the meaning of ctime is platform-dependent (creation time
    on Windows, last metadata change on Unix) -- confirm this is intended.
    """
    ctime = os.path.getctime(filePath)
    return timeToStr(ctime)
def getModifiedTime(filePath):
    """Return the file's last-modification time as a timestamp string."""
    mtime = os.path.getmtime(filePath)
    return timeToStr(mtime)
def timeToStr(tm):
    """Format a POSIX timestamp as ``YYYYMMDDTHHMMSS`` in UTC.

    Args:
        tm: Seconds since the epoch.

    Returns:
        The formatted string, e.g. ``'19700101T000000'`` for ``0``.
    """
    utc = time.gmtime(tm)
    return time.strftime('%Y%m%dT%H%M%S', utc)
def removeBOM(s):
    """Return ``s`` without a single leading byte-order-mark character.

    Recognizes U+FEFF (the BOM) and U+FFFE (the BOM as it appears when
    UTF-16 text is decoded with the wrong byte order). At most one leading
    character is removed.

    The original also tested ``'\\uEFBBBF'``. That literal is not a BOM:
    ``\\u`` takes four hex digits, so it is the private-use character
    U+EFBB followed by the text "BF". That check was dropped, because it
    stripped a legitimate character from text that happened to start so.

    Args:
        s: Decoded text.

    Returns:
        ``s`` with a leading BOM removed, or ``s`` unchanged.
    """
    if s.startswith(('\uFEFF', '\uFFFE')):
        return s[1:]
    return s
# Run the importer only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement