Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/python
#
# ------------------------------------------------------------------------
# quick hack for importing google notebook notes into evernote.
# some parts are taken from samples in my test export.
# written by rainyrhy on 23 Oct 2008
# ---------------
# for now, it reads read.xml from the current directory and writes the
# result to write.enex
# ---------------
# this script works successfully most of the time.
# known problems:
# - exporting - source url in title
# - evernote can't sync some notes upstream, reporting that the content
# data is incorrect
# - tags are not supported.
# ------------------------------------------------------------------------
import pprint
from datetime import datetime
from xml import sax
class ParseGoogleReader(sax.handler.ContentHandler):
    """SAX handler that collects the notes of a Google Notebook Atom export.

    After a parse, ``notelist`` holds one dict per ``<entry>`` element.  Each
    dict may contain the keys:

    * ``'date'``    -- naive ``datetime`` parsed from ``<updated>``
    * ``'title'``   -- text of ``<title>``
    * ``'content'`` -- text of ``<content>``
    * ``'link'``    -- ``href`` of the last ``<link>`` that carried one

    A key is only present when the corresponding data was seen.  Elements
    outside an ``<entry>`` (for example the feed-level title) are ignored.
    Progress messages are printed to stdout while parsing.
    """

    # Possible values of ``self.receive``: the note field that character
    # data is currently routed to (``None`` means "ignore text").
    recv_date = 'date'
    recv_title = 'title'
    recv_content = 'content'

    def startDocument(self):
        """Reset all parser state at the beginning of a document."""
        self.notelist = []
        self.curnote = None
        self.receive = None
        self._chunks = []  # text pieces collected for the current field

    def startElement(self, name, attrs):
        """Open a note on ``<entry>`` and select the field to capture."""
        # Any new element ends the capture of the previous field.
        self._commit()
        if name == 'entry':
            self._trace('new note')
            self.curnote = {}
        if self.curnote is None:
            return
        if name == 'updated':
            self._trace('taking date')
            self.receive = self.recv_date
        elif name == 'title':
            self._trace('taking title')
            self.receive = self.recv_title
        elif name == 'content':
            self._trace('taking content')
            self.receive = self.recv_content
        elif name == 'link':
            # Not every <link> carries an href; skip those instead of raising.
            href = attrs.get('href')
            if href is not None:
                self._trace('link %s' % href)
                self.curnote['link'] = href

    def endElement(self, name):
        """Store the captured field and close the note on ``</entry>``."""
        # Committing here also clears ``receive``, so whitespace between
        # elements is never mistaken for field data.
        self._commit()
        if name == 'entry' and self.curnote is not None:
            self._trace('end note')
            self.notelist.append(self.curnote)
            self.curnote = None

    def characters(self, content):
        """Buffer character data belonging to the field being captured.

        The parser may deliver the text of one element in several calls
        (entity references typically split it), so the pieces are only
        joined and stored once the element ends.
        """
        if self.receive is None:
            return
        if self.receive == self.recv_content:
            self._trace('receiving content %d' % len(content))
        else:
            # ``receive`` is 'date' or 'title' here, which is exactly the
            # word used in the progress message.
            self._trace('receiving %s %s' % (self.receive, content))
        self._chunks.append(content)

    def _commit(self):
        """Move the buffered text into the current note and stop capturing."""
        field, chunks = self.receive, self._chunks
        self.receive = None
        self._chunks = []
        if field is None or not chunks or self.curnote is None:
            return
        text = ''.join(chunks)
        if field == self.recv_date:
            stamp = text.strip()
            if stamp:
                # Keep only 'YYYY-MM-DDTHH:MM:SS'; fractional seconds and
                # any zone suffix are dropped, the value is used as-is.
                self.curnote['date'] = datetime.strptime(
                    stamp[:19], '%Y-%m-%dT%H:%M:%S')
        elif field == self.recv_title:
            self.curnote['title'] = text
        else:
            # Content interrupted by a child element keeps what came before.
            self.curnote['content'] = self.curnote.get('content', '') + text

    def _trace(self, message):
        """Print a progress line without letting it abort the parse."""
        try:
            print(message)
        except UnicodeEncodeError:
            # The console cannot show this text; the data itself is intact.
            pass
# -- Fixed ENEX fragments, taken from a sample Evernote export. -----
# Opening of the export file; the export-date and version are whatever the
# sample export contained.
export_header = '''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export.dtd">
<en-export export-date="20081011T080456Z" application="Evernote" version="Mac 1.1.5 (36338)">
'''
export_close = '''</en-export>'''
# Wrapper around a note body: the body is an ENML document carried inside a
# CDATA section of the <content> element.
export_content_start = '''<content><![CDATA[<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml.dtd">
<en-note>'''
export_content_end = '''</en-note>]]></content>'''
# Attributes used if there's no link: make it look like a manually created note.
export_deflink = '''<note-attributes><subject-date>19700101T000000Z</subject-date><source-application>Evernote Web; 36564; Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3</source-application></note-attributes>'''
# Attributes used if there's a link: make it look like a web clip.  The
# escaped source URL goes between these two fragments.
export_link_start = '''<note-attributes><subject-date>19700101T000000Z</subject-date><source>web.clip</source><source-url>'''
export_link_end = '''</source-url></note-attributes>'''
def writenode(writeout, nodename, data):
    """Emit ``data`` to ``writeout`` wrapped in a ``<nodename>`` element.

    ``data`` is written exactly as given; no escaping is applied here.
    """
    opening = '<%s>' % nodename
    closing = '</%s>' % nodename
    for piece in (opening, data, closing):
        writeout.write(piece)
def _native_text(text):
    """Return ``text`` in the string type a plain ``open(..., 'w')`` file takes.

    On Python 2 such files are byte streams, so unicode text is encoded as
    UTF-8; on Python 3 the text is returned unchanged.
    """
    if str is bytes and not isinstance(text, str):
        return text.encode('utf-8')
    return text


def writenote(writeout, note):
    """Write one note as an ENEX ``<note>`` element to ``writeout``.

    ``note`` is a dict with the optional keys ``'title'``, ``'content'``
    (both text), ``'date'`` (a ``datetime``, used for both the created and
    the updated stamp) and ``'link'`` (source URL).  Missing title/content
    are written as empty, a missing date omits the two stamps, and a
    missing link selects the "manually created note" attributes.  ``note``
    is not modified.
    """
    # Imported here: ``from xml import sax`` alone does not guarantee that
    # the ``saxutils`` submodule has been loaded.
    from xml.sax.saxutils import escape

    # The title is ordinary element text, so markup characters must be escaped.
    title = escape(note.get('title', ''))
    # A literal ']]>' would end the CDATA section early; split it so that
    # the first section ends after ']]' and a new one starts before '>'.
    content = note.get('content', '').replace(']]>', ']]]]><![CDATA[>')

    writeout.write('<note>')
    writenode(writeout, 'title', _native_text(title))
    writeout.write(export_content_start)
    writeout.write(_native_text(content))
    writeout.write(export_content_end)

    date = note.get('date')
    if date is not None:
        # Evernote's compact timestamp form, e.g. 20081023T123456Z.
        datestr = date.strftime('%Y%m%dT%H%M%SZ')
        writenode(writeout, 'created', datestr)
        writenode(writeout, 'updated', datestr)

    if 'link' in note:
        writeout.write(export_link_start)
        writeout.write(_native_text(escape(note['link'])))
        writeout.write(export_link_end)
    else:
        writeout.write(export_deflink)
    writeout.write('</note>')
def main():
    """Convert ``read.xml`` into ``write.enex`` in the current directory.

    Parses the input with ``ParseGoogleReader``, prints the number of notes
    found, then writes the export header, every note and the closing tag.
    """
    handler = ParseGoogleReader()
    sax.parse('read.xml', handler)
    print(len(handler.notelist))

    # The export header declares UTF-8.  Python 3 text files need that
    # spelled out; Python 2 files are byte streams and are handed data that
    # is already encoded.
    open_kwargs = {} if str is bytes else {'encoding': 'utf-8'}
    # ``with`` closes (and thereby flushes) the file even if a note fails.
    with open('write.enex', 'w', **open_kwargs) as writeout:
        writeout.write(export_header)
        for note in handler.notelist:
            writenote(writeout, note)
        writeout.write(export_close)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment