Guest
Public paste!

rainyrhy

By: a guest | Oct 26th, 2008 | Syntax: Python | Size: 5.00 KB | Hits: 129 | Expires: Never
Copy text to clipboard
  1. #!/usr/bin/python
  2. #
  3. # ------------------------------------------------------------------------
  4. #       quick hack for importing google notebook notes onto evernote.
  5. #       some parts are from samples from my test export.
  6. #       written by rainyrhy on 23 Oct 2008
  7. # ---------------
  8. #       as for now, it opens read.xml in the current directory and outputs as
  9. #       write.enex
  10. # ---------------
  11. #       this script works successfully most of the time.
  12. #       known problems:
  13. #               - exporting - source url in title
  14. #               - evernote can't sync some notes upstream, said that content
  15. #                 data is incorrect
  16. #               - tag not supported.
  17. # ------------------------------------------------------------------------
  18. from xml import sax
  19. from datetime import datetime
  20. import pprint
  21.  
  22. class ParseGoogleReader(sax.handler.ContentHandler):
  23.         def startDocument(self):
  24.                 self.notelist = []
  25.                 self.curnote = None
  26.  
  27.                 self.receive = None     # characters receive status, enumuration of the 3 below
  28.                 self.recv_date = 'date'
  29.                 self.recv_title = 'title'
  30.                 self.recv_content = 'content'
  31.  
  32.         def startElement(self, name, attrs):
  33.                 self.receive = None # in case it skipped characters callback
  34.                                     # cause there is none
  35.  
  36.                 if name == 'entry':
  37.                         print 'new note'
  38.                         self.curnote = {}
  39.  
  40.                 if self.curnote is None: return
  41.  
  42.                 if name == 'updated':
  43.                         print 'taking date'
  44.                         self.receive = self.recv_date
  45.  
  46.                 if name == 'title':
  47.                         print 'taking title'
  48.                         self.receive = self.recv_title
  49.  
  50.                 if name == 'content':
  51.                         print 'taking content'
  52.                         self.receive = self.recv_content
  53.  
  54.                 if name == 'link':
  55.                         content = attrs.getValue('href')
  56.                         print 'link', content
  57.                         self.curnote['link'] = content
  58.  
  59.         def endElement(self, name):
  60.                 if name == 'entry':
  61.                         print 'end note'
  62.                         self.notelist.append(self.curnote)
  63.                         self.curnote = None;
  64.  
  65.         def characters(self, content):
  66.                 if self.receive is self.recv_date:
  67.                         print 'receiving date', content
  68.                         content = content[ : content.rfind('.')]
  69.                         content = content.replace('T', ' ')
  70.                         date = datetime.strptime(content, "%Y-%m-%d %H:%M:%S")
  71.                         self.curnote['date'] = date
  72.  
  73.                 if self.receive is self.recv_title:
  74.                         try:
  75.                                 print 'receiving title', content
  76.                         except UnicodeEncodeError:
  77.                                 pass
  78.  
  79.                         self.curnote['title'] = content
  80.  
  81.                 if self.receive is self.recv_content:
  82.                         print 'receiving content', len(content)
  83.                         if 'content' in self.curnote:
  84.                                 self.curnote['content'] += content
  85.                         else:
  86.                                 self.curnote['content'] = content
  87.  
  88. # -- some strings taken from sample. -----
  89. export_header = '''<?xml version="1.0" encoding="UTF-8"?>
  90. <!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export.dtd">
  91. <en-export export-date="20081011T080456Z" application="Evernote" version="Mac 1.1.5 (36338)">
  92. '''
  93.  
  94. export_close = '''</en-export>'''
  95.  
  96. export_content_start = '''<content><![CDATA[<?xml version="1.0" encoding="UTF-8"?>
  97. <!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml.dtd">
  98. <en-note>'''
  99.  
  100. export_content_end = '''</en-note>]]></content>'''
  101.  
  102. # string if there's no link. make it look like a manual created note.
  103. export_deflink = '''<note-attributes><subject-date>19700101T000000Z</subject-date><source-application>Evernote Web; 36564; Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3</source-application></note-attributes>'''
  104.  
  105. # string if there's a link. make it look like a web clip.
  106. export_link_start = '''<note-attributes><subject-date>19700101T000000Z</subject-date><source>web.clip</source><source-url>'''
  107. export_link_end = '''</source-url></note-attributes>'''
  108.  
  109. def writenode(writeout, nodename, data):
  110.         ''' write data in the nodename as tagname to writeout '''
  111.         writeout.write( '<%s>' % nodename )
  112.         writeout.write(data)
  113.         writeout.write( '</%s>' % nodename )
  114.  
  115. def writenote(writeout, note):
  116.         ''' func for 1 note'''
  117.         writeout.write('<note>')
  118.  
  119.         if 'title' not in note:
  120.                 note['title'] = ''
  121.         if 'content' not in note:
  122.                 note['content'] = ''
  123.  
  124.         title = note['title'].encode('utf-8')
  125.         content = note['content'].encode('utf-8')
  126.  
  127.         writenode(writeout,'title', title)
  128.  
  129.         writeout.write(export_content_start)
  130.         writeout.write(content)
  131.         writeout.write(export_content_end)
  132.  
  133.         # weird way of date string from evernote.
  134.         datefoo = note['date'].strftime('%Y%m%d')
  135.         datebar = note['date'].strftime('%H%M%S')
  136.         datestr = datefoo + 'T' + datebar + 'Z'
  137.  
  138.         writenode(writeout, 'created', datestr)
  139.         writenode(writeout, 'updated', datestr)
  140.  
  141.         if 'link' in note:
  142.                 writeout.write(export_link_start)
  143.                 link = sax.saxutils.escape(note['link'])
  144.                 writeout.write(link)
  145.                 writeout.write(export_link_end)
  146.         else:
  147.                 writeout.write(export_deflink)
  148.        
  149.         writeout.write('</note>')
  150.  
  151. def main():
  152.         read = ParseGoogleReader();
  153.         sax.parse('read.xml', read)
  154.  
  155.         print len(read.notelist)
  156.         pp = pprint.PrettyPrinter(width=80)
  157.         #pp.pprint(read.notelist)
  158.  
  159.         writeout = open('write.enex','w')
  160.         writeout.write(export_header)
  161.         for note in read.notelist:
  162.                 writenote(writeout,note)
  163.         writeout.write(export_close)
  164.         writeout.flush()
  165.         writeout.close()
  166.  
# Run the conversion only when this file is executed as a script, so that
# importing it has no side effects.
if __name__ == "__main__":
        main()