Guest User

rainyrhy

a guest
Oct 26th, 2008
156
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.00 KB | None | 0 0
  1. #!/usr/bin/python
  2. #
  3. # ------------------------------------------------------------------------
  4. #   quick hack for importing google notebook notes onto evernote.
  5. #   some parts are from samples from my test export.
  6. #   written by rainyrhy on 23 Oct 2008
  7. # ---------------
  8. #   for now, it reads read.xml from the current directory and writes the
  9. #   result to write.enex
  10. # ---------------
  11. #   this script works successfully most of the time.
  12. #   known problems:
  13. #       - exporting - source url in title
  14. #       - evernote can't sync some notes upstream, said that content
  15. #         data is incorrect
  16. #       - tag not supported.
  17. # ------------------------------------------------------------------------
  18. from xml import sax
  19. from datetime import datetime
  20. import pprint
  21.  
  22. class ParseGoogleReader(sax.handler.ContentHandler):
  23.     def startDocument(self):
  24.         self.notelist = []
  25.         self.curnote = None
  26.  
  27.         self.receive = None # characters receive status, enumuration of the 3 below
  28.         self.recv_date = 'date'
  29.         self.recv_title = 'title'
  30.         self.recv_content = 'content'
  31.  
  32.     def startElement(self, name, attrs):
  33.         self.receive = None # in case it skipped characters callback
  34.                             # cause there is none
  35.  
  36.         if name == 'entry':
  37.             print 'new note'
  38.             self.curnote = {}
  39.  
  40.         if self.curnote is None: return
  41.  
  42.         if name == 'updated':
  43.             print 'taking date'
  44.             self.receive = self.recv_date
  45.  
  46.         if name == 'title':
  47.             print 'taking title'
  48.             self.receive = self.recv_title
  49.  
  50.         if name == 'content':
  51.             print 'taking content'
  52.             self.receive = self.recv_content
  53.  
  54.         if name == 'link':
  55.             content = attrs.getValue('href')
  56.             print 'link', content
  57.             self.curnote['link'] = content
  58.  
  59.     def endElement(self, name):
  60.         if name == 'entry':
  61.             print 'end note'
  62.             self.notelist.append(self.curnote)
  63.             self.curnote = None;
  64.  
  65.     def characters(self, content):
  66.         if self.receive is self.recv_date:
  67.             print 'receiving date', content
  68.             content = content[ : content.rfind('.')]
  69.             content = content.replace('T', ' ')
  70.             date = datetime.strptime(content, "%Y-%m-%d %H:%M:%S")
  71.             self.curnote['date'] = date
  72.  
  73.         if self.receive is self.recv_title:
  74.             try:
  75.                 print 'receiving title', content
  76.             except UnicodeEncodeError:
  77.                 pass
  78.  
  79.             self.curnote['title'] = content
  80.  
  81.         if self.receive is self.recv_content:
  82.             print 'receiving content', len(content)
  83.             if 'content' in self.curnote:
  84.                 self.curnote['content'] += content
  85.             else:
  86.                 self.curnote['content'] = content
  87.  
  88. # -- some strings taken from sample. -----
  89. export_header = '''<?xml version="1.0" encoding="UTF-8"?>
  90. <!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export.dtd">
  91. <en-export export-date="20081011T080456Z" application="Evernote" version="Mac 1.1.5 (36338)">
  92. '''
  93.  
  94. export_close = '''</en-export>'''
  95.  
  96. export_content_start = '''<content><![CDATA[<?xml version="1.0" encoding="UTF-8"?>
  97. <!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml.dtd">
  98. <en-note>'''
  99.  
  100. export_content_end = '''</en-note>]]></content>'''
  101.  
  102. # string if there's no link. make it look like a manual created note.
  103. export_deflink = '''<note-attributes><subject-date>19700101T000000Z</subject-date><source-application>Evernote Web; 36564; Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3</source-application></note-attributes>'''
  104.  
  105. # string if there's a link. make it look like a web clip.
  106. export_link_start = '''<note-attributes><subject-date>19700101T000000Z</subject-date><source>web.clip</source><source-url>'''
  107. export_link_end = '''</source-url></note-attributes>'''
  108.  
  109. def writenode(writeout, nodename, data):
  110.     ''' write data in the nodename as tagname to writeout '''
  111.     writeout.write( '<%s>' % nodename )
  112.     writeout.write(data)
  113.     writeout.write( '</%s>' % nodename )
  114.  
  115. def writenote(writeout, note):
  116.     ''' func for 1 note'''
  117.     writeout.write('<note>')
  118.  
  119.     if 'title' not in note:
  120.         note['title'] = ''
  121.     if 'content' not in note:
  122.         note['content'] = ''
  123.  
  124.     title = note['title'].encode('utf-8')
  125.     content = note['content'].encode('utf-8')
  126.  
  127.     writenode(writeout,'title', title)
  128.  
  129.     writeout.write(export_content_start)
  130.     writeout.write(content)
  131.     writeout.write(export_content_end)
  132.  
  133.     # weird way of date string from evernote.
  134.     datefoo = note['date'].strftime('%Y%m%d')
  135.     datebar = note['date'].strftime('%H%M%S')
  136.     datestr = datefoo + 'T' + datebar + 'Z'
  137.  
  138.     writenode(writeout, 'created', datestr)
  139.     writenode(writeout, 'updated', datestr)
  140.  
  141.     if 'link' in note:
  142.         writeout.write(export_link_start)
  143.         link = sax.saxutils.escape(note['link'])
  144.         writeout.write(link)
  145.         writeout.write(export_link_end)
  146.     else:
  147.         writeout.write(export_deflink)
  148.    
  149.     writeout.write('</note>')
  150.  
  151. def main():
  152.     read = ParseGoogleReader();
  153.     sax.parse('read.xml', read)
  154.  
  155.     print len(read.notelist)
  156.     pp = pprint.PrettyPrinter(width=80)
  157.     #pp.pprint(read.notelist)
  158.  
  159.     writeout = open('write.enex','w')
  160.     writeout.write(export_header)
  161.     for note in read.notelist:
  162.         writenote(writeout,note)
  163.     writeout.write(export_close)
  164.     writeout.flush()
  165.     writeout.close()
  166.  
  167. if __name__ == "__main__":
  168.     main()
  169.  
Advertisement
Add Comment
Please, Sign In to add comment