#!/usr/bin/python
#
# ------------------------------------------------------------------------
# quick hack for importing google notebook notes onto evernote.
# some parts are from samples from my test export.
# written by rainyrhy on 23 Oct 2008
# ---------------
# for now, it reads read.xml from the current directory and writes the
# result to write.enex
# ---------------
# this script works successfully most of the time.
# known problems:
# - exporting - source url in title
# - evernote can't sync some notes upstream, said that content
# data is incorrect
# - tag not supported.
# ------------------------------------------------------------------------
from xml import sax
from datetime import datetime
import pprint
class ParseGoogleReader(sax.handler.ContentHandler):
    """SAX handler that collects notes from an Atom-style notebook export.

    Every ``<entry>`` element becomes one dict appended to ``self.notelist``.
    A note dict carries a key only if the matching element was seen inside
    the entry:

    * ``'date'``    -- ``datetime`` parsed from the text of ``<updated>``
    * ``'title'``   -- text of ``<title>``
    * ``'content'`` -- text of ``<content>`` (several are concatenated)
    * ``'link'``    -- ``href`` attribute of the last ``<link>``

    Text is buffered and only stored when the element ends (or a child
    element starts), because a SAX parser is free to deliver one text node
    through several ``characters()`` calls.
    """

    def startDocument(self):
        """Reset all parsing state at the beginning of a document."""
        self.notelist = []
        self.curnote = None
        # which field the incoming characters belong to: None, or one of
        # the three enumeration values below
        self.receive = None
        self.recv_date = 'date'
        self.recv_title = 'title'
        self.recv_content = 'content'
        # text chunks collected for the element currently being received
        self._chunks = []

    def startElement(self, name, attrs):
        """Open a note on ``<entry>`` and pick the field to receive text for."""
        # A child element ends the text run of its parent: keep what was
        # collected so far, then stop receiving.
        self._flush()
        if name == 'entry':
            self._log('new note')
            self.curnote = {}
        if self.curnote is None:
            return  # elements outside of an entry are ignored
        if name == 'updated':
            self._log('taking date')
            self.receive = self.recv_date
        elif name == 'title':
            self._log('taking title')
            self.receive = self.recv_title
        elif name == 'content':
            self._log('taking content')
            self.receive = self.recv_content
        elif name == 'link':
            href = attrs.get('href')
            if href is not None:  # a link without href carries nothing useful
                self._log('link %s' % href)
                self.curnote['link'] = href

    def endElement(self, name):
        """Store the buffered text and close the note on ``</entry>``."""
        # Flushing here also clears self.receive, so the whitespace that
        # follows a closing tag is never mistaken for field data.
        self._flush()
        if name == 'entry':
            self._log('end note')
            self.notelist.append(self.curnote)
            self.curnote = None

    def characters(self, content):
        """Buffer a chunk of text while a field is being received."""
        if self.receive is not None:
            self._chunks.append(content)

    def _flush(self):
        """Store the buffered text into the current note and stop receiving."""
        target, chunks = self.receive, self._chunks
        self.receive = None
        self._chunks = []
        if target is None or not chunks or self.curnote is None:
            return
        text = ''.join(chunks)
        if target == self.recv_date:
            stamp = text.strip()
            if not stamp:
                return
            self._log('receiving date %s' % stamp)
            # The first 19 characters are "YYYY-MM-DDTHH:MM:SS"; whatever
            # follows (fractional seconds, "Z", a UTC offset) is dropped.
            stamp = stamp[:19].replace('T', ' ')
            self.curnote['date'] = datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S")
        elif target == self.recv_title:
            self._log('receiving title %s' % text)
            self.curnote['title'] = text
        elif target == self.recv_content:
            self._log('receiving content %d' % len(text))
            self.curnote['content'] = self.curnote.get('content', '') + text

    def _log(self, message):
        """Print a progress message without letting it abort the import."""
        try:
            print(message)
        except UnicodeEncodeError:
            # the console cannot display this text; the note is unaffected
            pass
# -- ENEX boilerplate, copied from a sample Evernote export. -----

# opening of the export file; export_close is its counterpart.
export_header = (
    '<?xml version="1.0" encoding="UTF-8"?>\n'
    '<!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export.dtd">\n'
    '<en-export export-date="20081011T080456Z" application="Evernote" version="Mac 1.1.5 (36338)">\n'
)
export_close = '</en-export>'

# wrapper around the body of a single note (an ENML document inside CDATA).
export_content_start = (
    '<content><![CDATA[<?xml version="1.0" encoding="UTF-8"?>\n'
    '<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml.dtd">\n'
    '<en-note>'
)
export_content_end = '</en-note>]]></content>'

# attributes for a note without a link: make it look like a manually
# created note.
export_deflink = (
    '<note-attributes>'
    '<subject-date>19700101T000000Z</subject-date>'
    '<source-application>'
    'Evernote Web; 36564; Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'
    '</source-application>'
    '</note-attributes>'
)

# attributes for a note with a link: make it look like a web clip. the
# source url goes between the two halves.
export_link_start = (
    '<note-attributes>'
    '<subject-date>19700101T000000Z</subject-date>'
    '<source>web.clip</source>'
    '<source-url>'
)
export_link_end = '</source-url>' '</note-attributes>'
def writenode(writeout, nodename, data):
    ''' emit data to writeout, wrapped in an element named nodename.

    data is written as given: no escaping or encoding is applied here.
    '''
    opening = '<%s>' % nodename
    closing = '</%s>' % nodename
    for piece in (opening, data, closing):
        writeout.write(piece)
def writenote(writeout, note):
    ''' write one note to writeout as an ENEX <note> element.

    writeout -- file-like object receiving UTF-8 encoded byte strings
    note     -- dict as built by ParseGoogleReader; the keys 'title',
                'content', 'date' (a datetime) and 'link' are all optional.
                The dict is not modified.
    '''
    # Imported here: "from xml import sax" alone does not guarantee that
    # the saxutils submodule has been loaded.
    from xml.sax.saxutils import escape

    # The title is character data, so markup characters must be escaped.
    title = escape(note.get('title', '')).encode('utf-8')
    # The content sits inside a CDATA section; a literal "]]>" would end
    # that section early, so it is split across two sections.
    content = note.get('content', '').replace(']]>', ']]]]><![CDATA[>')
    content = content.encode('utf-8')

    writeout.write('<note>')
    writenode(writeout, 'title', title)
    writeout.write(export_content_start)
    writeout.write(content)
    writeout.write(export_content_end)

    # Without a date, <created>/<updated> are left out instead of failing.
    if 'date' in note:
        # date-time format as found in the sample export, e.g. 20081011T080456Z
        datestr = note['date'].strftime('%Y%m%dT%H%M%SZ')
        writenode(writeout, 'created', datestr)
        writenode(writeout, 'updated', datestr)

    if 'link' in note:
        writeout.write(export_link_start)
        # encoded like title and content, so a non-ASCII url cannot make
        # the write fail
        writeout.write(escape(note['link']).encode('utf-8'))
        writeout.write(export_link_end)
    else:
        writeout.write(export_deflink)
    writeout.write('</note>')
def main(infile='read.xml', outfile='write.enex'):
    ''' convert the notebook export infile into the Evernote export outfile.

    infile  -- path of the XML export to read  (default: read.xml)
    outfile -- path of the ENEX file to create (default: write.enex)
    '''
    read = ParseGoogleReader()
    sax.parse(infile, read)
    print(len(read.notelist))

    writeout = open(outfile, 'w')
    # try/finally (rather than "with") so the file is closed even when a
    # note fails to write, without requiring a newer interpreter than the
    # rest of this script does.
    try:
        writeout.write(export_header)
        for note in read.notelist:
            writenote(writeout, note)
        writeout.write(export_close)
    finally:
        writeout.close()
# run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()