#!/usr/bin/env python2.5
import os, re, cgi, datetime, time, sys, urllib
import cgitb
from xml.etree.ElementTree import Element, SubElement, ElementTree,\
ProcessingInstruction
import xml.etree.ElementTree as ElementTreex
# Entries created before this date use their timestamp string as GUID; entries
# created on or after it use 'entry_<unix time>' (see Entry.__init__).
NEW_GUID_DATE = datetime.datetime(2005, 7, 6)
# Entries created before this date default to visible; entries created on or
# after it default to hidden until the entry file says otherwise
# (see Entry.__init__).
PUBLISHED_FLAG_DATE = datetime.datetime(2007, 7, 12)
class ParsedArgs:
    '''
    Attribute-style view of a URL query string.

    Each '&'-separated pair becomes an attribute: "key=value" yields the
    percent-decoded value as a string, a bare "key" yields True, and any name
    that was not supplied reads as False.

    argStr may be None or empty, in which case every attribute reads as False.
    '''

    def __init__(self, argStr):
        # Values are kept in a private dict instead of being setattr()'d onto
        # the instance: the keys come straight from the request, and a key
        # such as "__class__" or "__dict__" would otherwise raise TypeError.
        self._values = values = {}
        for pair in (argStr or '').split('&'):
            if '=' in pair:
                key, value = pair.split('=', 1)
                value = self._unquote(value)
            else:
                key, value = pair, True
            key = self._unquote(key)
            # An empty query string (or a stray '&') yields an empty key.
            if key:
                values[key] = value

    def __getattr__(self, key):
        # Only called when normal lookup fails.  Go through __dict__ so that
        # a lookup of '_values' itself can never recurse back into here.
        return self.__dict__.get('_values', {}).get(key, False)

    def _unquote(text):
        '''Percent-decode text using whichever unquote() urllib provides.'''
        try:
            unquote = urllib.unquote
        except AttributeError:
            # Newer Pythons moved the function into urllib.parse.
            from urllib.parse import unquote
        return unquote(text)
    _unquote = staticmethod(_unquote)
def myfilter(filter_func, sequence):
    '''
    Variant of the filter() built-in that keeps the predicate's result:
    returns a list of (fn_result, <object>) tuples, one for every object in
    sequence for which filter_func(<object>) is true, in the original order.
    '''
    evaluated = [(filter_func(item), item) for item in sequence]
    return [pair for pair in evaluated if pair[0]]
class Entry:
    '''
    One weblog entry, loaded from a file in the module-level entries_dir.

    The file is named after the creation date ('%Y-%m-%d-%H:%M:%S').  It
    starts with a header section that ends at the first blank line; everything
    after that blank line is the body.  Header lines are interpreted as:

     - "Category: a, b" / "Tags: a, b"  -> self.tags
     - "Title: text"                    -> self.title
     - first other line without ': '    -> self.title (if none set yet)
     - "hidden" / "published"           -> self.hidden
     - "html"                           -> self.html (body is raw HTML)
    '''
    XMLNS = 'http://www.w3.org/2005/Atom'

    def __init__(self, create_date):
        '''
        Load the entry created at create_date (a datetime.datetime).

        Raises IOError if the entry file cannot be opened.
        '''
        self.create_date = create_date
        self.title = None
        self.html = False
        self.tags = []
        if create_date < NEW_GUID_DATE:
            s = create_date.strftime('%Y-%m-%d-%H:%M:%S')
            self.guid = cgi.escape(s.replace(' ', '_'))
        else:
            self.guid = 'entry_' + str(int(time.mktime(create_date.timetuple())))
        # Entries from before the cut-off default to visible, later ones to
        # hidden; a 'hidden'/'published' header line overrides this.
        self.hidden = not (create_date < PUBLISHED_FLAG_DATE)
        self.filename = create_date.strftime('%Y-%m-%d-%H:%M:%S')
        # The edit date is simply the creation date; file mtime is not used.
        self.edit_date = create_date
        pathname = os.path.join(entries_dir, self.filename)
        # Open the file once and always close it (try/finally rather than
        # 'with', which this Python version only offers via __future__).
        fp = open(pathname)
        try:
            self._parse(fp)
        finally:
            fp.close()

    def _parse(self, fp):
        '''Read the header lines and the body from the open entry file fp.'''
        line_iter = iter(fp)
        for line in line_iter:
            line = line.rstrip()
            if ': ' in line:
                key, value = line.split(': ', 1)
                key = key.lower()
                if key in ('category', 'tags'):
                    self.tags = value.split(', ')
                elif key == 'title':
                    self.title = value
            elif not line:
                # Blank line: end of the header section.
                break
            elif not self.title:
                self.title = line
            elif line == 'hidden':
                self.hidden = True
            elif line == 'published':
                self.hidden = False
            elif line == 'html':
                self.html = True
        # Whatever the header loop did not consume is the body.
        self.body = ''.join(line_iter)

    def format_body(self, bodyOnly = False):
        '''
        Return the entry rendered as HTML.

        With bodyOnly true only the body markup is returned; otherwise the
        body is wrapped in the anchor, title, creation date and comment-thread
        markup used on the weblog page.
        '''
        if self.html:
            content = self.body
        else:
            content = self.format_quicktext()
        if bodyOnly:
            return content
        cdate = self.create_date.strftime('%a %d %B %Y, %H:%M')
        bits = [
            '<a name="%s"></a>' % self.guid,
            '<div class="entry">',
            # An entry file without any title line leaves self.title as None.
            '<h2>%s</h2>\n' % cgi.escape(self.title or ''),
            '<p class="date">',
            '<em>Created:</em> %s' % cdate,
            '</p>\n',
            '<div class="paras">',
            content,
            '</div>',
            '''<div id="disqus_thread"></div><script type="text/javascript" src="http://disqus.com/forums/dmw/embed.js"></script><noscript><a href="http://dmw.disqus.com/?url=ref">View the discussion thread.</a></noscript><a class="dsq-brlink"><span class="logo-disqus"></span></a>''',
            '</div>',
        ]
        return '\n'.join(bits)

    def format_quicktext(self):
        '''
        Render a plain-text body as HTML.

        The body is split into paragraphs on blank lines; each paragraph is
        HTML-escaped and passed through htmlise().  Results that already start
        with a tag are emitted as-is, other non-empty ones are wrapped in <p>.
        '''
        paras = [htmlise(cgi.escape(para)) for para in self.body.split('\n\n')]
        data = []
        for para in paras:
            if para.startswith('<'):
                data.append(para)
            elif para:
                data.append("<p>\n" + para + "</p>\n")
        return '\n'.join(data)

    @classmethod
    def get_dates(cls, substr = None):
        '''
        Return the sorted creation dates (oldest first) of all entry files in
        entries_dir, optionally restricted to file names containing substr.
        '''
        # NOTE(review): names with a '.html' suffix are accepted here, but
        # __init__ opens the name without the suffix -- confirm whether such
        # files are expected to exist.
        dateRe = re.compile(
            r'^(\d\d\d\d)-(\d\d)-(\d\d)-(\d\d):(\d\d):(\d\d)'
            r'(?:\.html)?$')
        dates = []
        for match, name in myfilter(dateRe.match, os.listdir(entries_dir)):
            if substr is not None and substr not in name:
                continue
            dates.append(datetime.datetime(*[int(g) for g in match.groups()]))
        dates.sort()
        return dates
def format_entry_rss(entry, chan, idx):
    '''
    Append an RSS <item> element describing entry to the channel element chan.

    entry must provide guid, title, create_date and format_body(bodyOnly=...).
    idx is the entry's position in the feed; it is accepted for the caller's
    convenience and not used.
    '''
    link = 'http://dmw.me.uk/weblog#' + entry.guid
    item = SubElement(chan, 'item')
    SubElement(item, 'title').text = entry.title
    SubElement(item, 'description').text = entry.format_body(bodyOnly = True)
    SubElement(item, 'link').text = link
    # The entry's permalink doubles as its GUID.
    SubElement(item, 'guid').text = link
    # NOTE(review): create_date is formatted as-is and labelled GMT; confirm
    # that entry timestamps really are GMT.
    SubElement(item, 'pubDate').text =\
        entry.create_date.strftime('%a, %d %b %Y %H:%M:%S GMT')
def format_blog(id = None, hidden = False):
    '''
    Return the HTML for the ten newest entries, newest first.

    id, when given, restricts the listing to entries whose file name contains
    that substring.  Entries flagged as hidden are skipped unless hidden is
    true.
    '''
    dates = Entry.get_dates(substr = id)
    dates.reverse() # newest first.
    chunks = []
    for date in dates[:10]:
        entry = Entry(date)
        # The original condition ended in "or 1", which made it always true
        # and published hidden entries regardless of the hidden argument.
        if hidden or not entry.hidden:
            chunks.append(entry.format_body())
    return ''.join(chunks)
def format_index():
    '''
    Return an HTML listing with one line per entry, oldest first: creation
    date, a two-character flag field ('P' when tagged private, 'H' when
    hidden, '.' otherwise) and a link to the entry carrying its title.
    '''
    row_template = ('%s: '
                    '<code>%s%s</code> '
                    '<a href="%s">%s</a><br />')
    rows = []
    for created in Entry.get_dates():
        entry = Entry(created)
        if 'private' in entry.tags:
            private_flag = 'P'
        else:
            private_flag = '.'
        if entry.hidden:
            hidden_flag = 'H'
        else:
            hidden_flag = '.'
        href = "/weblog/%s" % entry.filename
        # str() keeps an entry without a title from breaking the escape call.
        label = cgi.escape(str(entry.title))
        rows.append(row_template % (entry.create_date, private_flag,
                                    hidden_flag, href, label))
    return '\n'.join(rows)
def format_rss(hidden):
    '''
    Return the weblog's RSS 2.0 feed serialised as XML.

    Only the newest entry is considered; it is left out if it is flagged as
    hidden, unless hidden is true.
    '''
    dates = Entry.get_dates()
    dates.reverse() # newest first.
    dates = dates[:1]
    # RSS header.
    doc = Element('rss', version = '2.0')
    chan = SubElement(doc, 'channel')
    SubElement(chan, 'title').text = 'David Wilson\'s Weblog'
    SubElement(chan, 'link').text = 'http://dmw.me.uk/weblog/rss'
    SubElement(chan, 'description').text = (
        'Random musings and thoughts from David Wilson, a programmer/geek of\n'
        'sorts from Belfast, Northern Ireland.')
    SubElement(chan, 'copyright').text = 'Copyright 2004-2007, David Wilson.'
    # The value is labelled GMT, so format the current time in GMT;
    # time.strftime() without a time tuple would use local time.
    SubElement(chan, 'lastBuildDate').text =\
        time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime())
    SubElement(chan, 'generator').text =\
        'Python%s; ElementTree %s; blog.py 0.3' %\
        (str(sys.version_info), ElementTreex.VERSION)
    SubElement(chan, 'docs').text =\
        'http://blogs.law.harvard.edu/tech/rss'
    for idx, date in enumerate(dates):
        entry = Entry(date)
        if hidden or not entry.hidden:
            format_entry_rss(entry, chan, idx)
    return ElementTreex.tostring(doc)
# Markup delimiters.  Entry.format_quicktext() HTML-escapes each paragraph
# before calling htmlise(), so the angle brackets normally arrive as entities;
# the raw characters are accepted as well for callers passing unescaped text.
_HTMLISE_LT = r'(?:<|&lt;)'
_HTMLISE_GT = r'(?:>|&gt;)'
_HTMLISE_LIST_ITEM = re.compile(r'^ - (.+)')
_HTMLISE_HEADING = re.compile(r'^(?:>>|&gt;&gt;)(.+)(?:<<|&lt;&lt;)$')
_HTMLISE_IMAGE = re.compile(_HTMLISE_LT + r'img ([^&]+)' + _HTMLISE_GT)
_HTMLISE_NAMED_LINK = re.compile(
    _HTMLISE_LT + r'(http://[^&]+)' + _HTMLISE_GT + r'([^\n]+?)'
    + _HTMLISE_LT + r'/a' + _HTMLISE_GT)
_HTMLISE_BARE_LINK = re.compile(
    _HTMLISE_LT + r'(http://[^&]+)' + _HTMLISE_GT)


def htmlise(text):
    '''
    Convert one paragraph of the weblog's lightweight markup to HTML.

     - "--" alone becomes a divider <div>.
     - A paragraph in which every line starts with " - " becomes a <ul>,
       each item being converted recursively.
     - ">>text<<" becomes an <h3> heading.
     - "<img URL>" becomes an <img> tag.
     - "<http://URL>text</a>" becomes a link labelled text.
     - "<http://URL>" becomes a link labelled with the URL itself.

    Anything else is returned unchanged.  The angle brackets may be given
    either literally or as &lt; / &gt; entities.
    '''
    if text == '--':
        return '<div class="divider"> </div>'
    item_matches = [_HTMLISE_LIST_ITEM.match(line) for line in text.split("\n")]
    if all(item_matches):
        out = [ '<ul>\n' ]
        out += [ '<li>' + htmlise(match.group(1)) + '</li>'
                 for match in item_matches ]
        out.append('</ul>\n\n')
        return '\n'.join(out)
    heading = _HTMLISE_HEADING.match(text)
    if heading:
        return '<h3>' + heading.group(1) + '</h3>\n'
    # A single image pattern: a second pass over the generated <img ...> tag
    # would match it again and mangle it.
    text = _HTMLISE_IMAGE.sub(r'<img src="\1" alt="" />', text)
    # Named links first, so the bare-link pattern only sees what is left.
    text = _HTMLISE_NAMED_LINK.sub(r'<a href="\1">\2</a>', text)
    return _HTMLISE_BARE_LINK.sub(r'<a href="\1">\1</a>', text)
def _read_blog_file(name):
    '''Return the full contents of the file called name inside blog_dir.'''
    fp = open(os.path.join(blog_dir, name))
    try:
        return fp.read()
    finally:
        # Close explicitly instead of leaving it to garbage collection.
        fp.close()


def get_header():
    '''Return the contents of header.html from blog_dir.'''
    return _read_blog_file('header.html')


def get_footer():
    '''Return the contents of footer.html from blog_dir.'''
    return _read_blog_file('footer.html')
def RunCGI():
    '''
    CGI entry point: set up the module-level blog_dir and entries_dir, parse
    the query string and write the selected view to standard output.

    The first query argument that is present wins, in this order:
    id (entries matching that id), index (entry listing), body_only (entries
    without header/footer), rss (RSS feed).  With none of them, the full page
    is written: header, entries, footer.  A "hidden" argument makes the
    body_only and rss views include hidden entries.
    '''
    global blog_dir, entries_dir
    # Tracebacks are shown in the response and logged under /tmp.
    cgitb.enable(display=1, logdir="/tmp")
    # Directory where blog.py is stored.  Fall back to this file's own
    # location when SCRIPT_FILENAME is not provided by the environment.
    script_path = os.environ.get('SCRIPT_FILENAME') or os.path.abspath(__file__)
    blog_dir = os.path.dirname(script_path)
    entries_dir = os.path.join(blog_dir, 'entries')
    # ParsedArgs treats a missing (None) query string like an empty one.
    args = ParsedArgs(os.environ.get('QUERY_STRING'))
    out = sys.stdout
    # Header line followed by the blank line that ends the CGI headers.
    out.write('Content-type: text/html\n\n')
    if args.id:
        out.write(format_blog(id = args.id))
    elif args.index:
        out.write(format_index())
    elif args.body_only:
        out.write(format_blog(hidden = args.hidden != False))
    elif args.rss:
        out.write(format_rss(hidden = args.hidden != False))
    else:
        out.write(get_header())
        out.write(format_blog())
        out.write(get_footer())


if __name__ == '__main__':
    RunCGI()