Posted by dw on Wed 1 Oct 16:23
report abuse | download | new post
- #!/usr/bin/env python2.5
- import os, re, cgi, datetime, time, sys, urllib
- import cgitb
- from xml.etree.ElementTree import Element, SubElement, ElementTree,\
- ProcessingInstruction
- import xml.etree.ElementTree as ElementTreex
# Entries created before this date use their timestamp string as guid;
# entries created on or after it use 'entry_<seconds since epoch>'.
- NEW_GUID_DATE = datetime.datetime(2005, 7, 6)
# Entries created before this date default to visible; entries created on
# or after it default to hidden unless the entry file says otherwise.
- PUBLISHED_FLAG_DATE = datetime.datetime(2007, 7, 12)
class ParsedArgs(object):
    '''
    Expose the parameters of a URL query string as instance attributes.

    "key=value" pairs become string attributes (both sides URL-unquoted);
    a bare "key" becomes an attribute set to True.  Any parameter that was
    not supplied reads as False, so callers can simply test `args.name`.
    '''

    def __init__(self, argStr):
        '''
        Parse `argStr`, a raw query string such as "id=2008-10&rss".
        None and the empty string are accepted and yield no attributes.
        '''
        for pair in (argStr or '').split('&'):
            if '=' in pair:
                key, value = pair.split('=', 1)
                value = urllib.unquote(value)
            else:
                key, value = pair, True
            key = urllib.unquote(key)
            # The query string is untrusted: an empty name is meaningless,
            # and a name such as "__class__" or "__dict__" would make
            # setattr() raise and take the whole request down.
            if not key or key.startswith('_'):
                continue
            setattr(self, key, value)

    def __getattr__(self, key):
        '''
        Called only for attributes that were never set: report them as
        False.  Special (dunder) names still raise AttributeError so that
        protocol probes such as copy or pickle are not handed a bogus
        False in place of a method.
        '''
        if key.startswith('__') and key.endswith('__'):
            raise AttributeError(key)
        return False
def myfilter(filter_func, sequence):
    '''
    Variant of the filter() built-in that keeps the predicate's result.

    Calls filter_func on every element of sequence, in order, and returns
    a list of (result, element) tuples for the elements whose result was
    true.
    '''
    scored = [(filter_func(element), element) for element in sequence]
    return [pair for pair in scored if pair[0]]
class Entry(object):
    '''
    A single weblog entry, loaded from a file in the global entries_dir.

    The file is named after the entry's creation date, formatted as
    %Y-%m-%d-%H:%M:%S.  It begins with header lines and ends with the body,
    the two separated by the first blank line.  Header lines are:

      "Title: ..."                    the title
      "Category: ..." / "Tags: ..."   comma+space separated tags
      a bare first line               the title, when none is set yet
      "hidden" / "published"          override the default visibility
      "html"                          the body is raw HTML, not quicktext

    Attributes set by the constructor: create_date, edit_date, title,
    html, tags, guid, hidden, filename and body.
    '''

    XMLNS = 'http://www.w3.org/2005/Atom'

    # File names that get_dates() recognises as entries.
    _DATE_RE = re.compile(
        r'^(\d\d\d\d)-(\d\d)-(\d\d)-(\d\d):(\d\d):(\d\d)'
        r'(?:\.html)?$')

    # Comment widget appended to every fully rendered entry.
    DISQUS_EMBED = (
        '<div id="disqus_thread"></div>'
        '<script type="text/javascript" '
        'src="http://disqus.com/forums/dmw/embed.js"></script>'
        '<noscript><a href="http://dmw.disqus.com/?url=ref">'
        'View the discussion thread.</a></noscript>'
        '<a class="dsq-brlink"><span class="logo-disqus"></span></a>'
    )

    def __init__(self, create_date):
        '''
        Load the entry created at `create_date` (a datetime.datetime).

        Raises IOError when the entry file cannot be opened.
        '''
        self.create_date = create_date
        # The file's modification time is deliberately not used; the edit
        # date always mirrors the creation date.
        self.edit_date = create_date
        self.title = None
        self.html = False
        self.tags = []

        stamp = create_date.strftime('%Y-%m-%d-%H:%M:%S')
        if create_date < NEW_GUID_DATE:
            self.guid = cgi.escape(stamp.replace(' ', '_'))
        else:
            self.guid = 'entry_' + str(int(time.mktime(create_date.timetuple())))

        # Default visibility depends on the entry's age; a "hidden" or
        # "published" header line may override it below.
        self.hidden = not (create_date < PUBLISHED_FLAG_DATE)

        # NOTE(review): get_dates() also accepts names ending in ".html",
        # but the name built here never carries that suffix -- confirm
        # whether such files are meant to be loadable.
        self.filename = stamp
        fp = open(os.path.join(entries_dir, self.filename))
        try:
            self._parse(fp)
        finally:
            fp.close()

    def _parse(self, lines):
        '''Consume header lines from `lines`, then store the rest as body.'''
        line_iter = iter(lines)
        for line in line_iter:
            line = line.rstrip()
            if ': ' in line:
                key, value = line.split(': ', 1)
                key = key.lower()
                if key in ('category', 'tags'):
                    self.tags = value.split(', ')
                elif key == 'title':
                    self.title = value
            elif not line:
                # First blank line terminates the header.
                break
            elif not self.title:
                self.title = line
            elif line == 'hidden':
                self.hidden = True
            elif line == 'published':
                self.hidden = False
            elif line == 'html':
                self.html = True
        # Keep draining the same iterator so the body starts right after
        # the blank separator line.
        self.body = ''.join(line_iter)

    def format_body(self, bodyOnly = False):
        '''
        Render the entry as HTML.

        With bodyOnly true, return just the body (raw for HTML entries,
        converted from quicktext otherwise).  Otherwise wrap it in the
        entry markup: anchor, title, creation date and comment widget.
        '''
        if self.html:
            content = self.body
        else:
            content = self.format_quicktext()
        if bodyOnly:
            return content

        cdate = self.create_date.strftime('%a %d %B %Y, %H:%M')
        bits = [
            '<a name="%s"></a>' % self.guid,
            '<div class="entry">',
            # An entry file without any title line leaves title as None.
            '<h2>%s</h2>\n' % cgi.escape(self.title or ''),
            '<p class="date">',
            '<em>Created:</em> %s' % cdate,
            '</p>\n',
            '<div class="paras">',
            content,
            '</div>',
            self.DISQUS_EMBED,
            '</div>',
        ]
        return '\n'.join(bits)

    def format_quicktext(self):
        '''
        Convert the plain-text body to HTML: paragraphs are split on blank
        lines, HTML-escaped and passed through htmlise().  Results that
        already start with a tag are emitted as-is, other non-empty
        results are wrapped in <p>.
        '''
        data = []
        for raw in self.body.split('\n\n'):
            para = htmlise(cgi.escape(raw))
            if para.startswith('<'):
                data.append(para)
            elif para:
                data.append("<p>\n" + para + "</p>\n")
        return '\n'.join(data)

    @classmethod
    def get_dates(cls, substr = None):
        '''
        Return the creation dates of all entry files in entries_dir as a
        sorted list of datetime.datetime objects.

        When `substr` is given, only files whose name contains it are
        considered.
        '''
        found = []
        for name in os.listdir(entries_dir):
            match = cls._DATE_RE.match(name)
            if match is None:
                continue
            if substr is not None and substr not in name:
                continue
            try:
                found.append(datetime.datetime(*map(int, match.groups())))
            except ValueError:
                # The name looks like a date but is not one (e.g. month
                # 13); ignore the stray file rather than fail the listing.
                continue
        found.sort()
        return found
def format_entry_rss(entry, chan, idx):
    '''
    Append an RSS <item> describing `entry` to the channel element `chan`.

    The item carries the entry's title, its rendered body as the
    description, a permalink (used for both <link> and <guid>) and the
    creation date as <pubDate>.  `idx` is accepted for the caller's
    convenience and is not used.
    '''
    link = 'http://dmw.me.uk/weblog#' + entry.guid
    item = SubElement(chan, 'item')
    SubElement(item, 'title').text = entry.title
    SubElement(item, 'description').text = entry.format_body(bodyOnly = True)
    SubElement(item, 'link').text = link
    SubElement(item, 'guid').text = link
    # NOTE(review): create_date is formatted as-is and labelled GMT; this
    # is only accurate if entry timestamps are stored in GMT -- confirm.
    SubElement(item, 'pubDate').text =\
        entry.create_date.strftime('%a, %d %b %Y %H:%M:%S GMT')
def format_blog(id = None, hidden = False):
    '''
    Render up to ten entries, newest first, as one HTML string.

    `id`, when given, restricts the selection to entry files whose name
    contains it.  `hidden` is meant to request hidden entries as well.
    '''
    newest_first = Entry.get_dates(substr = id)
    newest_first.reverse()
    rendered = []
    for created in newest_first[:10]:
        entry = Entry(created)
        # NOTE(review): the leading "1" makes this test always true, so
        # hidden entries are rendered regardless of `hidden`.  That is the
        # existing behaviour and is kept as is -- confirm whether the
        # visibility filter is supposed to be active.
        if 1 or hidden or not entry.hidden:
            rendered.append(entry.format_body())
    return ''.join(rendered)
def format_index():
    '''
    Build the HTML index: one line per entry, oldest first, showing the
    creation date, a two-character flag field ('P' when the entry is
    tagged "private", 'H' when it is hidden, '.' otherwise) and a link
    to the entry labelled with its escaped title.
    '''
    row_template = ('%s: '
                    '<code>%s%s</code> '
                    '<a href="%s">%s</a><br />')
    rows = []
    for created in Entry.get_dates():
        entry = Entry(created)
        if 'private' in entry.tags:
            private_flag = 'P'
        else:
            private_flag = '.'
        if entry.hidden:
            hidden_flag = 'H'
        else:
            hidden_flag = '.'
        href = "/weblog/%s" % entry.filename
        label = cgi.escape(str(entry.title))
        rows.append(row_template % (
            entry.create_date, private_flag, hidden_flag, href, label))
    return '\n'.join(rows)
def format_rss(hidden):
    '''
    Build the RSS 2.0 feed and return it as serialised XML.

    Only the single newest entry is considered; it is included when it is
    not hidden, or when `hidden` is true.
    '''
    entries = Entry.get_dates()
    entries.reverse() # newest first.
    # NOTE(review): the feed is cut to one entry *before* the visibility
    # check, so a hidden newest entry yields an empty feed -- confirm this
    # limit is intended.
    entries = entries[:1]
    strip = str.strip

    # RSS header.
    doc = Element('rss', version = '2.0')
    chan = SubElement(doc, 'channel')
    SubElement(chan, 'title').text = 'David Wilson\'s Weblog'
    SubElement(chan, 'link').text = 'http://dmw.me.uk/weblog/rss'
    SubElement(chan, 'description').text = strip('''
        Random musings and thoughts from David Wilson, a programmer/geek of
        sorts from Belfast, Northern Ireland.
    ''')
    SubElement(chan, 'copyright').text = strip('''
        Copyright 2004-2007, David Wilson.
    ''')
    # time.strftime() formats the *local* time unless given a time tuple;
    # the value is labelled GMT, so format the GMT time explicitly.
    SubElement(chan, 'lastBuildDate').text =\
        time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime())
    SubElement(chan, 'generator').text =\
        'Python%s; ElementTree %s; blog.py 0.3' %\
        (str(sys.version_info), ElementTreex.VERSION)
    SubElement(chan, 'docs').text =\
        'http://blogs.law.harvard.edu/tech/rss'

    for idx, date in enumerate(entries):
        entry = Entry(date)
        if (not entry.hidden) or hidden:
            format_entry_rss(entry, chan, idx)
    return ElementTreex.tostring(doc)
# Markup delimiters.  Paragraphs reach htmlise() after HTML-escaping, where
# "<" and ">" have become "&lt;" and "&gt;", so both spellings are accepted.
_HTMLISE_LT = r'(?:<|&lt;)'
_HTMLISE_GT = r'(?:>|&gt;)'

_HTMLISE_LIST_RE = re.compile(r'^ - (.+)')
_HTMLISE_HEADING_RE = re.compile(
    r'^' + _HTMLISE_GT + r'{2}(.+)' + _HTMLISE_LT + r'{2}$')
_HTMLISE_IMG_RE = re.compile(
    _HTMLISE_LT + r'img ([^&<>]+)' + _HTMLISE_GT)
_HTMLISE_NAMED_LINK_RE = re.compile(
    _HTMLISE_LT + r'(http://[^&<>]+)' + _HTMLISE_GT +
    r'([^\n]+?)' + _HTMLISE_LT + r'/a' + _HTMLISE_GT)
_HTMLISE_BARE_LINK_RE = re.compile(
    _HTMLISE_LT + r'(http://[^&<>]+)' + _HTMLISE_GT)


def htmlise(text):
    '''
    Expand the weblog's quicktext markup in one paragraph into HTML.

      "--"                    a divider <div>
      every line " - item"    a <ul>; each item is expanded recursively
      ">>Heading<<"           an <h3>
      "<img URL>"             an <img> tag
      "<http://URL>text</a>"  a link with the given text
      "<http://URL>"          a link labelled with its own URL

    The angle brackets of the markup may be literal or already escaped
    as &lt; / &gt;.  Text without markup is returned unchanged.
    '''
    if text == '--':
        return '<div class="divider"> </div>'

    # A paragraph is a list only if every one of its lines is an item.
    item_matches = [_HTMLISE_LIST_RE.match(line) for line in text.split("\n")]
    if None not in item_matches:
        out = [ '<ul>\n' ]
        out += [ '<li>' + htmlise(match.group(1)) + '</li>'
                 for match in item_matches ]
        out.append('</ul>\n\n')
        return '\n'.join(out)

    heading = _HTMLISE_HEADING_RE.match(text)
    if heading:
        return '<h3>' + heading.group(1) + '</h3>\n'

    # One pass per construct: a second image pass would match the <img>
    # tags produced by the first and wrap them again.
    text = _HTMLISE_IMG_RE.sub(r'<img src="\1" alt="" />', text)
    # Named links first, so the bare-link pass only sees what is left.
    text = _HTMLISE_NAMED_LINK_RE.sub(r'<a href="\1">\2</a>', text)
    return _HTMLISE_BARE_LINK_RE.sub(r'<a href="\1">\1</a>', text)
def get_header():
    '''Return the contents of header.html from the blog directory.'''
    fp = open(os.path.join(blog_dir, 'header.html'))
    try:
        return fp.read()
    finally:
        # Close explicitly instead of relying on garbage collection.
        fp.close()
def get_footer():
    '''Return the contents of footer.html from the blog directory.'''
    fp = open(os.path.join(blog_dir, 'footer.html'))
    try:
        return fp.read()
    finally:
        # Close explicitly instead of relying on garbage collection.
        fp.close()
def RunCGI():
    '''
    CGI entry point: work out the blog directories, parse the query string
    and write the requested page to standard output.

    Query parameters, in order of precedence:
      id=SUBSTR   entries whose file name contains SUBSTR
      index       the list of all entries
      body_only   the recent entries without header and footer
      rss         the RSS feed
    With none of them, the full page (header, entries, footer) is sent.
    A `hidden` parameter asks body_only and rss to include hidden entries.
    '''
    global blog_dir, entries_dir
    cgitb.enable(display=1, logdir="/tmp")

    # Directory where blog.py is stored.
    blog_dir = os.path.dirname(os.environ['SCRIPT_FILENAME'])
    entries_dir = os.path.join(blog_dir, 'entries')

    # QUERY_STRING may be absent from the environment; ParsedArgs treats
    # None as an empty query.
    args = ParsedArgs(os.environ.get('QUERY_STRING'))
    show_hidden = args.hidden != False

    # The feed is XML, so it must not be announced as text/html.
    serve_rss = bool(args.rss) and not (
        args.id or args.index or args.body_only)
    if serve_rss:
        content_type = 'application/rss+xml'
    else:
        content_type = 'text/html'
    sys.stdout.write('Content-type: %s\n\n' % content_type)

    if args.id:
        sys.stdout.write(format_blog(id = args.id))
    elif args.index:
        sys.stdout.write(format_index())
    elif args.body_only:
        sys.stdout.write(format_blog(hidden = show_hidden))
    elif serve_rss:
        sys.stdout.write(format_rss(hidden = show_hidden))
    else:
        sys.stdout.write(get_header())
        sys.stdout.write(format_blog())
        sys.stdout.write(get_footer())
- if __name__ == '__main__':
- RunCGI()
Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.