# RSS Generation with Bottle

import datetime, os, re
from bottle import error, response, route, run, static_file, template, TEMPLATE_PATH

## Clean ##

# Remove links, line breaks from snippet
def clean(result):
    """Strip line breaks and a fixed set of HTML tags from *result*.

    Removed, in this order: newlines, ``<br>``, opening ``<a href=...>``,
    ``<img src=...>`` and ``<a target="_blank" href=...>`` tags, ``</a>``,
    opening/closing ``<hN>`` headings, ``<center>`` and ``<b>`` pairs.
    Any other markup is left untouched. Returns the cleaned string.
    """
    # Each step is (is_regex, pattern); the order is significant because
    # removing one fragment can expose another.
    steps = (
        (False, '\n'),
        (False, '<br>'),
        (True, r'<a href=.*?>'),
        (True, r'<img src=.*?>'),
        (True, r'<a target="_blank" href=.*?>'),
        (False, '</a>'),
        (True, r'<h\d>'),
        (True, r'</h\d>'),
        (False, '<center>'),
        (False, '</center>'),
        (False, '<b>'),
        (False, '</b>'),
    )
    cleaned = result
    for is_regex, pattern in steps:
        if is_regex:
            cleaned = re.sub(pattern, '', cleaned)
        else:
            cleaned = cleaned.replace(pattern, '')
    return cleaned

# Remove tags
def clean_tags(raw):
    """Return *raw* with every ``<...>`` span on a single line removed."""
    tag_pattern = re.compile('<.*?>')
    return tag_pattern.sub('', raw)

## Diary Entry Collection ##

# Return list of all diary entries
def gather_and_sort(loc):
    """List directory *loc*, keep the entries that are due, sort newest first."""
    names = gather_files(loc)
    published = curate_files(names)
    return sort_files(published)

# Sort diary - newest to oldest
def sort_files(files):
    """Sort *files* in place in descending order and return the same list."""
    # Slice assignment keeps the caller's list object, as list.sort() does.
    files[:] = sorted(files, reverse=True)
    return files

def gather_files(loc):
    """Return the names of all entries in directory *loc* (via os.listdir)."""
    return os.listdir(loc)

def curate_files(files):
    """Return a new list holding the names in *files* that is_it_time() accepts."""
    return [name for name in files if is_it_time(name)]

# Is it time to publish the article yet?
def is_it_time(date):
    """Return True when the entry named *date* is due for publication.

    date: entry name holding a date as ``YYMMDD`` digits, e.g. ``'210131'``.

    The name is compared numerically with today's local date rendered in
    the same ``%y%m%d`` form. Names that are not plain integers (stray
    files or folders in the entries directory) are reported as not
    publishable instead of raising, so a single unexpected directory
    entry cannot abort feed generation.
    """
    try:
        entry_code = int(date)
    except (TypeError, ValueError):
        return False
    today_code = int(datetime.datetime.now().strftime("%y%m%d"))
    return entry_code <= today_code

## RSS Generation ##

# Return list of items
def list_items(articles):
    """Collect feed data for *articles* and rewrite the RSS file.

    articles: iterable of entry file names located under ``diary/entries/``.

    Returns a list with one ``[title, url, summary, timestamp]`` list per
    article, in input order. As a side effect ``static/xml/blessfrey.xml``
    is overwritten with an RSS 2.0 document containing one ``<item>`` per
    article. The file is only opened after every item has been prepared,
    so a failure while reading an article leaves the previous feed intact.
    """
    f_name = "static/xml/blessfrey.xml" # the RSS file
    site = 'https://www.blessfrey.me/'
    loc = 'diary/entries/'

    result = []
    for article in articles:
        path = loc + article
        text = article2list(article, loc)
        result.append([
            find_title(text),
            find_url(path),
            clean_tags(prepare_rss_summary(text, path)),
            find_timestamp(text),
        ])

    lines = [
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>",
        "<rss version=\"2.0\">",
        "<channel>",
        "<title>blessfrey.me</title>",
        "<link>https://www.blessfrey.me/</link>",
        "<description>chimchooree's dev space</description>",
        "<language>en-us</language>",
        "<webMaster>[email protected] (chimchooree)</webMaster>",
    ]

    # NOTE(review): title and summary are written without XML escaping; a
    # literal '&' or a lone '<' in either would make the feed malformed.
    for title, url, summary, _timestamp in result:
        # url already starts with '/' + loc, so it is joined to the site
        # root only; prefixing site + loc would repeat the entries path.
        link = site.rstrip('/') + url
        # The entry name (YYMMDD) is what is left of the url once the
        # entries path and slashes are removed.
        code = url.replace(loc, '').replace('/', '')
        lines.extend([
            "<item>",
            "<title>" + title + "</title>",
            "<link>" + link + "</link>",
            "<description>" + summary + "</description>",
            "<pubDate>" + format_rss_time(code) + "</pubDate>",
            "<guid>" + link + "</guid>",
            "</item>",
        ])

    lines.append("</channel>")
    lines.append("</rss>")

    # Mode 'w' truncates any previous feed; the encoding is pinned to match
    # the utf-8 declaration in the XML header.
    with open(f_name, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines) + '\n')

    return result

# Return article as list of lines of text
def article2list(article, loc):
    """Read the file at ``loc + article`` and return its lines as a list.

    Line endings are kept on each element, as produced by readlines().
    """
    entry_path = loc + article
    with open(entry_path) as handle:
        return handle.readlines()

# Return clean title
def find_title(text):
    """Return the second line of *text* (index 1) after passing it through clean()."""
    title_line = text[1]
    return clean(title_line)

# Return URL of article
def find_url(path):
    """Return *path* with every ``.tpl`` removed and a leading ``/`` added."""
    without_ext = path.replace('.tpl', '')
    return '/' + without_ext

# Snip article and close any open list tags
def prepare_rss_summary(text, path):
    """Build the summary snippet for an article given as a list of lines.

    The article body is extracted with find_content() and shortened with
    snip_sentence(). If the snippet holds more ``<ul>`` than ``</ul>``
    tags, a single ``</ul>`` is appended.
    """
    body = find_content(text)
    summary = snip_sentence(body, path)
    has_open_list = summary.count('<ul>') > summary.count('</ul>')
    return (summary + '</ul>') if has_open_list else summary

# Return up to the first 100 characters of the cleaned article, trimmed at a word boundary, + " ... "
def snip_sentence(article, path):
    """Return a short teaser for *article* followed by ``" ... "``.

    article: article body as one string; it is passed through clean() first.
    path: unused; kept so existing callers keep working.

    Text longer than 100 characters is cut at 100 and then trimmed back to
    the last space so the teaser does not end in a partial word. Text that
    already fits within the limit is kept whole; previously its final word
    was dropped even though nothing had been truncated.
    """
    article = clean(article)
    limit = 100
    if len(article) <= limit:
        result = article
    else:
        # Drop the (possibly partial) word at the cut point.
        result = article[:limit].rsplit(' ', 1)[0]
    return result + " ... "

# Return article text without HTML header
def find_content(text):
    """Concatenate every line of *text* from index 5 onward into one string.

    The first five lines (indices 0-4) are skipped. Returns an empty
    string when *text* has five lines or fewer.
    """
    header_lines = 5
    return "".join(text[header_lines:])

# Return clean timestamp
def find_timestamp(text):
    """Return the third line of *text* (index 2) with every ``<br>`` removed."""
    stamp_line = text[2]
    # Splitting on the tag and re-joining removes each occurrence.
    return ''.join(stamp_line.split('<br>'))

# Convert diary entry name to RSS time
def format_rss_time(date):
    """Convert a ``YYMMDD`` entry name into an RSS ``pubDate`` string.

    date: six-digit string such as ``'210131'``.

    Returns e.g. ``'Sun, 31 Jan 2021 05:00:05 GMT'``; the time of day is
    always 05:00:05 GMT. Raises ValueError if *date* does not match
    ``%y%m%d``.
    """
    # Local import keeps this block self-contained.
    from email.utils import format_datetime

    # Parse once. format_datetime() always emits English day and month
    # names, unlike strftime('%a') / strftime('%b'), which follow the
    # process locale.
    parsed = datetime.datetime.strptime(date, '%y%m%d')
    stamp = parsed.replace(hour=5, minute=0, second=5,
                           tzinfo=datetime.timezone.utc)
    return format_datetime(stamp, usegmt=True)

def make_rss():
    """Regenerate the RSS file from the 15 newest published diary entries.

    Returns None; the work is done by list_items(), which writes the file.
    """
    entries_dir = 'diary/entries/'
    newest = gather_and_sort(entries_dir)[:15]
    list_items(newest)

# Serve XML
@route('/static/xml/<filename:path>')
def serve_xml(filename):
    """Serve *filename* from the ``static/xml`` directory as ``text/xml``."""
    xml_root = 'static/xml'
    return static_file(filename, root=xml_root, mimetype='text/xml')

## Main ##

if __name__ == '__main__':
    # Build the RSS file first so it already exists under static/xml when
    # the server starts answering requests.
    make_rss()
    # Start the Bottle server on localhost, port 9001.
    run(host='127.0.0.1', port=9001)