Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import datetime, os, re
- from bottle import error, response, route, run, static_file, template, TEMPLATE_PATH
- ## Clean ##
- # Remove links, line breaks from snippet
def clean(result):
    """Strip line breaks and a fixed set of HTML markup from a snippet.

    The removals run strictly in the order listed below: newlines, ``<br>``,
    opening ``<a href=...>`` tags, ``<img src=...>`` tags,
    ``<a target="_blank" href=...>`` tags, ``</a>``, opening and closing
    ``<hN>`` heading tags, ``<center>``/``</center>`` and ``<b>``/``</b>``.
    Text between the tags is kept.

    Args:
        result: the HTML snippet to clean.

    Returns:
        The snippet with the markup above removed.
    """
    # (is_regex, pattern) pairs; order matters because each step works on
    # the output of the previous one.
    removals = (
        (False, '\n'),
        (False, '<br>'),
        (True, r'<a href=.*?>'),
        (True, r'<img src=.*?>'),
        (True, r'<a target="_blank" href=.*?>'),
        (False, '</a>'),
        (True, r'<h\d>'),
        (True, r'</h\d>'),
        (False, '<center>'),
        (False, '</center>'),
        (False, '<b>'),
        (False, '</b>'),
    )
    for is_regex, pattern in removals:
        if is_regex:
            result = re.sub(pattern, '', result)
        else:
            result = result.replace(pattern, '')
    return result
- # Remove tags
def clean_tags(raw):
    """Return ``raw`` with every ``<...>`` span removed.

    The match is non-greedy, so each tag is removed individually and the
    text between tags is preserved.
    """
    return re.sub('<.*?>', '', raw)
- ## Diary Entry Collection ##
- # Return list of all diary entries
def gather_and_sort(loc):
    """List directory ``loc``, keep the publishable names, sort descending.

    Chains ``gather_files`` -> ``curate_files`` -> ``sort_files`` and
    returns the resulting list of entry names.
    """
    found = gather_files(loc)
    publishable = curate_files(found)
    return sort_files(publishable)
- # Sort diary - newest to oldest
def sort_files(files):
    """Sort ``files`` in place in descending order and return the same list."""
    # Slice assignment keeps the caller's list object, matching an in-place
    # sort.
    files[:] = sorted(files, reverse=True)
    return files
def gather_files(loc):
    """Return the names of all entries in directory ``loc`` (``os.listdir``)."""
    return os.listdir(loc)
def curate_files(files):
    """Return the names in ``files`` accepted by ``is_it_time``, order kept."""
    return [name for name in files if is_it_time(name)]
- # Is it time to publish the article yet?
def is_it_time(date):
    """Return True if an entry's date code is today or earlier.

    Args:
        date: the entry name as a ``YYMMDD`` digit string (e.g. ``'210131'``).
            It is compared numerically with today's local date rendered in
            the same ``%y%m%d`` format.

    Returns:
        True when ``int(date)`` is less than or equal to today's code.
        False when it is later, or when ``date`` cannot be read as an
        integer at all. A stray non-entry file in the entries directory is
        therefore skipped instead of aborting feed generation with
        ``ValueError``.
    """
    try:
        entry_code = int(date)
    except (TypeError, ValueError):
        # Not a date-coded entry name; never publish it.
        return False
    today_code = int(datetime.datetime.now().strftime("%y%m%d"))
    return entry_code <= today_code
- ## RSS Generation ##
- # Return list of items
def list_items(articles):
    """Collect feed data for ``articles`` and write the RSS 2.0 file.

    Each name in ``articles`` is read from ``diary/entries/`` and reduced to
    ``[title, url, summary, timestamp]`` using ``find_title``, ``find_url``,
    ``prepare_rss_summary`` + ``clean_tags`` and ``find_timestamp``. The file
    ``static/xml/blessfrey.xml`` is then overwritten with a channel holding
    one ``<item>`` per article.

    Args:
        articles: iterable of entry file names, in the order they should
            appear in the feed.

    Returns:
        A list of ``[title, url, summary, timestamp]`` lists, one per
        article, in input order. The values are returned unescaped.
    """
    # Function-scope imports keep this block self-contained (stdlib only).
    from html import unescape
    from xml.sax.saxutils import escape

    f_name = "static/xml/blessfrey.xml"  # the RSS file
    site = 'https://www.blessfrey.me'
    loc = 'diary/entries/'

    def xml_text(value):
        # Turn HTML entities into characters first, then escape &, < and >
        # for XML. Already-escaped input ("&amp;") is written unchanged,
        # while a raw "&" or an HTML-only entity such as "&nbsp;" no longer
        # makes the feed malformed.
        return escape(unescape(value))

    result = []
    for article in articles:
        path = loc + article
        text = article2list(article, loc)
        result.append([
            find_title(text),
            find_url(path),
            clean_tags(prepare_rss_summary(text, path)),
            find_timestamp(text),
        ])

    # Build the whole document in memory so a failure while formatting an
    # item cannot leave a truncated feed on disk.
    lines = [
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>",
        "<rss version=\"2.0\">",
        "<channel>",
        "<title>blessfrey.me</title>",
        "<link>https://www.blessfrey.me/</link>",
        "<description>chimchooree's dev space</description>",
        "<language>en-us</language>",
    ]
    for title, url, summary, _timestamp in result:
        # find_url() already returns a site-absolute path
        # ("/diary/entries/<code>"), so it is joined to the site root only;
        # prefixing the entries directory again would double the path.
        link = escape(site + url)
        # The entry's date code is what remains of the URL once the
        # directory part and the slashes are removed.
        code = url.replace(loc, '').replace('/', '')
        lines.extend([
            "<item>",
            "<title>" + xml_text(title) + "</title>",
            "<link>" + link + "</link>",
            "<description>" + xml_text(summary) + "</description>",
            "<pubDate>" + format_rss_time(code) + "</pubDate>",
            "<guid>" + link + "</guid>",
            "</item>",
        ])
    lines.append("</channel>")
    lines.append("</rss>")

    # Mode 'w' truncates any previous feed, so no separate clearing step is
    # needed. The encoding matches the one declared in the XML header.
    with open(f_name, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines) + '\n')
    return result
- # Return article as list of lines of text
def article2list(article, loc):
    """Read the file at ``loc + article`` and return its lines as a list.

    Line endings are kept on each element.
    """
    with open(loc + article) as entry:
        return list(entry)
- # Return clean title
def find_title(text):
    """Return the second line of ``text`` (index 1) passed through ``clean``."""
    title_line = text[1]
    return clean(title_line)
- # Return URL of article
def find_url(path):
    """Return ``path`` with every '.tpl' removed and a leading '/' added."""
    without_ext = path.replace('.tpl', '')
    return '/' + without_ext
- # Snip article and close any open list tags
def prepare_rss_summary(text, path):
    """Build the feed summary for an article.

    The article body (``find_content``) is shortened with ``snip_sentence``.
    If the snippet contains more ``<ul>`` than ``</ul>``, a single ``</ul>``
    is appended.
    """
    snippet = snip_sentence(find_content(text), path)
    has_unclosed_list = snippet.count('<ul>') > snippet.count('</ul>')
    if has_unclosed_list:
        return snippet + '</ul>'
    return snippet
- # Return the first ~100 characters of the cleaned article, cut back to the last space, + " ... "
def snip_sentence(article, path):
    """Return a short teaser for ``article`` followed by " ... ".

    The article is passed through ``clean``, limited to its first 100
    characters, and then everything after the last space is dropped so the
    teaser does not end mid-word. ``path`` is accepted but not used.
    """
    limit = 100
    head = clean(article)[:limit]
    # rsplit leaves the text untouched when it contains no space at all.
    pieces = head.rsplit(' ', 1)
    return pieces[0] + " ... "
- # Return article text without HTML header
def find_content(text):
    """Return the article body: every line after the first five, joined.

    Args:
        text: the article as a list of lines.

    Returns:
        The concatenation of ``text[5:]``; an empty string when there are
        five lines or fewer.
    """
    header_lines = 5
    return "".join(text[header_lines:])
- # Return clean timestamp
def find_timestamp(text):
    """Return the third line of ``text`` (index 2) with '<br>' removed."""
    stamp_line = text[2]
    return stamp_line.replace('<br>', '')
- # Convert diary entry name to RSS time
def format_rss_time(date):
    """Convert a ``YYMMDD`` entry code into the feed's pubDate string.

    The result has the form ``'Fri, 01 Jan 2021 05:00:05 GMT'``; the time of
    day is always the fixed literal ``05:00:05 GMT``.

    Raises:
        ValueError: if ``date`` does not match the ``%y%m%d`` format.
    """
    day = datetime.datetime.strptime(date, '%y%m%d')
    weekday = day.strftime('%a')
    calendar_date = day.strftime('%d %b %Y')
    return weekday + ', ' + calendar_date + " 05:00:05 GMT"
def make_rss():
    """Regenerate the RSS file from the first 15 sorted diary entries.

    Takes the first 15 names returned by ``gather_and_sort`` for
    ``diary/entries/`` and hands them to ``list_items``, which writes the
    feed. Returns None.
    """
    loc = 'diary/entries/'
    newest = gather_and_sort(loc)[:15]
    list_items(newest)
- # Serve XML
@route('/static/xml/<filename:path>')
def serve_xml(filename):
    """Serve ``filename`` from ``static/xml`` with the ``text/xml`` MIME type."""
    xml_root = 'static/xml'
    return static_file(filename, root=xml_root, mimetype='text/xml')
- ## Main ##
if __name__ == '__main__':
    # Build the feed once at startup, then start the Bottle server on
    # localhost.
    make_rss()
    run(host='127.0.0.1', port=9001)
Advertisement
Add Comment
Please, Sign In to add comment