Guest User

RSS Generation with Bottle

a guest
Mar 19th, 2021
117
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.98 KB | None | 0 0
  1. import datetime, os, re
  2. from bottle import error, response, route, run, static_file, template, TEMPLATE_PATH
  3.  
  4. ## Clean ##
  5.  
  6. # Remove links, line breaks from snippet
  7. def clean(result):
  8.     result = result.replace('\n','')
  9.     result = result.replace('<br>','')
  10.     result = re.sub(r'<a href=.*?>', '', result)
  11.     result = re.sub(r'<img src=.*?>', '', result)
  12.     result = re.sub(r'<a target="_blank" href=.*?>', '', result)
  13.     result = result.replace('</a>','')
  14.     result = re.sub(r'<h\d>','',result)
  15.     result = re.sub(r'</h\d>','',result)
  16.     result = result.replace('<center>','')
  17.     result = result.replace('</center>','')
  18.     result = result.replace('<b>','')
  19.     result = result.replace('</b>','')
  20.     return result
  21.  
  22. # Remove tags
  23. def clean_tags(raw):
  24.     cleanr = re.compile('<.*?>')
  25.     cleantext = re.sub(cleanr, '', raw)
  26.     return cleantext
  27.  
  28. ## Diary Entry Collection ##
  29.  
  30. # Return list of all diary entries
  31. def gather_and_sort(loc):
  32.     return sort_files(curate_files(gather_files(loc)))
  33.  
  34. # Sort diary - newest to oldest
  35. def sort_files(files):
  36.     files.sort(reverse=True)
  37.     return files
  38.  
  39. def gather_files(loc):
  40.     files = os.listdir(loc)
  41.     return files
  42.  
  43. def curate_files(files):
  44.     clean = []
  45.     for f in files:
  46.         if is_it_time(f):
  47.             clean.append(f)
  48.     return clean
  49.  
  50. # Is it time to publish the article yet?
  51. def is_it_time(date):
  52.     today = datetime.datetime.now()
  53.     today_string = today.strftime("%y%m%d")
  54.     return int(date) <= int(today_string)
  55.  
  56. ## RSS Generation ##
  57.  
  58. # Return list of items
  59. def list_items(articles):
  60.     f_name = "static/xml/blessfrey.xml" # the RSS file
  61.     loc2 = 'https://www.blessfrey.me/'
  62.     loc = 'diary/entries/'
  63.     loc3 = loc2 + loc
  64.     result = []
  65.  
  66.     for article in articles:
  67.         path = loc + article
  68.         text = []
  69.         a = []
  70.         length = 0
  71.         text = article2list(article, loc)
  72.         a.append(find_title(text))
  73.         a.append(find_url(path))
  74.         a.append(clean_tags(prepare_rss_summary(text, path)))
  75.         a.append(find_timestamp(text))
  76.         result.append(a)
  77.  
  78.     clear_file(f_name)
  79.     f = open(f_name, 'w')
  80.     f.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>" + '\n')
  81.     f.write("<rss version=\"2.0\">" + '\n')
  82.     f.write("<channel>" + '\n')
  83.     f.write("<title>blessfrey.me</title>" + '\n')
  84.     f.write("<link>https://www.blessfrey.me/</link>" + '\n')
  85.     f.write("<description>chimchooree's dev space</description>" + '\n')
  86.     f.write("<language>en-us</language>" + '\n')
  87.     f.write("<webMaster>[email protected] (chimchooree)</webMaster>" + '\n')
  88.  
  89.     for r in result:
  90.         f.write("<item>" + '\n')
  91.         f.write("<title>" + r[0] + "</title>" + '\n')
  92.         f.write("<link>" + loc3 + r[1] + "</link>" + '\n')
  93.         f.write("<description>" + r[2] + "</description>" + '\n')
  94.         code = r[1].replace(loc,'')
  95.         code = code.replace('/','')
  96.         f.write("<pubDate>" + format_rss_time(code) + "</pubDate>" + '\n')
  97.         f.write("<guid>" + loc3 + r[1] + "</guid>" + '\n')
  98.         f.write("</item>" + '\n')
  99.  
  100.     f.write("</channel>" + '\n')
  101.     f.write("</rss>" + '\n')
  102.     f.close()
  103.  
  104.     return result
  105.  
  106. # Return article as list of lines of text
  107. def article2list(article, loc):
  108.     text = []
  109.     with open(loc + article) as f:
  110.         text = f.readlines()
  111.     return text
  112.  
  113. # Return clean title
  114. def find_title(text):
  115.     return clean(text[1])
  116.  
  117. # Return URL of article
  118. def find_url(path):
  119.     return '/' + path.replace('.tpl','')
  120.  
  121. # Snip article and close any open list tags
  122. def prepare_rss_summary(text, path):
  123.     content = snip_sentence(find_content(text), path)
  124.     if content.count('<ul>') > content.count('</ul>'):
  125.         content += '</ul>'
  126.     return content
  127.  
  128. # Return the start of the article (at most 100 characters, cut at a space) + " ... "
  129. def snip_sentence(article, path):
  130.     article = clean(article)
  131.     limit = 100
  132.     result = article[0:min(len(article),limit)]
  133.     result = result.rsplit(' ',1)[0]
  134.     return result + " ... "
  135.  
  136. # Return article text without HTML header
  137. def find_content(text):
  138.     length = len(text)
  139.     content = ""
  140.     # form a string from relevant lines of the article
  141.     pos = 0
  142.     for line in text:
  143.         # skip to line 5
  144.         if pos > 4 and pos < length:
  145.             content += line
  146.         pos += 1
  147.     return content
  148.  
  149. # Return clean timestamp
  150. def find_timestamp(text):
  151.     return text[2].replace('<br>','')
  152.  
  153. # Convert diary entry name to RSS time
  154. def format_rss_time(date):
  155.     return datetime.datetime.strptime(date, '%y%m%d').strftime('%a') + ', ' + datetime.datetime.strptime(date, '%y%m%d').strftime('%d %b %Y') + " 05:00:05 GMT"
  156.  
  157. def make_rss():
  158.     loc = 'diary/entries/'
  159.     info = {'items': list_items(gather_and_sort(loc)[0:15])}
  160.  
  161. # Serve XML
  162. @route('/static/xml/<filename:path>')
  163. def serve_xml(filename):
  164.     return static_file(filename, root='static/xml', mimetype='text/xml')
  165.  
  166. ## Main ##
  167.  
  168. if __name__ == '__main__':
  169.     make_rss()
  170.     run(host='127.0.0.1', port=9001)
Advertisement
Add Comment
Please, Sign In to add comment