Advertisement
stackexchange-gilles

community-eval/stats-html2md

Feb 11th, 2013
62
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.84 KB | None | 0 0
  1. #! /usr/bin/env python
  2. import re, sys
  3. from BeautifulSoup import BeautifulSoup
  4.  
  5. def parse(stream):
  6.     html = stream.read()
  7.     html = re.sub(r'(?<=[-0-9A-Za-z])"">', '">a', html) # http://meta.stackoverflow.com/questions/167180/invalid-html-in-admin-review-community-eval-stats
  8.     soup = BeautifulSoup(html)
  9.     table = soup.find('table', {'class': 'sorter'})
  10.     heads = [tag.find('span').contents[0]
  11.              for tag in table.find('thead').find('tr').findAll('th')]
  12.     rows = [[cell.contents[0] for cell in row.findAll('td')[1:]] +
  13.             [row.find('a')['href']]
  14.             for row in table.find('tbody').findAll('tr')]
  15.     return rows
  16.  
  17. def print_markdown(sitename, data, out):
  18.     data = [row[0:3] + row[5:] for row in data]
  19.     digits = map(int, data[0][:-1])
  20.     for row in data[1:]:
  21.         for j in range(len(row)-1):
  22.             n = abs(int(row[j]))
  23.             if n > digits[j]: digits[j] = n
  24.     for j in range(len(digits)): digits[j] = len(str(digits[j]))
  25.     out.write('`Ex Sa NI sc`  \n')
  26.     for row in data:
  27.         for j in range(len(digits)):
  28.             n = row[j]
  29.             padding = digits[j] - len(n)
  30.             if n[0] == '-': n = '&#x2012;' + n[1:]
  31.             else: padding += 1
  32.             out.write('&#x2007;' * padding + n + ' ')
  33.         out.write('&nbsp;' * 2 + 'http://' + site_name + row[-1] + '  \n')
  34.     out.write('\n')
  35.     out.write('<sub> `Ex`/`Sa`/`NI`: number of &ldquo;excellent/satisfactory/needs improvement&rdquo; ratings </sub>  \n')
  36.     out.write('<sup> `sc`: total score </sup>\n')
  37.  
  38. if __name__ == '__main__':
  39.     if len(sys.argv) <= 1:
  40.         print 'Usage:', sys.argv[0], 'SITE_NAME <community-eval-stats.html'
  41.         sys.exit(1)
  42.     site_name = sys.argv[1]
  43.     if site_name.find('.') < 0: site_name += '.stackexchange.com'
  44.     print_markdown(site_name, parse(sys.stdin), sys.stdout)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement