Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #! /usr/bin/env python
- import re, sys
- from BeautifulSoup import BeautifulSoup
def parse(stream):
    """Extract the body rows of the ``sorter`` table from an HTML stream.

    Parameters:
        stream: object with a ``read()`` method returning the HTML text.

    Returns:
        A list with one entry per ``<tr>`` of the table's ``<tbody>``.  Each
        entry is a list holding the first content node of every ``<td>``
        except the first one, followed by the ``href`` of the first ``<a>``
        found in that row.

    Raises:
        ValueError: if the document has no ``<table class="sorter">``.
    """
    html = stream.read()
    # Rewrite a stray doubled quote (`"">` right after a letter, digit or
    # hyphen) before handing the markup to the parser; see
    # http://meta.stackoverflow.com/questions/167180/invalid-html-in-admin-review-community-eval-stats
    html = re.sub(r'(?<=[-0-9A-Za-z])"">', '">a', html)
    soup = BeautifulSoup(html)
    table = soup.find('table', {'class': 'sorter'})
    if table is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError('no <table class="sorter"> found in the input')

    rows = []
    for row in table.find('tbody').findAll('tr'):
        # The first cell of each row is skipped; the link path goes last.
        cells = [cell.contents[0] for cell in row.findAll('td')[1:]]
        rows.append(cells + [row.find('a')['href']])
    return rows
def print_markdown(sitename, data, out):
    """Write the rows in ``data`` to ``out`` as right-aligned Markdown text.

    Parameters:
        sitename: host name; each row's link is written as
            ``'http://' + sitename + <last element of the row>``.
        data: list of rows.  Each row is a list of integer strings followed
            by a link path as its last element.  The elements at indexes 3
            and 4 of every row are dropped before printing.  All rows are
            expected to have the same length.
        out: object with a ``write`` method (e.g. ``sys.stdout``).

    Returns:
        None.  An empty ``data`` produces only the header and the legend.
    """
    # NOTE(review): this function contains non-ASCII literals; under
    # Python 2 the file presumably needs a source-encoding declaration.

    # Keep the first three columns and everything from index 5 onwards.
    data = [row[0:3] + row[5:] for row in data]

    # Width of each numeric column = digit count of its largest magnitude,
    # taken over *all* rows (the sign is accounted for separately below).
    widths = []
    if data:
        for j in range(len(data[0]) - 1):
            largest = max(abs(int(row[j])) for row in data)
            widths.append(len(str(largest)))

    out.write('`Ex Sa NI sc` \n')
    for row in data:
        for j, width in enumerate(widths):
            n = row[j]
            # One extra leading position is reserved for a sign, so every
            # cell occupies width + 1 characters whether negative or not.
            padding = width + 1 - len(n)
            if n.startswith('-'):
                n = '‒' + n[1:]
            out.write(' ' * padding + n + ' ')
        out.write(' ' * 2 + 'http://' + sitename + row[-1] + ' \n')
    out.write('\n')
    out.write('<sub> `Ex`/`Sa`/`NI`: number of “excellent/satisfactory/needs improvement” ratings </sub> \n')
    out.write('<sup> `sc`: total score </sup>\n')
if __name__ == '__main__':
    # Script entry point: expects the site name as the first argument and
    # the saved statistics page on standard input.
    if len(sys.argv) < 2:
        usage = ' '.join(['Usage:', sys.argv[0],
                          'SITE_NAME <community-eval-stats.html'])
        sys.stdout.write(usage + '\n')
        sys.exit(1)

    site_name = sys.argv[1]
    # A bare name without any dot gets the default domain appended.
    if '.' not in site_name:
        site_name += '.stackexchange.com'

    parsed_rows = parse(sys.stdin)
    print_markdown(site_name, parsed_rows, sys.stdout)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement