#!/usr/bin/env python
'''
Created on May 8, 2012
@author: Nisheeth
'''
import urllib2
import re
import operator
from BeautifulSoup import BeautifulSoup
from os import path
from datetime import datetime, timedelta
# Matches the id attribute of the <div> elements that hold post bodies.
_POST_ID_RE = re.compile("^post_message_.*")

# Everything up to (and including) the start of the first-post ad template
# marker; re.S lets '.' span newlines.
_FIRST_POST_AD_RE = re.compile(
    r'.*<!-- END TEMPLATE: ad_showthread_firstpost_start ', re.S)

# Runs of tabs / line breaks that separate the entries listed in a post.
_ENTRY_SEPARATOR_RE = re.compile(r'[\t\r\n]+')

_ROW_TEMPLATE = "<tr><td>%s</td><td>%s</td><td>%s</td></tr>"

# Filled with (table rows, total votes).
_PAGE_TEMPLATE = """
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>ThinkDigit Workspace Showoff Contest Scores</title>
</head>
<body>
<div style="font-style:italic">Updates every 10 minutes</div>
<table width="400px" border="" style="border-style:solid;border-width:1px;border-collapse:collapse">
<thead><tr><th>User</th><th>Votes</th><th>Score</th></tr></thead>
<tbody>%s</tbody>
</table>
<div>Total Votes: <b>%s</b></div>
</body>
</html>
"""


def _page_url(root_url, page_index):
    '''
    Build the URL of one page of the thread.

    @param root_url: URL of main thread; its last five characters are
        dropped (the '.html' suffix of the URL form used by this script)
    @param page_index: zero-based page index; pages after the first get a
        '-<page number>' suffix before '.html'
    @return: URL of the requested page
    '''
    url = root_url[:-5]
    if page_index > 0:
        url += '-' + str(page_index + 1)
    return url + '.html'


def _fetch(url):
    '''
    Download url and return the response body.

    The response is closed even when reading fails.
    '''
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def _read_page_count(parsed_page, default):
    '''
    Read the number of pages from the pager of a parsed thread page.

    The count is the text after the last space of the pager label.

    @param parsed_page: BeautifulSoup tree of a thread page
    @param default: value returned when the pager markup is not present
    @return: page count as an int
    @raise ValueError: if the label has no space or does not end in a number
    '''
    pager = parsed_page.find('div', attrs={'class': 'pagenav'})
    if pager is None:
        # find() returns None when nothing matches (presumably a thread
        # with a single page); keep the caller's value instead of crashing.
        return default
    label_cell = pager.find('td', attrs={'class': 'vbmenu_control'})
    if label_cell is None:
        return default
    label = ''.join(label_cell.findAll(text=True))
    return int(label[label.rindex(' ') + 1:])


def _tally_post(inner_text, vote_count):
    '''
    Add the entries listed in one post to the running tally.

    Entries are separated by runs of tabs / line breaks and are compared
    case-insensitively, ignoring surrounding whitespace.  The first entry
    scores 4, the second 2, the third 1; any further entries keep halving
    (0.5, 0.25, ...).  Blank entries are skipped and do not use up a rank.

    @param inner_text: text content of the post
    @param vote_count: dict mapping user name to [score, votes]; updated
        in place
    '''
    exponent = 2
    for entry in _ENTRY_SEPARATOR_RE.split(inner_text.strip()):
        name = entry.strip().lower()
        if not name:
            # An empty post would otherwise register a vote for ''.
            continue
        record = vote_count.setdefault(name, [0, 0])
        record[0] += 2 ** exponent
        record[1] += 1
        exponent -= 1


def render_page(root_url):
    '''
    Fetch every page of the voting thread, tally the votes and render the
    score table as an HTML document.

    @param root_url: URL of main thread
    @return: the complete HTML page as a string
    '''
    page_index = 0
    page_count = 1
    total_votes = 0
    vote_count = {}
    while page_index < page_count:
        parsed_page = BeautifulSoup(_fetch(_page_url(root_url, page_index)))
        # Every page carries the pager, so the count is refreshed each time.
        page_count = _read_page_count(parsed_page, page_count)
        posts = parsed_page.findAll('div', attrs={'id': _POST_ID_RE})
        # On later pages the first post needs the ad text removed.
        strip_ad = page_index > 0
        if page_index == 0:
            # The opening post of the thread is not tallied.
            posts = posts[1:]
        page_index += 1
        for post in posts:
            inner_text = ''.join(post.findAll(text=True))
            if strip_ad:  # fix for ads in first post
                inner_text = _FIRST_POST_AD_RE.sub(' ', inner_text)
                strip_ad = False
            _tally_post(inner_text, vote_count)
            # NOTE(review): counted once per post; the file's indentation was
            # lost, so confirm it was not meant to be once per listed name.
            total_votes += 1
    # NOTE(review): rows are ordered by user name, descending; confirm that
    # ordering by score (itemgetter(1)) was not what was intended.
    rows = sorted(vote_count.items(), key=operator.itemgetter(0), reverse=True)
    table_html = ''.join(
        _ROW_TEMPLATE % (user, votes, score)
        for user, (score, votes) in rows)
    return _PAGE_TEMPLATE % (table_html, total_votes)
# Thread to tally, and the file the rendered page is cached in.
root_url = 'http://www.thinkdigit.com/forum/chit-chat/157002-contest-voting-thread.html'
cache_filename = 'vote.cache'
cache_file = None
output = ''

# The cached page is reused while the cache file was modified at most
# 10 minutes ago; otherwise (or when there is no cache file) it is rebuilt.
cache_is_fresh = (
    path.isfile(cache_filename)
    and (datetime.now() - datetime.fromtimestamp(path.getmtime(cache_filename)))
    <= timedelta(minutes=10)
)

if cache_is_fresh:
    with open(cache_filename, "r") as f:
        output = f.read()
else:
    # Render BEFORE opening the cache for writing: opening with "w" truncates
    # the file, so a failed render would otherwise leave an empty cache with a
    # fresh timestamp that is served as a blank page for the next 10 minutes.
    output = render_page(root_url)
    with open(cache_filename, "w") as f:
        f.write(output)

# Parenthesised so the line prints the same text on Python 2 and also parses
# on Python 3.
print(output)