1. #!/usr/bin/env python
  2. '''
  3. Created on May 8, 2012
  4. @author: Nisheeth
  5. '''
  6. import urllib2
  7. import re
  8. import operator
  9. from BeautifulSoup import BeautifulSoup
  10. from os import path
  11. from datetime import datetime, timedelta
  12.  
  13. def render_page(root_url):
  14.     '''
  15.        @param root_url: URL of main thread
  16.    '''  
  17.     page_index = 0
  18.     page_count = 1
  19.     total_votes = 0;
  20.     vote_count = {}
  21.     k = 0;
  22.     while page_index < page_count:
  23.         url =  root_url[:-5]
  24.         if page_index > 0:
  25.             url += '-'+str(page_index+1)
  26.         url += '.html'
  27.        
  28.         #print url
  29.         response = urllib2.urlopen(url)
  30.         page = response.read()        
  31.         response.close()
  32.         parsed_page = BeautifulSoup(page)
  33.         pat = re.compile("^post_message_.*")        
  34.         page_count_txt = ''.join(parsed_page.find('div', attrs={'class': 'pagenav'}).find('td', attrs={'class': 'vbmenu_control'}).findAll(text=True));        
  35.         page_count = int(page_count_txt[len(page_count_txt)-page_count_txt[::-1].index(' '):])
  36.                
  37.         result=parsed_page.findAll('div', attrs={'id': pat})
  38.         first_post = False
  39.         if page_index == 0:
  40.             result = result[1:]
  41.         else:
  42.             first_post = True
  43.                                
  44.         page_index += 1
  45.         for r in result:            
  46.             i = 2;            
  47.             if first_post: # fix for ads in first post
  48.                 t = re.compile(r'.*<!-- END TEMPLATE: ad_showthread_firstpost_start ', re.S)                
  49.                 inner_text = re.sub(t,' ',''.join(r.findAll(text=True)))
  50.                 first_post = False
  51.             else:
  52.                 inner_text = ''.join(r.findAll(text=True))
  53.             #print inner_text
  54.             for u in re.sub(r'[\t\r\n]+',r'\n', inner_text.strip()).split('\n'):
  55.                 score = 2**i
  56.                 i -=1
  57.                 if not vote_count.has_key(u.lower()):
  58.                     vote_count[u.lower()] = []
  59.                     vote_count[u.lower()].append(score)            
  60.                     vote_count[u.lower()].append(1)                        
  61.                 else:
  62.                     vote_count[u.lower()][0] += score
  63.                     vote_count[u.lower()][1] += 1  
  64.                 total_votes += 1
  65.     table_html="";    
  66.     #print vote_count                        
  67.     vote_count = sorted(vote_count.items(), key=operator.itemgetter(0), reverse=True)
  68.     #print vote_count        
  69.     for k,v in vote_count:
  70.         table_html += ("<tr><td>%s</td><td>%s</td><td>%s</td></tr>" % (k, v[1], v[0]))
  71.    
  72.     return """
  73.    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  74.    <html xmlns="http://www.w3.org/1999/xhtml">
  75.    <head>
  76.    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  77.    <title>ThinkDigit Workspace Showoff Contest Scores</title>
  78.    </head>
  79.    <body>
  80.    <div style="font-style:italic">Updates every 10 minutes</div>
  81.    <table width="400px" border="" style="border-style:solid;border-width:1px;border-collapse:collapse">
  82.    <thead><tr><th>User</th><th>Votes</th><th>Score</th></tr></thead>
  83.    <tbody>%s</tbody>
  84.    </table>
  85.    <div>Total Votes: <b>%s</b></div>
  86.    </body>
  87.    </html>
  88.    """ % (table_html, total_votes)    
  89.    
  90.  
  91. root_url = 'http://www.thinkdigit.com/forum/chit-chat/157002-contest-voting-thread.html';
  92. cache_filename = 'vote.cache'
  93. cache_file = None
  94. output = ''
  95.  
  96. if path.isfile(cache_filename):
  97.     if (datetime.now() - datetime.fromtimestamp(path.getmtime(cache_filename)))  > timedelta (minutes = 10):
  98.         with open(cache_filename, "w") as f:
  99.             output = render_page(root_url)
  100.             f.write(output)          
  101.     else:
  102.         with open(cache_filename, "r") as f:
  103.             output = f.read()        
  104. else:
  105.     with open(cache_filename, "w") as f:
  106.                 output = render_page(root_url)
  107.                 f.write(output)
  108.        
  109. print output