Pastebin launched a little side project called VERYVIRAL.com, check it out ;-) Want more features on Pastebin? Sign Up, it's FREE!
Guest

Marcelo Gosling

By: a guest on Sep 13th, 2009  |  syntax: Python  |  size: 3.22 KB  |  views: 342  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. #!/usr/bin/env python
  2. #Copyright (c) 2009, Marcelo Gosling, marcelo.gosling@gmail.com
  3. #All rights reserved.
  4. #
  5. #Redistribution and use in source and binary forms, with or without
  6. #modification, are permitted provided that the following conditions are met:
  7. #  * Redistributions of source code must retain the above copyright
  8. #    notice, this list of conditions and the following disclaimer.
  9. #  * Redistributions in binary form must reproduce the above copyright
  10. #    notice, this list of conditions and the following disclaimer in the
  11. #    documentation and/or other materials provided with the distribution.
  12. #  * Neither the name of Marcelo Gosling nor the
  13. #    names of its contributors may be used to endorse or promote products
  14. #    derived from this software without specific prior written permission.
  15. #
  16. #THIS SOFTWARE IS PROVIDED BY Marcelo Gosling ''AS IS'' AND ANY
  17. #EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. #WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. #DISCLAIMED. IN NO EVENT SHALL Marcelo Gosling BE LIABLE FOR ANY
  20. #DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. #(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. #LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. #ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. #(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. #SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26.  
  27. from BeautifulSoup import BeautifulSoup
  28. from urllib import urlopen
  29. from pprint import pprint
  30. import re
  31.  
  32. PIRATEBAYURL='http://thepiratebay.org/top/201'
  33. TITLEENDSWITH = [ 'DvD', 'CAM', '[', '200', 'DVD', 'KLAXXON', '(', 'PROPER', 'SCREENER', 'WS', 'R5', 'TS', 'TC' ]
  34. SEARCHURL = 'http://www.metacritic.com/search/process?sort=relevance&termType=all&ts=%s&ty=1&x=0&y=0'
  35.  
  36. def clean_title(title):
  37.     for separator in TITLEENDSWITH:
  38.         title = title.split(separator)[0]
  39.     title = ' '.join(title.split('.'))
  40.     title = '&'.join(title.split('&'))
  41.     return title.strip()
  42.  
  43. def get_score(title,verbose=False):
  44.     url = SEARCHURL % '+'.join(title.split(' '))
  45.     soup = BeautifulSoup(urlopen(url).read())
  46.     regex = re.compile('^red$|^yellow$|^green$')
  47.     try:
  48.         score = int(soup.find(attrs={'class' : regex}).contents[0])
  49.         if verbose:
  50.             print score, title
  51.         return score
  52.     except:
  53.         if verbose:
  54.             print 'Could not get score for %s.' % title
  55.         return 0
  56.  
  57. def get_list(verbose=False):
  58.     if verbose:
  59.         print "Getting Pirate Bay 100 top movie torrents...",
  60.     soup = BeautifulSoup(urlopen(PIRATEBAYURL).read())
  61.     if verbose:
  62.         print "Done!"
  63.     links = soup.findAll(attrs={'class': 'detLink'})
  64.     torrents = [link.contents[0] for link in links]
  65.     movies = [clean_title(title) for title in torrents]
  66.     scores = [get_score(title, verbose) for title in movies]
  67.     results = {}
  68.     for i in xrange(len(movies)):
  69.         results[movies[i]] = scores[i]
  70.     return results
  71.  
  72. if __name__ == '__main__':
  73.     results = get_list(True)
  74.     for i in sorted(results, key = results.get):
  75.         print '%2d  %s' % (results[i], i)