Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- #Copyright (c) 2009, Marcelo Gosling, marcelo.gosling@gmail.com
- #All rights reserved.
- #
- #Redistribution and use in source and binary forms, with or without
- #modification, are permitted provided that the following conditions are met:
- # * Redistributions of source code must retain the above copyright
- # notice, this list of conditions and the following disclaimer.
- # * Redistributions in binary form must reproduce the above copyright
- # notice, this list of conditions and the following disclaimer in the
- # documentation and/or other materials provided with the distribution.
- # * Neither the name of Marcelo Gosling nor the
- # names of its contributors may be used to endorse or promote products
- # derived from this software without specific prior written permission.
- #
- #THIS SOFTWARE IS PROVIDED BY Marcelo Gosling ''AS IS'' AND ANY
- #EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- #WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- #DISCLAIMED. IN NO EVENT SHALL Marcelo Gosling BE LIABLE FOR ANY
- #DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- #(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- #LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- #ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- #(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- #SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- from BeautifulSoup import BeautifulSoup
- from urllib import urlopen
- from pprint import pprint
- import re
- PIRATEBAYURL='http://thepiratebay.org/top/201'
- TITLEENDSWITH = [ 'DvD', 'CAM', '[', '200', 'DVD', 'KLAXXON', '(', 'PROPER', 'SCREENER', 'WS', 'R5', 'TS', 'TC' ]
- SEARCHURL = 'http://www.metacritic.com/search/process?sort=relevance&termType=all&ts=%s&ty=1&x=0&y=0'
- def clean_title(title):
- for separator in TITLEENDSWITH:
- title = title.split(separator)[0]
- title = ' '.join(title.split('.'))
- title = '&'.join(title.split('&'))
- return title.strip()
- def get_score(title,verbose=False):
- url = SEARCHURL % '+'.join(title.split(' '))
- soup = BeautifulSoup(urlopen(url).read())
- regex = re.compile('^red$|^yellow$|^green$')
- try:
- score = int(soup.find(attrs={'class' : regex}).contents[0])
- if verbose:
- print score, title
- return score
- except:
- if verbose:
- print 'Could not get score for %s.' % title
- return 0
- def get_list(verbose=False):
- if verbose:
- print "Getting Pirate Bay 100 top movie torrents...",
- soup = BeautifulSoup(urlopen(PIRATEBAYURL).read())
- if verbose:
- print "Done!"
- links = soup.findAll(attrs={'class': 'detLink'})
- torrents = [link.contents[0] for link in links]
- movies = [clean_title(title) for title in torrents]
- scores = [get_score(title, verbose) for title in movies]
- results = {}
- for i in xrange(len(movies)):
- results[movies[i]] = scores[i]
- return results
- if __name__ == '__main__':
- results = get_list(True)
- for i in sorted(results, key = results.get):
- print '%2d %s' % (results[i], i)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement