Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from multiprocessing import Pool
- import re
- from urllib import urlopen
- from lxml.html import document_fromstring
# Matches e.g. "Viewers: 8.31"; the named group ``n`` captures the number.
_VIEWERS_RE = re.compile(r'Viewers: (?P<n>\d+(\.\d+)?)')

# Filled with (season, zero-padded two-digit episode), e.g. (3, 7) -> ".../307".
_EPISODE_URL = 'http://the-big-bang-theory.com/episodeguide/episode/%d%02d'


def _get_viewers(tuple):
    """Fetch the viewer figure shown on one episode-guide page.

    The argument is a single ``(season, episode)`` pair rather than two
    separate parameters so the function can be passed straight to
    ``Pool.map``, which supplies exactly one argument per call.  (The
    parameter keeps its original name even though it shadows the builtin,
    so the signature is unchanged.)

    Returns:
        The number following ``"Viewers: "`` in the first ``<p>`` element
        that contains such a figure, as a float, or ``None`` when no
        paragraph on the page carries a numeric viewer figure.
    """
    season, episode = tuple
    # Progress trace. Written as a single pre-formatted string so the same
    # line works under both the Python 2 print statement and Python 3 print().
    print('%s \t%s' % (season, episode))

    page = urlopen(_EPISODE_URL % (season, episode))
    try:
        raw = page.read()
    finally:
        # Release the connection even if the read fails.
        page.close()

    dom = document_fromstring(raw)
    for paragraph in dom.cssselect('p'):
        # The pattern itself contains "Viewers", so no separate substring
        # filter is needed.  A paragraph that mentions "Viewers" without a
        # number is skipped instead of crashing on a failed match.
        match = _VIEWERS_RE.search(paragraph.text_content())
        if match is not None:
            return float(match.group('n'))
    return None
def get_viewers():
    """Collect viewer figures for every candidate episode in parallel.

    Builds all ``(season, episode)`` pairs for seasons 1-6 and episode
    numbers 1-29, looks each one up with ``_get_viewers`` across a pool of
    20 worker processes, and pairs every input with its result.

    Returns:
        A list of ``((season, episode), viewers)`` tuples in the same order
        as the generated pairs.  ``viewers`` is whatever ``_get_viewers``
        returned for that pair.
    """
    pairs = [(s, e) for s in range(1, 7) for e in range(1, 30)]
    pool = Pool(20)
    try:
        viewers = pool.map(_get_viewers, pairs)
    finally:
        # Always reap the workers; otherwise the 20 processes outlive the
        # call (and linger after a failed lookup).
        pool.terminate()
        pool.join()
    # list(...) keeps the return value a list on Python 3 as well.
    return list(zip(pairs, viewers))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement