Advertisement
Guest User

Scraper for the-big-bang-theory.com

a guest
Mar 12th, 2013
89
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.80 KB | None | 0 0
  1. from multiprocessing import Pool
  2. import re
  3. from urllib import urlopen
  4. from lxml.html import document_fromstring
  5.  
  6. def _get_viewers(tuple):#season, episode):
  7.     season, episode= tuple
  8.     print season, '\t', episode
  9.     p=re.compile('Viewers: (?P<n>\d+(\.\d+)?)')
  10.     url= 'http://the-big-bang-theory.com/episodeguide/episode/%d%02d'
  11.     url= url % (season, episode)
  12.     raw= urlopen(url).read()
  13.     dom= document_fromstring(raw)
  14.     ps= dom.cssselect('p')
  15.     ps= [e.text_content() for e in ps]
  16.     ps= [e for e in ps if 'Viewers' in e]
  17.     if len(ps) == 0: return
  18.     m= p.search(ps[0])
  19.     return float(m.groupdict()['n'])
  20.      
  21. def get_viewers():
  22.     res= {}
  23.     l= [(s,e) for s in xrange(1,7) for e in xrange(1,30)]
  24.     pool= Pool(20)
  25.     v= pool.map(_get_viewers, l)
  26.     return zip(l,v)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement