Advertisement
Guest User

tags_joyreactor

a guest
Jul 28th, 2014
376
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.59 KB | None | 0 0
  1. from bs4 import BeautifulSoup as bs
  2. from urllib2 import urlopen
  3. from time import sleep
  4.  
  5. soup = bs(html,"lxml")
  6. gen_urls = [x for x in range(80000, 80002)]
  7.  
  8. base_url = 'http://joyreactor.cc/new/'
  9.  
  10. enlist_urls =[base_url+str(item) for item in gen_urls]
  11.  
  12. DATA = {}
  13.  
  14. for current_page in enlist_urls:
  15.     html = urlopen(current_page).read()
  16.  
  17.    
  18.  
  19.     for item in soup.findAll('h2',attrs={'class':'taglist'}):
  20.         DATA[current_page] = item.text
  21.         sleep(1)
  22.  
  23. if __name__ == '__main__':
  24.     pairs = DATA.items()
  25.  
  26.     #pairs.sort(key=lambda x: x[1], reverse=True)
  27.     for p in pairs:
  28.         print p[0], p[1]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement