Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from contextlib import closing
from time import sleep

try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

from bs4 import BeautifulSoup as bs
# Pages to scrape: base_url followed by each numeric page id in gen_urls.
base_url = 'http://joyreactor.cc/new/'
gen_urls = list(range(80000, 80002))
enlist_urls = [base_url + str(item) for item in gen_urls]


def _fetch_taglist(page_url):
    """Return the text of the last ``<h2 class="taglist">`` on *page_url*.

    The page is downloaded and parsed on every call, so each URL is
    searched against its own document. Returns ``None`` when the page
    contains no matching element.
    """
    # closing() releases the connection on both Python 2 and 3; the
    # Python 2 response object is not a context manager by itself.
    with closing(urlopen(page_url)) as response:
        html = response.read()
    soup = bs(html, "lxml")

    taglist = None
    # When several headings match, the last one wins -- the same result as
    # repeatedly assigning to a single dict key.
    # NOTE(review): confirm that keeping only the last match is intended.
    for item in soup.findAll('h2', attrs={'class': 'taglist'}):
        taglist = item.text
    return taglist


# Maps page URL -> tag-list text; pages without a tag list get no entry.
DATA = {}
for current_page in enlist_urls:
    taglist = _fetch_taglist(current_page)
    if taglist is not None:
        DATA[current_page] = taglist
    # Pause between requests so the site is not hit in a tight loop.
    sleep(1)

if __name__ == '__main__':
    # Entries are printed unsorted, in dict iteration order.
    for page_url, taglist in DATA.items():
        print("%s %s" % (page_url, taglist))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement