Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python
"""Scrape book titles and links from hackershelf.com into a text file.

Requests the book pages with ids 1 through 199 and appends one
``id;title;link`` line to ``path`` for every page that contains both the
title heading and the primary link element.
"""

__author__ = 'Mauro Baraldi'
__email__ = 'mauro.baraldi@gmail.com'

import codecs
import contextlib
import urllib

import BeautifulSoup

# URL template for a single book page; ``%i`` receives the numeric book id.
url = 'http://hackershelf.com/book/%i/'
# Output file; it is opened in append mode, so existing content is kept.
path = '/home/mauro/tmp/hacker_bookshelf.txt'


def _fetch_page(book_id):
    """Download the page for *book_id* and return it as a parsed soup."""
    # closing() releases the HTTP response even if read() or parsing fails;
    # the original left the response object unclosed.
    with contextlib.closing(urllib.urlopen(url % book_id)) as response:
        return BeautifulSoup.BeautifulSoup(response.read())


def _extract_book(page):
    """Return ``(title, link)`` from *page*, or ``None`` if a field is missing."""
    try:
        # A missing element surfaces as AttributeError on the ``.text``
        # lookup (presumably because find() returned None).
        title = page.find('h1', {'id': 'book_title'}).text
        # NOTE(review): this records the anchor's text, not its href --
        # confirm that this is what the output file should contain.
        link = page.find('a', {'class': 'primary_link'}).text
    except AttributeError:
        return None
    return title, link


def main():
    """Append an ``id;title;link`` record for each book page that has both fields.

    The output file is opened once for the whole run instead of once per
    record. Only a missing page element is treated as "skip this id"; any
    other error (including one raised while writing) propagates, and the
    ``with`` block still closes the file.
    """
    with codecs.open(path, 'a', encoding='utf8') as bookshelf:
        for i in range(1, 200):
            book = _extract_book(_fetch_page(i))
            if book is None:
                continue
            title, link = book
            bookshelf.write('%i;%s;%s\n' % (i, title, link))


if __name__ == '__main__':
    main()
Add Comment
Please, Sign In to add comment