Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import urllib
- import lxml.html
def get_pages(url):
    """Print every ``.mp3`` link on *url* and on each page that follows it.

    Starting at *url*, each page is fetched and parsed with ``lxml.html``.
    The ``href`` of every ``<a>`` whose ``href`` contains ``'.mp3'`` is
    printed, one per line, exactly as it appears in the page.  The crawl
    then continues at the ``href`` of the parent element of the first
    ``<span>`` whose text is ``Next``.

    The crawl stops when a page has no such element, when that element has
    no ``href``, or when the next URL has already been visited.

    Args:
        url: Address of the first page to scan.
    """
    # Imported locally so the function runs on both Python 3 and Python 2
    # without depending on which urllib layout the file-level import found.
    try:
        from urllib.request import urlopen
        from urllib.parse import urljoin
    except ImportError:  # Python 2
        from urllib import urlopen
        from urlparse import urljoin

    # Iterate instead of recursing once per page, and remember visited
    # pages so a "Next" link that points backwards cannot loop forever.
    visited = set()
    while url and url not in visited:
        visited.add(url)

        handle = urlopen(url)
        try:
            page = lxml.html.parse(handle)
        finally:
            handle.close()

        for link in page.xpath("//a[contains(@href, '.mp3')]"):
            print(link.get('href'))

        next_links = page.xpath("//span[text()='Next']/..")
        if not next_links:
            break

        # The pager link may be relative; resolve it against the page it
        # was found on.  Absolute hrefs pass through urljoin unchanged.
        href = next_links[0].get('href')
        url = urljoin(url, href) if href else None
# Script entry: start the crawl at the blog's front page.
get_pages(url="http://thehistoryofrome.typepad.com/")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement