Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- import lxml.html
def extract(el, css_sel):
    """Return the text of the one element under *el* that matches *css_sel*.

    Args:
        el: Object exposing ``cssselect(selector)`` that returns a sequence
            of matches (presumably an lxml HTML element -- confirm at the
            call sites).
        css_sel: CSS selector string handed to ``el.cssselect``.

    Returns:
        The ``text`` attribute of the match when exactly one element
        matches; ``None`` when there are no matches or more than one.
    """
    matches = el.cssselect(css_sel)
    if len(matches) != 1:
        # A missing match and an ambiguous match are both reported as None.
        return None
    return matches[0].text
def get_albums(aid, timeout=30):
    """Scrape release rows from a Discogs artist page.

    Fetches ``http://www.discogs.com/artist/<aid>?limit=500``, walks every
    ``#artist tr`` row in document order and collects one tuple per row
    until the row whose ``td h3`` heading reads "Singles & EPs".

    Args:
        aid: Artist identifier; converted with ``str()`` and placed in the URL.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            raises instead of blocking forever.

    Returns:
        A list of ``(id, type, title, formats, year)`` tuples. ``id`` and
        ``type`` come from the row's ``data-object-id`` and
        ``data-object-type`` attributes; the other fields come from
        ``extract`` and are ``None`` when the selector does not match
        exactly one element.

    NOTE(review): only the "Albums" heading row is skipped. Any other row
    seen before "Singles & EPs" (including other heading rows) is appended
    as-is -- confirm this is intended.
    """
    url = "http://www.discogs.com/artist/" + str(aid) + "?limit=500"
    response = requests.get(
        url,
        headers={'User-Agent': 'I wish your API was better?'},
        timeout=timeout,
    )
    root = lxml.html.fromstring(response.text)

    albums = []
    for row in root.cssselect("#artist tr"):
        section = extract(row, "td h3")
        if section == "Albums":
            # Heading row that opens the albums section: nothing to record.
            continue
        if section == "Singles & EPs":
            # Everything from this heading onwards is out of scope.
            break

        # Local names avoid shadowing the ``id`` and ``type`` builtins.
        object_id = row.get("data-object-id")
        object_type = row.get("data-object-type")
        title = extract(row, ".title a")
        formats = extract(row, ".title .format")
        year = extract(row, "td[data-header=\"Year: \"]")
        albums.append((object_id, object_type, title, formats, year))
    return albums
if __name__ == "__main__":
    # Demo run: print every tuple returned for the hard-coded id 99952.
    for album in get_albums(99952):
        # Call form of print gives the same output on Python 2 and also
        # runs on Python 3, where the bare ``print album`` statement fails.
        print(album)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement