from bs4 import BeautifulSoup import urllib2 #http://love-python.blogspot.nl/2008/02/get-html-source-of-url.html url = 'http://services.runescape.com/m=itemdb_rs/top100.ws' usock = urllib2.urlopen(url) data = usock.read() usock.close() soup = BeautifulSoup(data).find_all('tbody') tr = soup.find_all('tr') obj = {} for thistr in tr: td = thistr.find_all('td') a = td[0].find_all('a')[0] obj[a.string] = td[5].string print obj #obj now contains all data in the format {"2":"107.6m",...}