Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os

import requests
from bs4 import BeautifulSoup
# Search configuration.
# To find an artist code, search for the artist on the site and copy the
# "<id>/<slug>" part of the resulting URL.
artist = '30269/andy-warhol'
date_from = '2015-09-01'
date_to = '2019-09-30'
estimate_from = 0
estimate_to = 1000000

# Credentials come from the environment so that secrets are never stored in
# the source file. Set ARTPRICE_LOGIN and ARTPRICE_PASSWORD before running.
login_id = os.environ.get('ARTPRICE_LOGIN', '')
login_pw = os.environ.get('ARTPRICE_PASSWORD', '')

# Listing URL for the artist's past lots, followed by the query string.
# Only the first parameter is introduced with '?'; every later one uses '&'.
path = 'https://www.artprice.com/artist/' + artist + '/lots/pasts/1/painting'
path += '?idc=1'  # paintings only
path += '&dt_from=' + date_from + '&dt_to=' + date_to  # date range
path += '&estimation_from=' + str(estimate_from)  # estimate from
path += '&estimation_to=' + str(estimate_to)  # estimate to
path += '&estimation_idcurrency=154'  # currency
def login():
    """Post the login form, then save every result page with that session.

    Opens a ``requests.Session``, posts the module-level ``login_id`` and
    ``login_pw`` to the login endpoint, prints the response object, and
    passes the same session to ``save_all_pages``. The session is closed
    when the function returns.
    """
    with requests.Session() as session:
        # A dict lets requests form-encode the body: special characters such
        # as '@' are percent-escaped and the form content type is set.
        # '\u2713' is the check mark that the original sent as %E2%9C%93.
        form = {
            'utf8': '\u2713',
            'login': login_id,
            'pass': login_pw,
        }
        res = session.post('https://www.artprice.com/login/login', data=form)
        print(res)
        save_all_pages(session)
def save_all_pages(session):
    """Fetch the listing, work out the page count, and save each page.

    Args:
        session: A ``requests.Session`` used for all HTTP requests.

    The page count is read from the last ``li.page`` element inside the
    ``footer-search-pagination`` div (presumably the highest page number;
    verify against the live markup). If that footer or its page items are
    missing, a single page is assumed.
    """
    req = session.get(path)
    soup = BeautifulSoup(req.text, 'html.parser')

    footer = soup.find('div', class_='footer-search-pagination')
    page_items = footer.find_all('li', class_='page') if footer is not None else []

    # No pagination controls: fall back to one page instead of crashing.
    lastpage_num = int(page_items[-1].text.strip()) if page_items else 1

    # Pages are numbered from 1; the upper bound is inclusive of the last page.
    for i in range(1, lastpage_num + 1):
        save_page(session, i)
def save_page(session, page):
    """Save the text and image of every lot on one result page.

    Args:
        session: A ``requests.Session`` used for all HTTP requests.
        page: 1-based page number, appended to the listing URL as ``&p=``.

    For each ``div.lot-container`` on the page, writes
    ``<page>-<index>.html`` containing the lot's text container and, when the
    lot has an image, ``<page>-<index>.jpg`` containing the downloaded image.
    Files are written to the current working directory.
    """
    page_path = path + '&p=' + str(page)
    # Request the page-specific URL, not the base listing URL.
    req = session.get(page_path)
    soup = BeautifulSoup(req.text, 'html.parser')

    containers = soup.find_all('div', class_='lot-container')
    for index, container in enumerate(containers, start=1):
        filename = "%d-%d" % (page, index)

        txt_container = container.find('div', class_='col-xs-8 col-sm-6')
        # Explicit encoding so non-ASCII lot text is written the same way on
        # every platform.
        with open('%s.html' % filename, 'wt', encoding='utf-8') as file:
            file.write(str(txt_container))

        img_container = container.find('div', class_='lot-images-container')
        img = img_container.find('img') if img_container is not None else None
        src = img.get('src') if img is not None else None
        if not src:
            # Lot has no image; keep the text file and move on.
            continue

        # Replace the last three characters of the image URL with 'original'
        # (presumably a size suffix; verify against the live markup).
        original = src[:-3] + 'original'
        # Download before opening the file so a failed request does not
        # leave an empty .jpg behind.
        jpg = session.get(original)
        with open('%s.jpg' % filename, 'wb') as file:
            file.write(jpg.content)
if __name__ == "__main__":
    # login() opens and closes its own requests.Session, so no session is
    # created here.
    login()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement