Advertisement
Guest User

Untitled

a guest
Oct 14th, 2019
110
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.23 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3.  
  4. # to find out artist code, search artist and look url.
  5. artist = '30269/andy-warhol'
  6. date_from = '2015-09-01'
  7. date_to = '2019-09-30'
  8. estimate_from = 0
  9. estimate_to = 1000000
  10.  
  11. login_id = 'haijinpk@gmail.com'
  12. login_pw = 'Haijin1234'
  13.  
  14. path = 'https://www.artprice.com/artist/'+artist+'/lots/pasts/1/painting'
  15. path += '?idc=1'                                       # paintings only
  16. path += '?dt_from=' + date_from + '&dt_to=' + date_to  # date range
  17. path += '&estimation_from=' + str(estimate_from)       # estimate from
  18. path += '&estimation_to=' + str(estimate_to)           # estimate to
  19. path += '&estimation_idcurrency=154'                   # currency
  20.  
  21.  
  22. def login():
  23.     with requests.Session() as session:
  24.         data = 'utf8=%E2%9C%93'
  25.         data += '&login=' + login_id
  26.         data += '&pass=' + login_pw
  27.         res = session.post('https://www.artprice.com/login/login', data)
  28.         print(res)
  29.         save_all_pages(session)
  30.  
  31.  
  32. def save_all_pages(session):
  33.     req = session.get(path)
  34.     html = req.text
  35.     soup = BeautifulSoup(html, 'html.parser')
  36.  
  37.     footer = soup.find('div', class_='footer-search-pagination')
  38.     lastpage = footer.find_all('li', class_='page')[-1]
  39.     lastpage_num = int(lastpage.text.strip())
  40.  
  41.     for i in range(1, lastpage_num-1):
  42.         save_page(session, i)
  43.  
  44.  
  45. def save_page(session, page):
  46.     page_path = path + '&p=' + str(page)
  47.     req = session.get(path)
  48.     html = req.text
  49.     soup = BeautifulSoup(html, 'html.parser')
  50.  
  51.     index = 1
  52.     for container in soup.find_all('div', class_='lot-container'):
  53.         filename = "%d-%d" % (page, index)
  54.         txt_container = container.find('div', class_='col-xs-8 col-sm-6')
  55.         with open('%s.html' % filename, 'wt') as file:
  56.             file.write(str(txt_container))
  57.  
  58.         img_container = container.find('div', class_='lot-images-container')
  59.         img = img_container.find('img')
  60.         original = img.get('src')[:-3] + 'original'
  61.         with open('%s.jpg' % filename, 'wb') as file:
  62.             jpg = session.get(original)
  63.             file.write(jpg.content)
  64.  
  65.         index += 1
  66.  
  67.  
  68. if __name__ == "__main__":
  69.     with requests.Session() as session:
  70.         login()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement