Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- import bs4
- import re
- import sys
def main():
    """Download every image of an xHamster gallery given as argv[1].

    Walks the gallery's paginated listing to collect per-image page links,
    then fetches each image page, extracts the full-size image URL and
    saves it as '<gallery-name>_<index>.<ext>' in the current directory.

    Returns:
        0 on success, 1 on usage error (missing or non-gallery URL).
    """
    # Certificate-verification warnings are noisy for this scraper; silence them.
    requests.packages.urllib3.disable_warnings()
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0'}

    if len(sys.argv) == 1:
        print('Argument required')
        return 1
    gal_url = sys.argv[1]

    # The capture group is the gallery's slug, used to name downloaded files.
    match = re.findall(r'http.+xhamster.com/photos/gallery/[0-9]+/(.+).html', gal_url)
    if not match:
        print('Not xhamster gallery url')
        return 1
    gallery_name = str(match[0])

    # Phase 1: collect links to the individual image pages, one listing page
    # at a time, until a page yields no gallery items.
    image_links = []
    page = 1
    resp = requests.get(gal_url, headers=headers)
    soup = bs4.BeautifulSoup(resp.text, 'lxml')
    items = soup.find_all('div', {'class': 'gallery iItem '})
    while items:
        print('Processing page', page)
        sys.stdout.flush()
        for item in items:
            link = item.find('a')
            image_links.append(str(link['href']))
        page += 1
        resp = requests.get(gal_url + '?page=' + str(page), headers=headers)
        soup = bs4.BeautifulSoup(resp.text, 'lxml')
        items = soup.find_all('div', {'class': 'gallery iItem '})

    print('Found %i images' % len(image_links))
    sys.stdout.flush()

    # Phase 2: fetch each image page, locate the full-size <img>, download it.
    # A failure on one image is reported and skipped, not fatal.
    for index, link in enumerate(image_links, start=1):
        print('Downloading image', index, end=' ')
        sys.stdout.flush()
        try:
            resp = requests.get(link, headers=headers)
            soup = bs4.BeautifulSoup(resp.text, 'lxml')
            img = soup.find('img', {'id': 'imgSized', 'class': 'slideImg'})
            resp = requests.get(img['src'], headers=headers)
            # Keep whatever extension the source URL carries.
            extension = str(img['src'].split('.')[-1])
            filename = gallery_name + '_' + str(index) + '.' + extension
            # Context manager guarantees the file is closed even on write error.
            with open(filename, 'wb') as out:
                out.write(resp.content)
        except Exception as exc:
            # KeyboardInterrupt is not caught here, so Ctrl-C still aborts.
            print('Fail:', exc)
            continue
        print('OK')
        sys.stdout.flush()
    return 0
if __name__ == '__main__':
    # Propagate main()'s status code (0 ok, 1 usage error) to the shell.
    sys.exit(main())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement