Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import datetime
- import os
- import re
- import random
- from urllib import request
class ImageCrawler(object):
    """Fetch a web page and download the ``.jpg`` images it links to.

    Every method is a ``@staticmethod``; the class only acts as a namespace.
    """

    @staticmethod
    def get_url(url):
        """Return the body of *url* as text.

        The request carries a browser-like ``User-Agent`` header. The body is
        decoded as UTF-8 and undecodable bytes are dropped.

        Raises whatever ``urllib.request.urlopen`` raises for an unreachable
        or failing URL.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
        }
        page_request = request.Request(url, headers=headers)
        # The context manager closes the HTTP response even if read() fails.
        with request.urlopen(page_request) as response:
            return response.read().decode('utf-8', 'ignore')

    @staticmethod
    def get_specific_img(target_html, input_array):
        """Return the first ``https`` ``.jpg`` URL found for each requested name.

        For every entry of *input_array* the first URL in *target_html* that
        ends with ``<name>.jpg`` is collected. Names without a match are
        reported on stdout and skipped, so the result may be shorter than
        *input_array*.
        """
        img_list = []
        for name in input_array:
            # re.escape keeps the name literal: without it a leading letter
            # could combine with a backslash into a regex escape (e.g. ``\D``)
            # and special characters in the name would break the pattern.
            pattern = r'https://[^\s,"]*' + re.escape(name) + r'\.jpg'
            match = re.search(pattern, target_html)
            if match is None:
                print('cannot find picture %s' % name)
            else:
                img_list.append(match.group(0))
        return img_list

    @staticmethod
    def get_img(target_html):
        """Return every ``https`` URL ending in ``.jpg`` found in *target_html*.

        URLs are returned in document order; duplicates are kept.
        """
        return re.findall(r'https://[^\s,"]*\.jpg', target_html)

    @staticmethod
    def save_img(target_img_list):
        """Download each URL of *target_img_list* into ``./downlosd_image``.

        The directory is created under the current working directory when it
        is missing. Files are named ``<timestamp>_image_<index>.jpg`` with a
        1-based index, and each saved path is printed.
        """
        # NOTE: the folder name is misspelt but kept as-is so existing
        # downloads stay in the same place.
        file_path = os.path.join(os.getcwd(), 'downlosd_image')
        os.makedirs(file_path, exist_ok=True)
        for count_num, img in enumerate(target_img_list, start=1):
            now_time = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
            file_name = "{0}_image_{1}.jpg".format(now_time, count_num)
            # os.path.join instead of a hard-coded backslash, so the file
            # lands inside file_path on every platform.
            image_name = os.path.join(file_path, file_name)
            request.urlretrieve(img, image_name)
            print("Save image:{0}".format(image_name))
# Example picture names for mode 1: 'DSC01533', 'DSC01536', 'DSC01543'
if __name__ == '__main__':
    # Interactive entry point: fetch the gallery page, let the user pick a
    # download mode, build the list of image URLs and save them.
    imageCrawler = ImageCrawler()
    test_target_html = imageCrawler.get_url(
        "https://www.ecu.edu.au/service-centres/MACSC/gallery/gallery.php?folder=152")
    option = input("choose mosde:"
                   "1.Download specific thumbnails "
                   "2.Download ALL thumbnails "
                   "3.Download images in a range "
                   "4.Download random n pictures ")
    # Start empty so an unknown option saves nothing instead of raising
    # NameError at the save_img call below.
    test_img_list = []
    if option == '1':
        n = int(input("input number of pictures:"))
        input_array = [input("input picture name:") for _ in range(n)]
        test_img_list = imageCrawler.get_specific_img(test_target_html, input_array)
    elif option == '2':
        test_img_list = imageCrawler.get_img(test_target_html)
    elif option == '3':
        # Bounds are typed as full picture names such as 'DSC01533'; the first
        # three characters (the 'DSC' prefix) are dropped to get the number.
        left_digits = input("input range left:")[3:]
        right_digits = input("input range right:")[3:]
        range_left = int(left_digits)
        range_right = int(right_digits)
        # Pad back to the width that was typed so leading zeros survive for
        # any number of digits (e.g. 'DSC00999' stays 'DSC00999').
        width = len(left_digits)
        # +1 makes the right bound inclusive: both typed pictures are fetched.
        input_array = ['DSC' + str(picture_digit).zfill(width)
                       for picture_digit in range(range_left, range_right + 1)]
        test_img_list = imageCrawler.get_specific_img(test_target_html, input_array)
    elif option == '4':
        n = int(input("input number of random pictures:"))
        candidates = imageCrawler.get_img(test_target_html)
        # sample() draws without replacement, so no picture is downloaded
        # twice; the count is clamped to what is actually available.
        sample_size = min(max(n, 0), len(candidates))
        test_img_list = random.sample(candidates, sample_size)
    else:
        print('wrong command')
    imageCrawler.save_img(test_img_list)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement