Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import argparse
- from datetime import date
- from calendar import month_name
- from threading import Thread
- from queue import Queue
- from lxml import html
- import requests
# Link text of the resolution to download; '\xd7' is the multiplication sign.
s800x600 = u'800\xd7600'

# Site root, and the per-month wallpaper post path that is appended to it.
# The placeholders are filled in by get_wallps_urls().
MAIN_URL = 'https://www.smashingmagazine.com/'
ADD_URL = '{year}/{month}/desktop-wallpaper-calendars-{Month}-{year_calc}/'

# 1-based <li> positions used in the XPath query of get_wallps_urls().
# NOTE(review): presumably the 2nd item links the variants with a calendar and
# the 3rd the variants without one -- confirm against the page markup.
WITH_CALENDAR, WITHOUT_CALENDAR = 2, 3

# Variant that is actually downloaded.
ACTUAL = WITH_CALENDAR
def is_year_valid(year):
    """Check a year value; usable as an argparse ``type`` callback.

    :param year: anything ``int()`` accepts (argparse passes a str)
    :return: the year as an int if it lies between 2000 and the current
        year, both inclusive
    :raises argparse.ArgumentTypeError: if the year is outside that range
    :raises ValueError: if the value cannot be converted by ``int()``
    """
    year = int(year)
    current_year = date.today().year
    # Guard clause: reject out-of-range values first, then return the int.
    if year < 2000 or year > current_year:
        raise argparse.ArgumentTypeError(
            "%s is not between 2000 and %s" % (year, current_year)
        )
    return year
def is_month_valid(month):
    """Check a month number; usable as an argparse ``type`` callback.

    :param month: anything ``int()`` accepts (argparse passes a str)
    :return: the month as an int if it lies between 1 and 12, both inclusive
    :raises argparse.ArgumentTypeError: if the month is outside that range
    :raises ValueError: if the value cannot be converted by ``int()``
    """
    month = int(month)
    # Guard clause: reject out-of-range values first, then return the int.
    if month < 1 or month > 12:
        raise argparse.ArgumentTypeError("%s is not between 1 and 12" % month)
    return month
def get_wallps_urls(month, year, timeout=30):
    """Collect wallpaper image links from the monthly wallpaper post.

    Builds the post URL from ``MAIN_URL`` and ``ADD_URL``, downloads the page
    and extracts the ``href`` of every link whose text equals ``s800x600``
    in the list item selected by ``ACTUAL``.

    :param month: month number (1..12) used in the URL path
    :type month: int
    :param year: year used in the URL path
    :type year: int
    :param timeout: seconds to wait for the server before giving up
    :type timeout: float
    :return: list of image URLs; empty if the page did not answer with
        HTTP 200 or nothing matched
    :rtype: list(str)
    :raises requests.RequestException: on connection errors or timeout
    """
    # The slug names the month *after* `month` (December wraps to January).
    # NOTE(review): calendar.month_name follows the active locale; the URL
    # needs English names -- confirm the locale is never changed.
    next_month_name = month_name[month % 12 + 1].lower()
    # For December the slug refers to January of the following year.
    year_calc = year + 1 if month == 12 else year

    page_url = MAIN_URL + ADD_URL.format(
        month=str(month).zfill(2),  # month in the path looks like '04'
        year=year,
        Month=next_month_name,
        year_calc=year_calc,
    )

    # Without a timeout requests may block forever on a stalled connection.
    r = requests.get(page_url, timeout=timeout)
    if r.status_code != 200:
        return []

    # Adjacent literals keep the query free of stray continuation whitespace.
    query = (
        u"//html/body/main//div[contains(@class, 'col') "
        u"and contains(@class, 'main')]"
        u"/article[contains(@class, 'tag-wallpapers')]"
        u"/ul[position() > 3]"
        u"/li[position()={calendar}]"
        u"/a[text()='{size}']/@href"
    ).format(calendar=ACTUAL, size=s800x600)

    tree = html.fromstring(r.content)
    return list(tree.xpath(query))
def fetch_by_urls(urls, timeout=30):
    """Download every URL and save it in the current working directory.

    Each file is named after the last path segment of its URL. URLs that do
    not answer with HTTP 200 are reported and skipped.

    :param urls: image URLs to download
    :type urls: list(str)
    :param timeout: seconds to wait for the server before giving up
    :type timeout: float
    :return: None
    :raises requests.RequestException: on connection errors or timeout
    """
    print('start fetching %s images' % len(urls))
    for url in urls:
        # The context manager releases the streamed connection even when
        # writing the file fails; the timeout prevents hanging forever.
        with requests.get(url, stream=True, timeout=timeout) as r:
            if r.status_code != 200:
                print('skip %s (HTTP %s)' % (url, r.status_code))
                continue
            with open(url.split('/')[-1], 'wb') as f:
                # Iterating the response directly yields 128-byte pieces;
                # larger chunks mean far fewer write calls.
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
if __name__ == '__main__':
    # Command-line entry point: parse --month/--year, look up the wallpaper
    # links and download them with two worker threads.
    parser = argparse.ArgumentParser(description='wallpapers downloader')
    parser.add_argument('--month', metavar='4', type=is_month_valid,
                        help='choose a month number', required=True)
    parser.add_argument('--year', metavar='2017', type=is_year_valid,
                        help='choose a year number', required=True)
    args = parser.parse_args()

    urls = get_wallps_urls(month=args.month, year=args.year)
    if not urls:
        print('something wrong')
        # SystemExit works even when the site-provided exit() is unavailable.
        raise SystemExit(1)

    # Integer division: a float from `/` is not a valid slice index.
    half = len(urls) // 2
    workers = [
        Thread(target=fetch_by_urls, args=(urls[:half],)),
        Thread(target=fetch_by_urls, args=(urls[half:],)),
    ]
    for worker in workers:
        worker.start()
    # Wait on the threads themselves; Queue.join() would block forever here
    # because nothing ever calls task_done().
    for worker in workers:
        worker.join()
    print('done')
Advertisement
Add Comment
Please, Sign In to add comment