kAldown

ostr

Jun 22nd, 2017
159
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.70 KB | None | 0 0
  1. import argparse
  2. from datetime import date
  3. from calendar import month_name
  4. from threading import Thread
  5. from queue import Queue
  6.  
  7. from lxml import html
  8. import requests
  9.  
  10.  
  11. # test
  12. s800x600 = u'800\xd7600'
  13.  
  14. MAIN_URL = 'https://www.smashingmagazine.com/'
  15. #ADD_URL = f'{year}/{month}/desktop-wallpaper-calendars-{Month}-{year_calc}/'
  16. ADD_URL = '{year}/{month}/desktop-wallpaper-calendars-{Month}-{year_calc}/'
  17. WITH_CALENDAR = 2
  18. WITHOUT_CALENDAR = 3
  19.  
  20. ACTUAL = WITH_CALENDAR
  21.  
  22.  
  23. def is_year_valid(year):
  24.     """ validate year
  25.  
  26.    :type year: int
  27.    :rtype:  int
  28.    """
  29.     year = int(year)
  30.     today_year = date.today().year
  31.     if 2000 <= year <= today_year:
  32.         return year
  33.     raise argparse.ArgumentTypeError("%s is not between 2000 and %s" % (year, today_year))
  34.  
  35.  
  36. def is_month_valid(month):
  37.     """ validate month
  38.  
  39.    :type month: int
  40.    :rtype: int
  41.    """
  42.     month = int(month)
  43.     if 1 <= month <= 12:
  44.         return month
  45.     raise argparse.ArgumentTypeError("%s is not between 1 and 12" % month)
  46.  
  47.  
  48. def get_wallps_urls(month, year):
  49.     """ get all possible wallp urls by resolution
  50.  
  51.    :type month: int
  52.    :type year: int
  53.    :return: list(str)
  54.    """
  55.     urls = []
  56.  
  57.     month_calc = month_name[month % 12 + 1].lower()
  58.     # if it is a december (12) => calculated year must be incremented
  59.     year_calc = year + 1 if month == 12 else year
  60.     # number month should be like '04'
  61.     month = str(month).zfill(2)
  62.  
  63.     r = requests.get(MAIN_URL+ADD_URL.format(
  64.         month=month, year=year, Month=month_calc, year_calc=year_calc
  65.     ))
  66.  
  67.     if r.status_code == 200:
  68.         tree = html.fromstring(r.content)
  69.         urls.extend(tree.xpath(u"//html/body/main//div[contains(@class, 'col')\
  70.                                                and contains(@class, 'main')]\
  71.                            /article[contains(@class, 'tag-wallpapers')]\
  72.                            /ul[position() > 3]\
  73.                            /li[position()={calendar}]\
  74.                            /a[text()='{size}']/@href".format(calendar=ACTUAL, size=s800x600)))
  75.     """
  76.    tree.xpath("//html/body/main//div[@id='content']")
  77.    """
  78.     return urls
  79.  
  80.  
  81. def fetch_by_urls(urls):
  82.     """ download images given urls
  83.    and save in file
  84.  
  85.    :type urls: list(str)
  86.    :return: None
  87.    """
  88.     print('start fetching %s images' % len(urls))
  89.     for url in urls:
  90.         r = requests.get(url, stream=True)
  91.         if r.status_code == 200:
  92.             with open(url.split('/')[-1], 'wb') as f:
  93.                 for chunk in r:
  94.                     f.write(chunk)
  95.  
  96.  
  97. if __name__ == '__main__':
  98.  
  99.     parser = argparse.ArgumentParser(description='wallpapers downloader')
  100.     parser.add_argument('--month', metavar='4', type=is_month_valid,
  101.                         help='choose a month number', required=True,)
  102.     parser.add_argument('--year', metavar='2017', type=is_year_valid,
  103.                         help='choose a year number', required=True)
  104.     # parser.add_argument('--resolution', metavar='800x600', type=str,
  105.     #                     help='choose a resolution', required=True)
  106.     # parser.add_argument('--resolution', metavar='800x600', type=str,
  107.     #                     help='choose a resolution', required=True)
  108.  
  109.     args = parser.parse_args()
  110.  
  111.     urls = get_wallps_urls(month=args.month, year=args.year)
  112.     if urls:
  113.         queue = Queue()
  114.         t1 = Thread(target=fetch_by_urls, args=(urls[:len(urls)/2],))
  115.         t2 = Thread(target=fetch_by_urls, args=(urls[len(urls)/2:],))
  116.         queue.put(t1)
  117.         t1.start()
  118.         queue.put(t2)
  119.         t2.start()
  120.  
  121.         queue.join()
  122.  
  123.         print('done')
  124.     else:
  125.         print('something wrong')
  126.         exit(1)
Advertisement
Add Comment
Please, Sign In to add comment