# Python 3 mgewiki image scraper

import argparse
import os
import re
import shutil
import time
import urllib.error
import urllib.request

def _parse_args():
    """Parse the command-line options of the scraper.

    Options:
        --root  base directory; defaults to the directory containing this file.
        --rate  minimum number of seconds per request (float, default 1.0).

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))

    cli = argparse.ArgumentParser()
    cli.add_argument('--root', default=script_dir)
    cli.add_argument(
        '--rate',
        type=float,
        default=1.0,
        help='min seconds per request',
    )

    namespace = cli.parse_args()
    return namespace

def wrapped_urlopen(request, f_open=urllib.request.urlopen):
    """Open *request* with *f_open*, reporting URL/HTTP errors instead of raising.

    Args:
        request: whatever *f_open* accepts (a URL string or ``Request`` object
            for the default opener).
        f_open: callable used to perform the request.

    Returns:
        The value returned by *f_open*, or None when it raised ``HTTPError``
        or ``URLError`` (the error is printed to stdout). Other exceptions
        propagate unchanged.
    """
    response = None
    try:
        response = f_open(request)
    # HTTPError is a subclass of URLError, so it has to be matched first.
    except urllib.error.HTTPError as http_err:
        print('HTTPError', http_err.code, http_err.reason)
    except urllib.error.URLError as url_err:
        print('URLError', url_err.reason)
    return response

def wait():
    """Sleep until at least ``rate`` seconds have passed since the last call.

    Reads the module globals ``clock`` (perf_counter timestamp recorded by the
    previous call) and ``rate`` (minimum seconds between calls), then stores
    the current perf_counter value back into ``clock``. Both globals must have
    been assigned before the first call.
    """
    global clock, rate
    now = time.perf_counter()
    remaining = rate - now + clock
    # A negative remainder means enough time has already elapsed.
    time.sleep(max(0, remaining))
    clock = time.perf_counter()

def get(url, headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:10.0) Gecko/20100101 Firefox/10.0'}):
    """Fetch *url* after honouring the rate limit.

    Args:
        url: address to request.
        headers: HTTP headers sent with the request; the default supplies a
            browser-like User-Agent. The dict is only read here, never mutated.

    Returns:
        Whatever ``wrapped_urlopen`` returns for the request: the opened
        response, or None when the request failed.
    """
    # Throttle first so the log line is printed right before the request goes out.
    wait()
    print('Scraping: %s' % url)
    return wrapped_urlopen(urllib.request.Request(url=url, headers=headers))

def _fetch_text(url):
    """Fetch *url* with ``get`` and return its body decoded as UTF-8.

    Returns None when the request failed (``get`` returned None) so callers
    can skip the page instead of crashing on ``None.read()``. The response is
    closed once it has been read.
    """
    response = get(url)
    if response is None:
        return None
    with response:
        return response.read().decode('utf-8')


def _main(args):
    """Walk the wiki's Special:NewFiles gallery and download every new image.

    Args:
        args: parsed options; ``args.root`` is the base directory (images are
            stored in its ``images`` sub-directory) and ``args.rate`` is the
            minimum number of seconds between requests.

    Side effects:
        Initialises the module globals ``clock`` and ``rate`` used by
        ``wait``, creates the output directory if needed, and writes one file
        per downloaded image. Files that already exist are skipped.

    Failed requests (reported by ``wrapped_urlopen``) are skipped rather than
    crashing: a failed gallery page ends the crawl, a failed file page or
    image download skips just that image.
    """
    global clock, rate
    clock, rate = 0, args.rate

    outdir = os.path.join(args.root, 'images')
    # makedirs also creates a missing --root and tolerates an existing directory.
    os.makedirs(outdir, exist_ok=True)

    base_url = 'http://readonly.mgewiki.com'
    gallery_url = '%s/index.php?title=Special:NewFiles' % base_url

    # Raw strings: '\?' in a normal literal is an invalid escape sequence.
    offset_re = re.compile(r'href="/index.php\?title=Special:NewFiles&amp;offset=([^&]*?)"', re.DOTALL)
    file_re = re.compile(r'href="/w/File:(.*?)"', re.DOTALL)
    image_re = re.compile(r'href="/images/(?!thumb)(.*?)"', re.DOTALL)

    offset = ''
    while gallery_url is not None:
        gallery_dat = _fetch_text(gallery_url)
        if gallery_dat is None:
            print('Stopping: could not load gallery page %s' % gallery_url)
            break

        # The first offset link points at the next page; a missing link or an
        # unchanged offset means this is the last page to process.
        offset_match = offset_re.search(gallery_dat)
        tmp_offset = offset_match.group(1) if offset_match else None
        if tmp_offset is None or tmp_offset == offset:
            gallery_url = None
        else:
            gallery_url = '%s/index.php?title=Special:NewFiles&offset=%s' % (base_url, tmp_offset)
            offset = tmp_offset

        for name in file_re.findall(gallery_dat):
            url = '%s/w/File:%s' % (base_url, name)
            # Cheap pre-check by file-page name avoids fetching the file page.
            if os.path.exists(os.path.join(outdir, url.split(':')[-1])):
                continue

            page = _fetch_text(url)
            if page is None:
                continue
            image_match = image_re.search(page)
            if image_match is None:
                print('No full-size image link found on %s' % url)
                continue

            image_url = '%s/images/%s' % (base_url, image_match.group(1))
            target = os.path.join(outdir, image_url.split('/')[-1])
            if os.path.exists(target):
                continue

            response = get(image_url)
            if response is None:
                continue
            # Download to a temporary name and rename on success, so an
            # interrupted transfer is never mistaken for a finished image.
            partial = target + '.part'
            with response, open(partial, 'wb') as f:
                shutil.copyfileobj(response, f)
            os.replace(partial, target)

# Script entry point: parse the command line, then run the scraper.
if __name__ == '__main__':
    # Parsing happens outside the try block, so only an interrupt raised while
    # scraping is swallowed below.
    args = _parse_args()
    try:
        _main(args)
    except KeyboardInterrupt:
        # Ctrl-C ends the run quietly, without a traceback.
        pass