Advertisement
cmiN

Inefficient link crawler — downloads one page and lists the links found in it

Jan 5th, 2013
130
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.25 KB | None | 0 0
#! /usr/bin/env python3


import re
import sys
from urllib import error, request
from urllib.parse import urljoin, urlparse
  9.  
  10. def extract_urls(fname, path):
  11.     """Extract urls from file.
  12.    
  13.    Open file in text mode then search
  14.    for all URLs using a simple regex and
  15.    return them as a list.
  16.    """
  17.  
  18.     # how to find links
  19.     template = r'(a.+?href=")([^"]+)"'
  20.  
  21.     # parse file and save the links
  22.     links = set()
  23.     with open(fname) as fin:
  24.         regen = re.finditer(template, fin.read())
  25.         for regex in regen:
  26.             string = regex.group(2)
  27.             if urlparse(string).netloc:
  28.                 pass
  29.             elif string.startswith("#"):
  30.                 continue    # no need for fragments
  31.             elif string.startswith(".."):
  32.                 # a "../" relative link
  33.                 string = path[:path.rfind("/")] +\
  34.                          string[2:]
  35.             else:
  36.                 string = path + "/" + string.strip("./")
  37.             parsed = urlparse(string)
  38.             string = parsed.scheme + "://" + parsed.netloc +\
  39.                      parsed.path
  40.             links.add(string)
  41.  
  42.     return list(links)
  43.  
  44.  
  45. def main(argc, argv):
  46.     if argc != 2:
  47.         print("Usage: {} URL".format(argv[0]))
  48.         return 0
  49.  
  50.     # get name
  51.     parsed = urlparse(argv[1])
  52.     regex = re.search(r"(\w+\.)[a-z]+$", parsed.path,
  53.                       flags=re.I)
  54.     urlPath = parsed.scheme + "://" + parsed.netloc
  55.     if regex:
  56.         urlName = regex.group()
  57.         urlPath += "/" + parsed.path[:regex.start()].strip("/")
  58.     else:
  59.         urlName = "index.html"
  60.  
  61.     # create object from link
  62.     try:
  63.         urlReq = request.Request(parsed.geturl())
  64.     except ValueError:
  65.         print("Error: Invalid URL")
  66.         return 1
  67.     else:
  68.         urlReq.add_header("User-Agent", "Mozilla/5.0")
  69.  
  70.     # open it and wirte data to file
  71.     try:
  72.         uin = request.urlopen(urlReq)
  73.     except error.URLError:
  74.         print("Error: Invalid URL")
  75.         return 1
  76.     else:
  77.         with open(urlName, "wb") as fout:
  78.             fout.write(uin.read())
  79.  
  80.     print(extract_urls(urlName, urlPath))
  81.     return 0
  82.  
  83.  
  84. if __name__ == "__main__":
  85.     rc = main(len(sys.argv), sys.argv)
  86.     sys.exit(rc)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement