# Untitled

import urllib.request
import urllib.parse
import html
import re
import sys

def main():
    """Download every ``.mp3`` file linked from an index page.

    Reads its arguments from ``sys.argv``:

    * ``sys.argv[1]`` -- URL of the index page (required).
    * ``sys.argv[2]`` -- optional 1-based position of the first link to
      download; ignored unless it is a decimal number.

    Prints the usage text and exits with status 1 when no URL is given.
    Each file is saved in the current working directory under the
    decoded last path segment of its link.
    """
    if len(sys.argv) < 2:
        print_usage()
        sys.exit(1)

    # Parse the arguments
    url = sys.argv[1]
    start_index = 0
    # isdecimal() only accepts strings that int() can parse, unlike
    # isdigit(), which also accepts characters such as superscripts.
    if len(sys.argv) > 2 and sys.argv[2].isdecimal():
        # Clamp at 0: a start_at of "0" would otherwise give -1, which
        # indexes the last link and then repeats the whole list.
        start_index = max(int(sys.argv[2]) - 1, 0)

    # Read the url content; the context manager closes the response even
    # when reading fails.
    with urllib.request.urlopen(url) as fp:
        # Final URL of the page (after any redirect), used to resolve links.
        base_url = fp.url
        # Undecodable bytes are replaced rather than aborting the run.
        content = fp.read().decode('utf8', errors='replace')

    links = _find_mp3_links(content)

    # Iterate on all found items, starting at the requested position
    total_items = len(links)
    print("Found", total_items, "item(s)")
    for index in range(start_index, total_items):
        href = links[index]
        file_name = _local_file_name(href)
        print("Processing", index + 1, "of", total_items, file_name)
        # urljoin resolves relative, root-relative and absolute hrefs
        # against the page URL, whether or not that URL ends in "/".
        file_url = urllib.parse.urljoin(base_url, href)
        download(file_url, file_name)


def _find_mp3_links(content):
    """Return the HTML-unescaped href of every ``<a href="...mp3">`` tag."""
    # [^"]+ keeps each match inside a single attribute value, so several
    # anchors on one line are not merged into one match.
    hrefs = re.findall(r'<a href="(?P<name>[^"]+\.mp3)">', content)
    return [html.unescape(href) for href in hrefs]


def _local_file_name(href):
    """Return the percent-decoded last path segment of *href*."""
    decoded = urllib.parse.unquote(href)
    # Strip directories only after decoding, so an encoded separator
    # (%2F or %5C) cannot place the file outside the current directory.
    return decoded.replace("\\", "/").rsplit("/", 1)[-1]

def download(url, filename):
    """Fetch *url* and save the response body to *filename*.

    Args:
        url: Address handed to ``urllib.request.urlopen``.
        filename: Path of the local file to write; it is opened in
            binary write mode, so an existing file is overwritten.

    Exceptions raised while opening the URL or writing the file are not
    caught here and propagate to the caller.
    """
    print("requesting", url)
    # The context manager closes the response even if reading fails; the
    # previous explicit close() was skipped whenever an exception occurred.
    with urllib.request.urlopen(url) as fp:
        data = fp.read()
    # The file is only created once the whole body has been received.
    with open(filename, 'wb') as new_file:
        new_file.write(data)

def print_usage():
    """Write the one-line command synopsis to standard output."""
    usage_text = "Usage: python3 download.py url_of_index_page [start_at]"
    print(usage_text)

# Run the downloader only when this file is executed as a script, so that
# importing the module (for example to reuse download()) has no side effects.
if __name__ == "__main__":
    main()