Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import urllib.request
- import urllib.parse
- import html
- import re
- import sys
# Matches <a href="....mp3"> and captures the href value.  The value is
# limited to non-quote characters so that several anchors on one line are
# captured individually instead of being merged into one greedy match.
_MP3_LINK_RE = re.compile(r'<a href="(?P<name>[^"]+\.mp3)">')


def _find_mp3_links(content):
    """Return the raw href values of all anchors in *content* ending in .mp3."""
    return _MP3_LINK_RE.findall(content)


def _local_file_name(href):
    """Return the local file name to use for an HTML-unescaped link target."""
    decoded = urllib.parse.unquote(href)
    # Strip directory components only *after* percent-decoding, so that an
    # encoded "%2F" / "%5C" in the link cannot direct the download to a
    # location outside the current directory.
    return decoded.replace("\\", "/").rsplit("/", 1)[-1]


def main():
    """Download every .mp3 file linked from an index page.

    Command-line arguments (read from ``sys.argv``):
        1. URL of the index page (required).
        2. Optional 1-based position of the first item to download; a
           non-numeric value is ignored and downloading starts at item 1.

    Prints the usage text and exits with status 1 when the URL is missing.
    Each file is saved in the current working directory under the
    percent-decoded last path component of its link.
    """
    if len(sys.argv) < 2:
        print_usage()
        sys.exit(1)

    # Parse the arguments
    url = sys.argv[1]
    start_index = 0
    if len(sys.argv) > 2 and sys.argv[2].isdecimal():
        # Clamp at 0: a start_at of "0" would otherwise become index -1,
        # which Python treats as "last item".
        start_index = max(int(sys.argv[2]) - 1, 0)

    # Read the url content; the context manager closes the response even if
    # reading fails.
    with urllib.request.urlopen(url) as fp:
        # Final URL after redirects, used to resolve relative links.
        base_url = fp.url
        content = fp.read().decode('utf8', errors='replace')

    links = _find_mp3_links(content)
    total_items = len(links)
    print("Found", total_items, "item(s)")

    for index in range(start_index, total_items):
        href = html.unescape(links[index])
        file_name = _local_file_name(href)
        print("Processing", index + 1, "of", total_items, file_name)
        # urljoin handles relative, root-relative and absolute hrefs, and an
        # index URL that does not end in "/".
        file_url = urllib.parse.urljoin(base_url, href)
        download(file_url, file_name)
def download(url, filename):
    """Fetch *url* and write the response body to *filename*.

    Args:
        url: Address of the resource to retrieve.
        filename: Path of the local file to create (overwritten if present).

    The whole body is read before the output file is opened, so a failed
    transfer does not leave a truncated file behind.
    """
    print("requesting", url)
    # Context manager guarantees the response is closed even when reading
    # or writing raises.
    with urllib.request.urlopen(url) as fp:
        data = fp.read()
    with open(filename, 'wb') as new_file:
        new_file.write(data)
def print_usage():
    """Print the one-line command-line synopsis to standard output."""
    usage_text = "Usage: python3 download.py url_of_index_page [start_at]"
    print(usage_text)
# Run only when executed as a script, so importing this module does not
# start any downloads or call sys.exit().
if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment