Guest User

Untitled

a guest
Dec 13th, 2018
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.56 KB | None | 0 0
  1. import urllib.request
  2. import urllib.parse
  3. import html
  4. import re
  5. import sys
  6.  
  7. def main():
  8. if len(sys.argv) < 2:
  9. print_usage()
  10. sys.exit(1)
  11. else:
  12. #Parse the arguments
  13. url = sys.argv[1]
  14. start_index = 0
  15. if len(sys.argv) > 2 and sys.argv[2].isdigit():
  16. start_index = int(sys.argv[2]) - 1
  17.  
  18. #Read the url content
  19. fp = urllib.request.urlopen(url)
  20. data = fp.read()
  21. content = data.decode('utf8')
  22. fp.close()
  23.  
  24. #Match the a elements and capture the href values ending in .mp3
  25. match = re.findall("<a href=\"(?P<name>.+\.mp3)\">", content)
  26.  
  27. #iterate on all found items
  28. print("Found", len(match), "item(s)")
  29. index = start_index
  30. total_items = len(match)
  31. while(index < total_items):
  32. item = match[index]
  33. #Remove text before slash / character
  34. last_index = item.rfind("/")
  35. if last_index >= 0:
  36. item = item[last_index+1:]
  37. file_name = urllib.parse.unquote(html.unescape(item))
  38. print("Processing", index +1, "of", total_items, file_name)
  39. file_url = fp.url + html.unescape(item)
  40. download(file_url, file_name)
  41. index += 1
  42.  
  43. def download(url, filename):
  44. print("requesting", url)
  45. fp = urllib.request.urlopen(url)
  46. data = fp.read()
  47. with open(filename, 'wb') as new_file:
  48. new_file.write(data)
  49. fp.close()
  50.  
  51. def print_usage():
  52. print("Usage: python3 download.py url_of_index_page [start_at]")
  53.  
  54. main()
Add Comment
Please, Sign In to add comment