Advertisement
Guest User

Untitled

a guest
Mar 22nd, 2018
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.15 KB | None | 0 0
  1. import urllib.parse
  2. import urllib
  3. import os
  4. import sys
  5.  
  6. try:
  7. from bs4 import BeautifulSoup
  8. except ImportError:
  9. print ("[*] Please download and install Beautiful Soup first!")
  10. sys.exit(0)
  11.  
  12.  
  13. url = input("[+] Enter the url: ")
  14. download_path = input("[+] Enter the download path in full: ")
  15.  
  16.  
  17.  
  18. try:
  19. #to make it look legit for the url
  20. headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0"}
  21.  
  22. i = 0
  23.  
  24.  
  25. from urllib.request import urlopen
  26. with urlopen(url) as url:
  27. soup = BeautifulSoup(html_page, "???")
  28. '''
  29. request = urllib.request(url, None, headers)
  30. html = urllib.urlopen(request)
  31. soup = BeautifulSoup(html.read()) #to parse the website
  32. '''
  33.  
  34. for tag in soup.findAll('a', href=True): #find <a> tags with href in it so you know it is for urls
  35. #so that if it doesn't contain the full url it can the url itself to it for the download
  36. tag['href'] = urlparse.urljoin(url, tag['href'])
  37.  
  38. #this is pretty easy we are getting the extension (splitext) from the last name of the full url(basename)
  39. #the spiltext splits it into the filename and the extension so the [1] is for the second part(the extension)
  40. if os.path.splitext(os.path.basename(tag['href']))[1] == '.pdf':
  41. current = urllib2.urlopen(tag['href'])
  42. print ('\n[*] Downloading: %s ') %(os.path.basename(tag['href']))
  43.  
  44. f = open(download_path + "\\" +os.path.basename(tag['href'], "wb"))
  45. f.write(current.read())
  46. f.close()
  47. i+=1
  48.  
  49. print ("\n[*] Downloaded %d files") %(i+1)
  50. input("[+] Press any key to exit...")
  51.  
  52. except KeyboardInterrupt:
  53. print ("[*] Exiting...")
  54. sys.exit(1)
  55.  
  56. except URLError as e:
  57. print ("[*] Could not get information from server!!")
  58. sys.exit(2)
  59.  
  60. except:
  61. print ("I don't know the problem but sorry!!")
  62. sys.exit(3)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement