Advertisement
Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
#!/usr/bin/env python
"""Fetch a web page and download every unique .pdf file it links to.

Non-PDF links are printed to stdout; each PDF is saved in the current
directory under its own basename.
"""
from test import support
import unittest
import sys
import urllib.error
import urllib.parse
import urllib.request
from bs4 import BeautifulSoup
from sys import argv

url = "http://www.univ-usto.dz"


def _fetch_page(page_url):
    """Return the open HTTP response for *page_url*; exit if unreachable."""
    try:
        return urllib.request.urlopen(page_url)
    except (urllib.error.URLError, ValueError):
        print("[-] No such website")
        sys.exit(1)


def main():
    """Crawl *url* and download every linked PDF exactly once."""
    html = _fetch_page(url)
    # Explicit parser avoids bs4's "no parser specified" warning and keeps
    # behaviour consistent across installs.
    soup = BeautifulSoup(html, "html.parser")
    urls = []  # absolute PDF URLs already downloaded (dedup list)
    for tag in soup.find_all('a'):
        try:
            # Resolve relative links against the page URL.
            tag["href"] = urllib.parse.urljoin(url, tag['href'])
            href = tag['href']
            if href not in urls and '.pdf' in href:
                # Last path component becomes the local file name.
                name = href.split("/")[-1]
                resp = urllib.request.urlopen(href)
                # Bug fix: Python 3's resp.info() is an email.message.Message,
                # which has no getheaders(); use .get() for Content-Length.
                fsize = int(resp.info().get("Content-Length", 0))
                print("Downloading --> %s \t size: %s " % (name, fsize))
                # Bug fix: original wrote every PDF to the literal file
                # "name.txt" and never closed it (`f.close` without the call).
                with open(name, "wb") as f:
                    f.write(resp.read())
                urls.append(href)
            else:
                # Bug fix: Python 2 print statement -> Python 3 function call.
                print(href)
        except KeyboardInterrupt:
            print(" User hit CTRL+C")
            sys.exit(0)
        except (KeyError, urllib.error.URLError, ValueError):
            # Anchor without an href, or a link that failed to download:
            # skip it (original deliberately best-effort here).
            continue


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement