Advertisement
Ahmed-_-Taha

trythis

Aug 3rd, 2014
201
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.00 KB | None | 0 0
#!/usr/bin/env python
import http.client
import sys
import unittest
import urllib.parse
import urllib.request
from sys import argv
from test import support

from bs4 import BeautifulSoup
  8. url = "http://www.univ-usto.dz"
  9. urls =[]
  10. try:
  11.     html = urllib.request.urlopen(url)
  12. except:
  13.     print ("[-] No such website")
  14.     exit()
  15. soup = BeautifulSoup(html)
  16. #extension = raw_input('type in an extension:')
  17.  
  18. for tag in soup.find_all('a'):
  19.     try:
  20.         tag["href"] = urllib.parse.urljoin(url, tag['href'])
  21.         if tag['href'] not in urls and '.pdf' in tag['href']:
  22.             newpdf = tag['href'].split("/")
  23.             name = newpdf[-1]
  24.             resp = urllib.request.urlopen(tag['href'])
  25.             meta_data =  resp.info()
  26.             fsize = int(meta_data.getheaders("Content-Length")[0])
  27.             print ("Downloading --> %s \t size: %s "%(name, fsize))
  28.             f = open("name.txt", "wb")
  29.             f.write(resp.read())
  30.             f.close
  31.             urls.append(tag["href"])
  32.             #print tag['href']
  33.         else:
  34.             print tag['href']
  35.     except KeyboardInterrupt:
  36.         print (" User hit CTRL+C")
  37.         exit()
  38.     except:
  39.         pass
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement