Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import Image
- from BeautifulSoup import BeautifulSoup as bs
- import urlparse
- from urllib2 import urlopen
- from urllib import urlretrieve
- import os
- import sys
- import re
- import base64
class Ysi(object):
    """Download every ``<img>`` referenced by a web page into a local folder.

    Pages protected by HTTP Basic authentication are supported: when the
    first request is answered with a 401, the request is retried with the
    supplied credentials.
    """

    def __init__(self):
        self.set_picture_folder()

    def set_picture_folder(self, temp_folder=""):
        """Set the folder that downloaded pictures are written to.

        Called by ``__init__`` with no argument, so users normally do not
        need to call it themselves.

        :param temp_folder: target folder; the empty string selects ``/tmp``.
        """
        self.out_folder = "/tmp" if temp_folder == "" else temp_folder

    def get_picture(self, url, username="", password=""):
        """Fetch the page at *url* and save each of its images to ``out_folder``.

        :param url: address of the HTML page to scan for ``<img>`` tags.
        :param username: user name, used only if the page answers with 401.
        :param password: password, used only if the page answers with 401.

        Exits the process with status 1 (via ``sys.exit``) when the page
        cannot be opened for a reason other than authentication, or when
        authentication itself fails.
        """
        try:
            page = urlopen(url)
        except IOError as e:
            # Anything other than a 401 is not an authentication problem.
            if not hasattr(e, 'code') or e.code != 401:
                print("This page isn't protected by authentication.")
                print('But we failed for another reason.')
                sys.exit(1)
            # Retry with credentials and keep the authenticated response,
            # otherwise there is nothing to parse below.
            page = self.__handle_page_authentication(
                e, username, password, url)

        soup = bs(page)
        for image in soup.findAll("img"):
            src = image.get("src")
            if not src:
                # An <img> without a src has nothing to download.
                continue
            print("Image: %s" % src)
            # urljoin resolves relative, root-relative and scheme-relative
            # references against the page URL and leaves absolute ones as-is.
            image_url = urlparse.urljoin(url, src)
            # Name the file after the URL path only, so a query string does
            # not end up in the file name.
            filename = os.path.basename(urlparse.urlparse(image_url).path)
            if not filename:
                # e.g. src ends with "/": there is no file name to write to.
                continue
            # NOTE: images are fetched without the page's credentials.
            urlretrieve(image_url, os.path.join(self.out_folder, filename))

    @staticmethod
    def _basic_auth_header(username, password):
        """Return the ``Authorization`` header value for HTTP Basic auth.

        ``b64encode`` is used because it never inserts line breaks, so long
        credentials still yield a single-line header value.
        """
        credentials = '%s:%s' % (username, password)
        if not isinstance(credentials, bytes):
            credentials = credentials.encode('utf-8')
        token = base64.b64encode(credentials)
        if not isinstance(token, str):
            token = token.decode('ascii')
        return "Basic %s" % token

    def __handle_page_authentication(self, e, username, password, url=None):
        """Retry a request that failed with 401 using Basic authentication.

        Private helper; it should only be called when the wanted page
        requires authentication.

        :param e: the error raised by the first, unauthenticated request.
        :param username: user name to authenticate with.
        :param password: password to authenticate with.
        :param url: page to request again; when omitted it is taken from
            ``e.geturl()`` (assumes *e* is a ``urllib2.HTTPError``).
        :returns: the opened, authenticated response.

        Exits the process with status 1 when the challenge header cannot be
        parsed, the scheme is not Basic, or the credentials are rejected.
        """
        # Only `urlopen` is imported from urllib2 at file level, but the
        # Request class is needed here to attach a header.
        import urllib2

        if url is None:
            url = e.geturl()

        # A missing header is treated like a malformed one below.
        authline = e.headers.get('www-authenticate', '')
        authobj = re.compile(
            r'''(?:\s*www-authenticate\s*:)?\s*(\w*)\s+realm=['"]([^'"]+)['"]''',
            re.IGNORECASE)
        matchobj = authobj.match(authline)
        # If the header is not matched by the regular expression then
        # something is wrong with the challenge.
        if not matchobj:
            print('Error: The authentication header is badly formed.')
            print(authline)
            sys.exit(1)

        scheme = matchobj.group(1)
        if scheme.lower() != 'basic':
            print('Error: Program can only access to basic authentication pages.')
            sys.exit(1)

        req = urllib2.Request(url)
        req.add_header("Authorization",
                       self._basic_auth_header(username, password))
        # This should not fail if the username/password is right.
        try:
            return urllib2.urlopen(req)
        except IOError:
            print("Error: password or username is wrong")
            sys.exit(1)
Add Comment
Please, Sign In to add comment