Guest User

Untitled

a guest
Oct 10th, 2017
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.21 KB | None | 0 0
  1. import Image
  2. from BeautifulSoup import BeautifulSoup as bs
  3. import urlparse
  4. from urllib2 import urlopen
  5. from urllib import urlretrieve
  6. import os
  7. import sys
  8. import re
  9. import base64
  10.  
  11. class Ysi(object):
  12. def __init__(self):
  13. self.set_picture_folder()
  14.  
  15.  
  16. """ Sets where the picture will be loaded. Normally
  17. wouldn't need to call this by user """
  18. def set_picture_folder(self, temp_folder = ""):
  19. if temp_folder == "":
  20. self.out_folder = "/tmp"
  21. else:
  22. self.out_folder = temp_folder
  23.  
  24.  
  25. """ Get picture from the Internet """
  26. def get_picture(self, url, username="", password=""):
  27. try:
  28. soup = bs(urlopen(url))
  29. except IOError, e:
  30. # Check if error caused by authentication
  31. if not hasattr(e, 'code') or e.code != 401:
  32. print "This page isn't protected by authentication."
  33. print 'But we failed for another reason.'
  34. sys.exit(1)
  35. # If it was caused by authentication, try to authenticate
  36. self.__handle_page_authentication(e, username, password)
  37.  
  38. parsed = list(urlparse.urlunparse(url))
  39.  
  40. for image in soup.findAll("img"):
  41. print "Image: %(src)s" % image
  42. filename = image["src"].split("/")[-1]
  43. parsed[2] = image["src"]
  44. outpath = os.path.join(self.out_folder, filename)
  45.  
  46. if image["src"].lower().startswith("http"):
  47. urlretrieve(image["src"], outpath)
  48. else:
  49. urlretrieve(urlparse.urlparse(parsed, outpath))
  50.  
  51.  
  52. """ This is private method that should be called only if a wanted page has a
  53. authentication """
  54. def __handle_page_authentication(self, e, username, password):
  55. authline = e.headers['www-authenticate']
  56. authobj = re.compile( r'''(?:\s*www-authenticate\s*:)?\s*(\w*)\s+realm=['"]([^'"]+)['"]''',
  57. re.IGNORECASE)
  58. matchobj = authobj.match(authline)
  59.  
  60. # if the authline isn't matched by the regular expression
  61. # then something is wrong
  62. # Well we don't know exactly what pythonchallenge uses but let's hope it works:D
  63. if not matchobj:
  64. print 'Error: The authentication header is badly formed.'
  65. print authline
  66. sys.exit(1)
  67.  
  68. scheme = matchobj.group(1)
  69. realm = matchobj.group(2)
  70. # here we've extracted the scheme
  71. # and the realm from the header
  72.  
  73. if scheme.lower() != 'basic':
  74. print 'Error: Program can only access to basic authentication pages.'
  75. sys.exit(1)
  76. base64string = base64.encodestring(
  77. '%s:%s' % (username, password))[:-1]
  78. authheader = "Basic %s" % base64string
  79. req.add_header("Authorization", authheader)
  80.  
  81. # here we shouldn't fail if the username/password is right
  82. try:
  83. handle = urllib2.urlopen(req)
  84. except IOError, e:
  85. print "Error: password or username is wrong"
  86. sys.exit(1)
Add Comment
Please, Sign In to add comment