Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Download every <img> referenced by a web page into the current directory.
#
# Usage: image.py http://url     fetch the page and save its images
#        image.py --version      print the script version and exit
#
# NOTE: this is Python 2 code (urllib2, urlparse, BeautifulSoup 3). The
# statements below are written so they also parse on Python 3, but the
# imports themselves only exist on Python 2.
import os.path
import os
import sys
import BeautifulSoup
import urllib2
import urllib
import urlparse
from clint import textui as text

# A really basic version marker, reported by --version.
version = '1.0.3'

# Get input from the command line - print an error if no argument is given.
if len(sys.argv) < 2:
    print(text.colored.red('> No arguments given!'))  # clint gives us coloured text.
    print(text.colored.red('> Usage: $image.py http://url'))
    sys.exit(1)

# Check for --version and print out the version number if requested.
if sys.argv[1] == "--version":
    print("version - " + version)
    # sys.exit instead of the bare exit(): the latter is injected by the
    # site module and is not guaranteed to exist.
    sys.exit(0)

# The page whose images should be downloaded.
url = sys.argv[1]

# Fetch the page; any failure here is fatal because there is nothing to parse.
try:
    response = urllib2.urlopen(url)
    try:
        page_html = response.read()
    finally:
        # Always release the connection, even if read() fails.
        response.close()
except Exception as err:
    print('Could not connect to url: ' + text.colored.red(str(err)))
    sys.exit(1)

soup = BeautifulSoup.BeautifulSoup(page_html)
images = soup.findAll('img')

# All images are saved next to wherever the script was started from.
output_dir = os.getcwd()

# Loop to go through images, should probably be a function later.
# TODO: images sharing the same file name still overwrite each other;
# they need to be enumerated.
for image in images:
    src = image.get('src')
    if not src:
        # An <img> without a src attribute has nothing to download.
        continue
    try:
        src_string = str(src)
        filename = src_string.split("/")[-1]
        # urljoin leaves absolute URLs (http and https alike) untouched and
        # resolves relative or protocol-relative ones against the page URL,
        # so one code path covers every kind of src.
        resolved_url = urlparse.urljoin(url, src_string)
        if not filename:
            # src ends with "/": there is no file name to save under.
            continue
        if not resolved_url.lower().startswith(('http:', 'https:')):
            # Skip data:, javascript: and other non-downloadable schemes.
            continue
        out_path = os.path.join(output_dir, filename)
        print("Filename: " + text.colored.green(resolved_url))
        print("Saving file to : " + text.colored.blue(out_path))
        urllib.urlretrieve(resolved_url, out_path)
    except ValueError as err:
        # Covers malformed URLs and non-ASCII src values (UnicodeError).
        print('Could not parse image: ' + text.colored.red(str(err)))
    except IOError as err:
        # A single failed download must not abort the remaining images.
        print('Could not download image: ' + text.colored.red(str(err)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement