# Untitled

# -*- coding: utf-8 -*-

from bs4 import BeautifulSoup
import urllib3


# Rewrite every <img src="..."> in test.html to point at a local image file and
# save the result as whatever.html.
#
# To process multiple HTML files, wrap this section in a for loop and replace
# the hard-coded file names with variables.
# https://stackoverflow.com/questions/1120707/using-python-to-execute-a-command-on-every-file-in-a-folder

# Read the page as raw bytes so BeautifulSoup can detect the document's own
# encoding instead of depending on the platform's default text encoding.
# The with-block closes the file even if parsing raises.
with open('test.html', 'rb') as htmlToOpen:
    # Build the parsed HTML tree using Python's built-in 'html.parser' backend.
    soup = BeautifulSoup(htmlToOpen, 'html.parser')

# Original web address of the last image that was rewritten. It stays empty
# when the page contains no images. The download section further down reads it.
imageWebPath = ""

# Local file name that every image tag will point to after the rewrite.
# To handle multiple images, give each one its own name (for example by
# appending a counter to the file name inside the loop below).
newImagePath = "image.jpg"

# Collect every <img> tag that actually carries a src attribute; src=True
# filters out tags without one, which would otherwise raise KeyError below.
# https://stackoverflow.com/questions/43982002/extract-src-attribute-from-img-tag-using-beautifulsoup/47166671
imagesInHtmlFile = soup.find_all('img', src=True)

for image in imagesInHtmlFile:
    # Remember where the image used to live, then point the tag at the new file.
    imageWebPath = image['src']
    image['src'] = newImagePath

# Write the modified document to whatever.html as UTF-8. Mode 'w' creates the
# file if it does not exist and truncates it if it does. Use variables instead
# of literal names here to change the naming convention.
with open('whatever.html', 'w', encoding='utf-8') as htmlFile:
    htmlFile.write(str(soup))

##################################################################################

# Download the image that the HTML originally referenced and store it under the
# local file name the rewritten HTML now points to.

# urllib3 issues requests through a PoolManager object.
# https://urllib3.readthedocs.io/en/latest/
http = urllib3.PoolManager()

# imageWebPath is empty when the page had no images; there is nothing to
# download in that case.
# NOTE(review): a relative src (e.g. "images/a.jpg") is not a fetchable URL and
# will still fail here - resolving it needs the page's base URL; confirm inputs.
if imageWebPath:
    # Arguments: the HTTP method ('GET') and the URL to fetch. With the default
    # preload_content=True the body is read in full and the connection is
    # returned to the pool automatically.
    get = http.request('GET', imageWebPath)

    # Do not save an error page under an image file name.
    if get.status != 200:
        raise RuntimeError(
            "Downloading %s failed with HTTP status %s" % (imageWebPath, get.status)
        )

    # The complete response body as bytes.
    dataDownload = get.data

    # Open the target only after the download succeeded so a failed request
    # does not leave an empty file behind. 'wb' creates or truncates the file
    # and writes raw bytes. The name matches the src written into the HTML.
    with open(newImagePath, 'wb') as imageFile:
        imageFile.write(dataDownload)