Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from bs4 import BeautifulSoup
- from urllib.parse import urlparse
- import re
- import string
- #functions
def extract_text_from_website(url):
    """Fetch *url*, extract all visible text, lowercase it, and write it
    to the module-level ``dump_file_name`` file.

    On a non-200 response nothing is written and a diagnostic is printed.
    NOTE(review): relies on the module-level global ``dump_file_name``
    being defined before this is called — set by the script body below.
    """
    # A browser-like User-Agent: some sites refuse the default
    # python-requests agent string.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        all_text = soup.get_text(separator=' ')
        lower_text = all_text.lower()
        # 'with' guarantees the handle is closed; the original opened the
        # file and never closed it (resource leak / possible unflushed data).
        with open(dump_file_name, "w", encoding="utf8") as dump_file:
            dump_file.write(lower_text)
    else:
        print(f"Failed to retrieve {url}. Status code: {response.status_code}")
def clean_text(input_file):
    """Strip non-alphabetic characters and stop words from *input_file*,
    rewriting the file in place with the surviving words joined by single
    spaces.

    Bug fix: the original ignored ``input_file`` and operated on the
    module-level global ``dump_file_name``; the parameter is now honored
    (the existing call site passes the same value, so behavior at the
    call site is unchanged).

    Any I/O error is caught and reported rather than propagated.
    """
    # A set gives O(1) membership tests (the original used a list).
    stop_words = {'a', 'an', 'the', 'and', 'but', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 'can', 'will', 'just', 'you', 'we', 'be', 'are', 'is', 'this', 'if', 'which', 'it', 'do', 'also', 'us', 'may', 'their', 'put'}
    try:
        with open(input_file, 'r', encoding="utf8") as infile:
            content = infile.read()
        # Keep only letters and whitespace; digits and punctuation vanish.
        alpha_content = re.sub(r'[^A-Za-z\s]', '', content)
        non_stop_words = [word for word in alpha_content.split()
                          if word not in stop_words]
        clean_content = ' '.join(non_stop_words)
        with open(input_file, 'w', encoding="utf8") as outfile:
            outfile.write(clean_content)
    except Exception as e:
        print(f"An error occurred: {str(e)}")
def generate_list(file_name=None, url=None):
    """Count word frequencies in the cleaned dump file and overwrite it
    with a "Content Word Density Summary" report (word count, then one
    ``('word', count)`` line per word, most frequent first).

    Parameters are optional for backward compatibility: when omitted they
    fall back to the module-level ``dump_file_name`` / ``target_url``
    globals, exactly as the existing call site (``generate_list()``)
    expects.

    Fixes over the original: the input file handle is now closed (it was
    leaked), and ``split()`` is used instead of ``split(" ")`` so runs of
    whitespace no longer produce empty-string "words" and a trailing
    newline is not glued onto the last word.
    """
    if file_name is None:
        file_name = dump_file_name
    if url is None:
        url = target_url
    word_count = 0
    counts = {}
    with open(file_name, "r", encoding="utf8") as text:
        for line in text:
            for word in line.split():
                counts[word] = counts.get(word, 0) + 1
                word_count += 1
    # Most frequent first; ties keep first-seen order (sorted is stable).
    ranked = sorted(counts.items(), key=lambda t: t[1], reverse=True)
    with open(file_name, "w", encoding="utf8") as file:
        file.write("Content Word Density Summary\n")
        file.write("URL: " + url + "\n")
        file.write("Word Count: " + str(word_count) + "\n")
        file.write("----------------------------\n")
        for items in ranked:
            file.write(f"{items}\n")
def extract_domain(target_url):
    """Return the network-location (domain) part of *target_url*,
    e.g. ``'en.wikipedia.org'`` for a Wikipedia article URL."""
    return urlparse(target_url).netloc
# -- script driver: fetch the page, clean the dump, write the report ------
# These module-level names are read by the functions above, so they must
# stay global with these exact names.
target_url = 'https://en.wikipedia.org/wiki/Antarctica'
domain = extract_domain(target_url)
dump_file_name = domain + '.txt'  # e.g. "en.wikipedia.org.txt"
extract_text_from_website(target_url)
clean_text(dump_file_name)
generate_list()
- #for development--------------------------------------
- #word stats 1.5
- #remove stop words 1.6
- #remove brackets and quotes in report (1.7)
- #input to request the url
- #combine links (broken)
- #combine meta information
- #combine alt-text
- #add page depth (internal link following) option
- #gui version
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement