# Parse HTML tags (title, description, keywords), etc.

import requests
import re

# Folder holding the English HTML pages; the script below reads its input
# from this folder and writes its output back into it.
english_folder2 = r"c:\Folder1"

# Only directory entries whose name ends with this suffix are processed.
extension_file = ".html"

# True  -> the result is saved under the original file name.
# False -> the result is saved under a 'parsed_'-prefixed file name.
use_parse_folder = True

import os

# Byte-encoded folder path; both names are derived from english_folder2.
en1_directory = en2_directory = os.fsencode(english_folder2)

# Connection words (Romanian and English stop words, Roman numerals, digits)
# that are ignored when the <title> text is turned into the <meta keywords> value.
LISTA_CUVINTE_LEGATURA = [
    'in', 'la', 'unei', 'si', 'sa', 'se', 'de', 'prin', 'unde', 'care', 'a',
    'al', 'prea', 'lui', 'din', 'ai', 'unui', 'acei', 'un', 'doar', 'tine',
    'ale', 'sau', 'dintre', 'intre', 'cu','ce', 'va', 'fi', 'este', 'cand', 'o',
    'cine', 'aceasta', 'ca', 'dar', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII',
    'to', 'was', 'your', 'you', 'is', 'are', 'iar', 'fara', 'aceasta', 'pe', 'tu',
    'nu', 'mai', 'ne', 'le', 'intr', 'cum', 'e', 'for', 'she', 'it', 'esti',
    'this', 'that', 'how', 'can', 't', 'must', 'be', 'the', 'and', 'do', 'so', 'or', 'ori',
    'who', 'what', 'if', 'of', 'on', 'i', 'we', 'they', 'them', 'but', 'where', 'by', 'an',
    'on', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'made', 'make', 'my', 'me', '-',
    'vom', 'voi', 'ei', 'cat', 'ar', 'putea', 'poti', 'sunteti', 'inca', 'still', 'noi', 'l',
    'ma', 's', 'dupa', 'after', 'under', 'sub', 'niste', 'some', 'those', 'he'
]

def creeaza_lista_keywords(titlu):
    """Build the comma-separated keyword string for a page title.

    Only the text before the first vertical bar ``|`` is considered. Words are
    extracted with the regular expression below, lower-cased, and dropped when
    they appear in ``LISTA_CUVINTE_LEGATURA``. The stop-word comparison is
    case-insensitive, so capitalised title words such as "The" or "How" are
    filtered exactly like their lower-case forms.

    Args:
        titlu: The title text (typically the content of the <title> tag).

    Returns:
        The remaining keywords, lower-cased and joined with ``', '``; an empty
        string when nothing is left.
    """
    # Split the title in two at the vertical bar and keep the first part.
    prima_parte_titlu = titlu.split('|')[0]
    # Normalise the stop words once per call so that entries stored in upper
    # case (the Roman numerals) take part in the case-insensitive comparison.
    cuvinte_ignorate = {cuvant.lower() for cuvant in LISTA_CUVINTE_LEGATURA}
    # A word is a run of word characters; '!' (optionally preceded by hyphens)
    # is kept as part of the word, while a plain hyphen separates words.
    cuvinte = re.findall(r'(?:\w|-*\!)+', prima_parte_titlu)
    keywords_OK = [
        cuvant.lower()
        for cuvant in cuvinte
        if cuvant.lower() not in cuvinte_ignorate
    ]
    return ", ".join(keywords_OK)


# Directory entries that are skipped even though they carry the .html suffix.
_FISIERE_IGNORATE = ('y_key_e479323ce281e459.html', 'directory.html')


def _first_match(pattern, text, group=0):
    """Return ``group`` of the first match of ``pattern`` in ``text``, or None."""
    found = re.search(pattern, text)
    return found[group] if found else None


def _set_content_attribute(document, tag_pattern, value):
    """Put ``value`` into the ``content="..."`` attribute of a matched tag.

    The first tag matching ``tag_pattern`` is looked up in ``document``; every
    occurrence of that exact tag text is then replaced by the updated tag.
    ``document`` is returned unchanged when ``value`` is None or no tag matches.
    """
    if value is None:
        return document
    old_tag = _first_match(tag_pattern, document)
    if old_tag is None:
        return document
    # A callable replacement inserts ``value`` literally; a plain string would
    # be parsed as a replacement template and mangle or reject backslashes.
    new_tag = re.sub(r'content=".+"', lambda _m: f'content="{value}"', old_tag)
    return document.replace(old_tag, new_tag)


def _set_json_field(document, field_pattern, value, suffix=','):
    """Rewrite the value of a ``"name": ...`` line matched by ``field_pattern``.

    Everything from the first colon to the end of the matched line becomes
    ``: "<value>"`` followed by ``suffix``. ``document`` is returned unchanged
    when ``value`` is None or the field is not present.
    """
    if value is None:
        return document
    old_line = _first_match(field_pattern, document)
    if old_line is None:
        return document
    new_line = re.sub(r':.+', lambda _m: f': "{value}"{suffix}', old_line)
    return document.replace(old_line, new_line)


def _sync_metadata(source_html, target_html):
    """Copy title, canonical URL and description data into the other tags.

    Values are read from ``source_html`` and written into ``target_html``.
    A value that is missing from ``source_html`` leaves the dependent tags of
    ``target_html`` untouched (nothing is carried over from a previous file).

    Returns:
        The updated ``target_html`` text.
    """
    # --- <title> -> og:title, keywords, abstract, Subject, headline, keywords
    title_tag = _first_match('<title.+/title>', source_html)
    title_text = None
    if title_tag is not None:
        title_text = _first_match('>(.+)<', title_tag, 1)
    keyword_text = None
    if title_text is not None:
        keyword_text = creeaza_lista_keywords(title_text)

    target_html = _set_content_attribute(
        target_html, '<meta property="og:title".*>', title_text)
    target_html = _set_content_attribute(
        target_html, '<meta name="keywords".*>', keyword_text)
    target_html = _set_content_attribute(
        target_html, '<meta name="abstract".*>', title_text)
    target_html = _set_content_attribute(
        target_html, '<meta name="Subject".*>', title_text)
    target_html = _set_json_field(target_html, '"headline":.+', title_text)
    # NOTE(review): the "keywords" field receives the full title text, not the
    # filtered keyword list used for <meta name="keywords"> - confirm intent.
    target_html = _set_json_field(target_html, '"keywords":.+', title_text)

    # --- canonical link -> og:url and "@id"
    canonical_url = _first_match(
        '<link rel="canonical" href="(.+)".*>', source_html, 1)
    target_html = _set_content_attribute(
        target_html, '<meta property="og:url".*>', canonical_url)
    # The "@id" line is written without a trailing comma, unlike the others.
    target_html = _set_json_field(
        target_html, '"@id":.+', canonical_url, suffix='')

    # --- meta description -> og:description, "description", description tag
    description_tag = _first_match('<meta name="description".+>', source_html)
    description_text = _first_match(
        '<meta name="description" content="(.+)".*>', source_html, 1)
    target_html = _set_content_attribute(
        target_html, '<meta property="og:description".+/>', description_text)
    target_html = _set_json_field(
        target_html, '"description":.+', description_text)

    # Finally overwrite the description and title tags themselves with the
    # tags taken from the source (literal replacement, see above).
    if description_tag is not None:
        target_html = re.sub(
            '<meta name="description".+/>', lambda _m: description_tag, target_html)
    if title_tag is not None:
        target_html = re.sub(
            '<title.+/title>', lambda _m: title_tag, target_html)
    return target_html


print('Going through english folder')
amount = 1
for file in os.listdir(en1_directory):
    filename = os.fsdecode(file)
    print(filename)
    if filename in _FISIERE_IGNORATE or not filename.endswith(extension_file):
        continue

    # Source and destination currently live in the same folder, so both reads
    # hit the same file; they are kept separate to mirror the two roles.
    with open(os.path.join(english_folder2, filename), encoding='utf-8') as source_file:
        html = source_file.read()
    try:
        with open(os.path.join(english_folder2, filename), encoding='utf-8') as target_file:
            en_html = target_file.read()
    except FileNotFoundError:
        # No counterpart to update for this page.
        continue

    en_html = _sync_metadata(html, en_html)

    print(f'{filename} parsed ({amount})')
    amount += 1

    if use_parse_folder:
        # Make sure the output folder exists instead of retrying after a
        # failed write; existing folders are left as they are.
        os.makedirs(english_folder2, exist_ok=True)
        output_path = os.path.join(english_folder2, filename)
    else:
        output_path = os.path.join(english_folder2, 'parsed_' + filename)
    with open(output_path, 'w', encoding='utf-8') as new_html:
        new_html.write(en_html)