# BARD - deep_translator

import os
import re

import dotenv
from deep_translator import GoogleTranslator

# Load environment variables (notably HTML_FOLDER_PATH) from a .env file.
dotenv.load_dotenv()

# Language to translate to; also used as the suffix of the output file names.
target_language = 'ro'

# deep_translator takes the target language on the translator instance,
# not as a per-call argument of translate().
translator = GoogleTranslator(source='auto', target=target_language)

# Running count of the files translated so far.
translated_files_count = 0

# Folder containing the HTML files to translate.
folder_path = os.getenv('HTML_FOLDER_PATH')

# Patterns for the HTML fragments to translate. Every pattern has exactly
# three groups: (opening markup)(text to translate)(closing markup).
tags_to_translate = [
    r'(<title>)(.*?)(<\/title>)',
    r'(<meta name="description" content=")(.*?)("\/>)',
    r'(<div class="sc-jKDlA-D hSgfYV sc-glENfF hIVUeB">)(.*?)(<\/div>)',
    r'(<p>)(.*?)(<\/p>)',
    r'(<h4 class="sc-jMKfon fhunKk">)(.*?)(<\/h4>)',
    r'(<h2>)(.*?)(<\/h2>)',
    # ... other tags, structured similarly
]


def _translate_match(match):
    """Return the matched fragment with its inner text (group 2) translated.

    The opening (group 1) and closing (group 3) markup are kept verbatim.
    The fragment is returned unchanged when there is nothing to translate
    or when the translator yields an empty result.
    """
    text = match.group(2)
    if not text.strip():
        # Nothing to translate; avoid a pointless request.
        return match.group(0)

    translated = translator.translate(text)
    if not translated:
        # Keep the original text rather than writing "None" or dropping it.
        return match.group(0)

    return f"{match.group(1)}{translated}{match.group(3)}"


def _translate_html(html_content):
    """Apply every pattern in ``tags_to_translate`` to *html_content*.

    Patterns are applied one after another, each on the result of the
    previous one. ``re.sub`` rewrites each match at its own position, so
    only the text that was actually matched is touched.
    """
    for pattern in tags_to_translate:
        html_content = re.sub(
            pattern, _translate_match, html_content, flags=re.DOTALL
        )
    return html_content


# Fail early with a clear message: os.listdir(None) would otherwise silently
# scan the current working directory.
if not folder_path:
    raise SystemExit("HTML_FOLDER_PATH is not set; nothing to translate.")

# Translated copies are written to a 'translated' sub-folder.
translated_folder_path = os.path.join(folder_path, 'translated')

# Iterate over all HTML files in the folder.
for filename in os.listdir(folder_path):
    if not filename.endswith((".html", ".htm")):
        continue

    # Read the HTML content from the file.
    with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
        html_content = file.read()

    html_content = _translate_html(html_content)

    # splitext drops only the final extension, so "a.b.html" -> "a.b".
    base_name = os.path.splitext(filename)[0]
    new_filename = f"{base_name}_{target_language}.html"

    # Created lazily (only once there is something to write); exist_ok avoids
    # the check-then-create race.
    os.makedirs(translated_folder_path, exist_ok=True)

    with open(os.path.join(translated_folder_path, new_filename), 'w', encoding='utf-8') as file:
        file.write(html_content)

    # Increment the counter and report progress.
    translated_files_count += 1
    print(f"Translated file: {filename}")
    print(f"New filename: {new_filename}")
    print(f"Translated file #{translated_files_count}: {new_filename}")