Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import re

import dotenv
from deep_translator import GoogleTranslator
# Translate the text inside selected HTML tags of every .html/.htm file in
# HTML_FOLDER_PATH and write the result to a "translated" sub-folder.

# Load environment variables from .env file
dotenv.load_dotenv()

# Language to translate to
target_language = 'ro'

# Initialize the Deep Translator translator. The target language has to be
# given here: GoogleTranslator fixes it at construction time (default is
# English) and translate() does not accept a per-call destination.
translator = GoogleTranslator(source='auto', target=target_language)

# Initialize a counter for the translated files
translated_files_count = 0

# Folder path containing the HTML files
folder_path = os.getenv('HTML_FOLDER_PATH')
if not folder_path:
    # os.listdir(None) would silently scan the current working directory and
    # os.path.join(None, ...) would then fail with an unhelpful TypeError.
    raise SystemExit("HTML_FOLDER_PATH is not set; nothing to translate.")

# HTML tags to translate. Each pattern has exactly three groups:
# (opening markup)(text to translate)(closing markup).
tags_to_translate = [
    r'(<title>)(.*?)(<\/title>)',
    r'(<meta name="description" content=")(.*?)("\/>)',
    r'(<div class="sc-jKDlA-D hSgfYV sc-glENfF hIVUeB">)(.*?)(<\/div>)',
    r'(<p>)(.*?)(<\/p>)',
    r'(<h4 class="sc-jMKfon fhunKk">)(.*?)(<\/h4>)',
    r'(<h2>)(.*?)(<\/h2>)',
    # ... other tags, structured similarly
]

# Compile once; DOTALL lets the translated text span several lines.
_compiled_tag_patterns = [re.compile(tag, re.DOTALL) for tag in tags_to_translate]


def _translate_match(match):
    """Return the matched tag with its inner text translated.

    The original markup is returned unchanged when there is no text to
    translate or when the translator yields an empty result, so content is
    never dropped from the page.
    """
    tag_start, tag_content, tag_end = match.groups()
    if not tag_content.strip():
        return match.group(0)
    translated_content = translator.translate(tag_content)
    if not translated_content:
        return match.group(0)
    return f"{tag_start}{translated_content}{tag_end}"


# Iterate over all HTML files in the folder
for filename in os.listdir(folder_path):
    if not filename.endswith((".html", ".htm")):
        continue

    # Read the HTML content from the file
    with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
        html_content = file.read()

    # Apply each tag pattern in turn. re.sub rewrites every occurrence at its
    # own position, so duplicated snippets are neither skipped nor replaced
    # twice, and the callback's return value is inserted literally.
    for pattern in _compiled_tag_patterns:
        html_content = pattern.sub(_translate_match, html_content)

    # Write the translated HTML content to the "translated" sub-folder.
    # splitext keeps dots that are part of the name ("a.b.html" -> "a.b").
    new_filename = f"{os.path.splitext(filename)[0]}_{target_language}.html"
    translated_folder_path = os.path.join(folder_path, 'translated')
    os.makedirs(translated_folder_path, exist_ok=True)
    with open(os.path.join(translated_folder_path, new_filename), 'w', encoding='utf-8') as file:
        file.write(html_content)

    # Increment the counter and print the filename and number
    translated_files_count += 1
    print(f"Translated file: {filename}")
    print(f"New filename: {new_filename}")
    print(f"Translated file #{translated_files_count}: {new_filename}")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement