Advertisement
nicuf

BARD - deep_translator

Dec 4th, 2023
509
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.80 KB | None | 0 0
import os
import re

import dotenv
from deep_translator import GoogleTranslator

  5. # Load environment variables from .env file
  6. dotenv.load_dotenv()
  7.  
  8. # Initialize the Deep Translator Translator
  9. translator = GoogleTranslator(source='auto')
  10.  
  11. # Initialize a counter for the translated files
  12. translated_files_count = 0
  13.  
  14. # Folder path containing the HTML files
  15. folder_path = os.getenv('HTML_FOLDER_PATH')
  16.  
  17. # Language to translate to
  18. target_language = 'ro'
  19.  
  20. # HTML tags to translate
  21. tags_to_translate = [
  22.     r'(<title>)(.*?)(<\/title>)',
  23.     r'(<meta name="description" content=")(.*?)("\/>)',
  24.     r'(<div class="sc-jKDlA-D hSgfYV sc-glENfF hIVUeB">)(.*?)(<\/div>)',
  25.     r'(<p>)(.*?)(<\/p>)',
  26.     r'(<h4 class="sc-jMKfon fhunKk">)(.*?)(<\/h4>)',
  27.     r'(<h2">)(.*?)(<\/h2>)'
  28.     # ... alte tag-uri, structurate similar
  29. ]
  30.  
  31. # Iterate over all HTML files in the folder
  32. for filename in os.listdir(folder_path):
  33.     if filename.endswith((".html", ".htm")):
  34.         # Read the HTML content from the file
  35.         with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
  36.             html_content = file.read()
  37.  
  38.         # Initialize a variable to store the translated content
  39.         new_html_content = ''
  40.  
  41.         # Loop through each tag to translate
  42.         for tag in tags_to_translate:
  43.             matches = re.finditer(tag, html_content, re.DOTALL)
  44.  
  45.             for match in matches:
  46.                 # Extract the tag start, content, and end
  47.                 tag_start = match.group(1)
  48.                 tag_content = match.group(2)
  49.                 tag_end = match.group(3)
  50.  
  51.                 # Detect the source language
  52.                 source_language = translator.detect(tag_content)
  53.  
  54.                 # Translate the content
  55.                 translated_content = translator.translate(tag_content, dest=target_language)
  56.  
  57.                 # Build the translated tag
  58.                 translated_tag = f"{tag_start}{translated_content}{tag_end}"
  59.  
  60.                 # Replace the original tag with the translated tag
  61.                 html_content = html_content.replace(match.group(0), translated_tag)
  62.  
  63.         # Write the translated HTML content back to the file
  64.         new_filename = f"{filename.split('.')[0]}_ro.html"
  65.         translated_folder_path = os.path.join(folder_path, 'translated')
  66.  
  67.         if not os.path.exists(translated_folder_path):
  68.             os.mkdir(translated_folder_path)
  69.  
  70.         with open(os.path.join(translated_folder_path, new_filename), 'w', encoding='utf-8') as file:
  71.             file.write(html_content)
  72.  
  73.         # Increment the counter and print the filename and number
  74.         translated_files_count += 1
  75.         print(f"Translated file: {filename}")
  76.         print(f"New filename: {new_filename}")
  77.         print(f"Translated file #{translated_files_count}: {new_filename}")
  78.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement