Advertisement
nicuf

deep_translator 2

Dec 4th, 2023
526
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.46 KB | None | 0 0
  1. import os
  2. import re
  3. import dotenv
  4. from deep_translator import GoogleTranslator
  5.  
  6. # Load environment variables from .env file
  7. dotenv.load_dotenv()
  8.  
  9. # Initialize the Deep Translator Translator
  10. deep_translator = GoogleTranslator(source='en', target='ro')
  11.  
  12. # Initialize a counter for the translated files
  13. translated_files_count = 0
  14.  
  15. # Folder path containing the HTML files
  16. folder_path = os.getenv('HTML_FOLDER_PATH')
  17.  
  18. # HTML tags to translate
  19. tags_to_translate = [
  20.     r'<title>(.*?)</title>',
  21.     r'<meta name="description" content="(.*?)"/>',
  22.     r'<div class="sc-jKDlA-D hSgfYV sc-glENfF hIVUeB">(.*?)</div>',
  23.     r'<p>(.*?)</p>',
  24.     r'<h4 class="sc-jMKfon fhunKk">(.*?)</h4>',
  25.     r'<h2">(.*?)</h2>'
  26. ]
  27.  
  28. # Iterate over all HTML files in the folder
  29. for filename in os.listdir(folder_path):
  30.     if filename.endswith((".html", ".htm")):
  31.         # Read the HTML content from the file
  32.         with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
  33.             html_content = file.read()
  34.  
  35.         # Initialize a variable to store the translated content
  36.         new_html_content = ''
  37.  
  38.         # Loop through each tag to translate
  39.         for tag in tags_to_translate:
  40.             matches = re.findall(tag, html_content, re.DOTALL)
  41.  
  42.             for match in matches:
  43.                 # Translate the content within the tag using Deep Translator
  44.                 translated_content = deep_translator.translate(match)
  45.  
  46.                 # Replace the original content with the translated content
  47.                 html_content = html_content.replace(match, translated_content.text)
  48.  
  49.                 # Store only the translated content corresponding to the specified tags
  50.                 new_html_content += translated_content.text
  51.  
  52.         # Write the translated HTML content back to the file
  53.         new_filename = f"{filename.split('.')[0]}_ro.html"
  54.         translated_folder_path = os.path.join(folder_path, 'translated')
  55.  
  56.         if not os.path.exists(translated_folder_path):
  57.             os.mkdir(translated_folder_path)
  58.  
  59.         with open(os.path.join(translated_folder_path, new_filename), 'w', encoding='utf-8') as file:
  60.             file.write(new_html_content)
  61.  
  62.         # Increment the counter and print the filename and number
  63.         translated_files_count += 1
  64.         print(f"Translated file: {filename}")
  65.         print(f"New filename: {new_filename}")
  66.         print(f"Translated file #{translated_files_count}: {new_filename}")
  67.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement