Advertisement
nicuf

deep_translator (translate webpages with Python)

Dec 4th, 2023
610
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.50 KB | None | 0 0
  1. import os
  2. import re
  3. from deep_translator import GoogleTranslator
  4.  
  5. folder_path = r"c:\\download\\myprotein\\extracted\\"
  6. tags_to_translate = [
  7.     r'<title>(.*?)<\/title>',
  8.     r'<meta name="description" content="(.*?)"/>',
  9.     # ... alte tag-uri
  10. ]
  11. translator = GoogleTranslator(source='auto', target='ro')
  12.  
  13. translated_files_count = 0
  14.  
  15. for filename in os.listdir(folder_path):
  16.     if filename.endswith((".html", ".htm")):
  17.         with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
  18.             html_content = file.read()
  19.  
  20.         new_html_content = ''
  21.  
  22.         for tag in tags_to_translate:
  23.             matches = re.findall(tag, html_content, re.DOTALL)
  24.  
  25.             for match in matches:
  26.                 translated_content = translator.translate(match)
  27.                 html_content = html_content.replace(match, translated_content)
  28.                 new_html_content += translated_content
  29.  
  30.         new_filename = f"{filename.split('.')[0]}_ro.html"
  31.         translated_folder_path = os.path.join(folder_path, 'translated')
  32.         if not os.path.exists(translated_folder_path):
  33.             os.mkdir(translated_folder_path)
  34.  
  35.         with open(os.path.join(translated_folder_path, new_filename), 'w', encoding='utf-8') as file:
  36.             file.write(new_html_content)
  37.  
  38.         translated_files_count += 1
  39.         print(f"Se traduce: {filename}")
  40.         print(f"Fisierul tradus: {new_filename}")
  41.         print(f"Fișierul tradus #{translated_files_count}: {new_filename}")
  42.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement