Advertisement
nicuf

deep_translator_3

Dec 4th, 2023
532
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.88 KB | None | 0 0
  1. import os
  2. import re
  3. from deep_translator import GoogleTranslator
  4.  
  5. folder_path = r"c:\\download\\myprotein\\extracted\\"
  6. tags_to_translate = [
  7.     r'(<title>)(.*?)(<\/title>)',
  8.     r'(<meta name="description" content=")(.*?)("\/>)',
  9.     r'(<div class="sc-jKDlA-D hSgfYV sc-glENfF hIVUeB">)(.*?)(<\/div>)',
  10.     r'(<p>)(.*?)(<\/p>)',
  11.     r'(<h4 class="sc-jMKfon fhunKk">)(.*?)(<\/h4>)',
  12.     r'(<h2">)(.*?)(<\/h2>)'
  13.     # ... alte tag-uri, structurate similar
  14. ]
  15. translator = GoogleTranslator(source='auto', target='ro')
  16.  
  17. translated_files_count = 0
  18.  
  19. for filename in os.listdir(folder_path):
  20.     if filename.endswith((".html", ".htm")):
  21.         with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
  22.             html_content = file.read()
  23.  
  24.         for tag in tags_to_translate:
  25.             matches = re.finditer(tag, html_content, re.DOTALL)
  26.  
  27.             for match in matches:
  28.                 full_match = match.group(0)
  29.                 tag_start = match.group(1)
  30.                 tag_content = match.group(2)
  31.                 tag_end = match.group(3)
  32.  
  33.                 translated_content = translator.translate(tag_content)
  34.                 translated_tag = f"{tag_start}{translated_content}{tag_end}"
  35.  
  36.                 html_content = html_content.replace(full_match, translated_tag)
  37.  
  38.         new_filename = f"{filename.split('.')[0]}_ro.html"
  39.         translated_folder_path = os.path.join(folder_path, 'translated')
  40.         if not os.path.exists(translated_folder_path):
  41.             os.mkdir(translated_folder_path)
  42.  
  43.         with open(os.path.join(translated_folder_path, new_filename), 'w', encoding='utf-8') as file:
  44.             file.write(html_content)
  45.  
  46.         translated_files_count += 1
  47.         print(f"Se traduce: {filename}")
  48.         print(f"Fisierul tradus: {new_filename}")
  49.         print(f"Fișierul tradus #{translated_files_count}: {new_filename}")
  50.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement