Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Translate the text inside selected HTML tags of every .html/.htm file in
# `folder_path` to Romanian and write the result to a `translated` subfolder
# as `<original name>_ro.html`.
import os
import re

from deep_translator import GoogleTranslator

# Raw string: the doubled backslashes are part of the path exactly as written.
folder_path = r"c:\\download\\myprotein\\extracted\\"

# Each pattern has exactly three groups: (opening tag)(inner text)(closing tag).
# Only the inner text (group 2) is sent to the translator.
tags_to_translate = [
    r'(<title>)(.*?)(<\/title>)',
    r'(<meta name="description" content=")(.*?)("\/>)',
    r'(<div class="sc-jKDlA-D hSgfYV sc-glENfF hIVUeB">)(.*?)(<\/div>)',
    r'(<p>)(.*?)(<\/p>)',
    r'(<h4 class="sc-jMKfon fhunKk">)(.*?)(<\/h4>)',
    # Was '(<h2">)': the stray quote meant this pattern could never match a
    # real <h2> tag.
    r'(<h2>)(.*?)(<\/h2>)',
    # ... other tags, structured similarly
]

translator = GoogleTranslator(source='auto', target='ro')
translated_files_count = 0

# Compile once; DOTALL lets the inner text span several lines.
_tag_patterns = [re.compile(pattern, re.DOTALL) for pattern in tags_to_translate]

translated_folder_path = os.path.join(folder_path, 'translated')


def _translate_match(match):
    """Return the matched tag with its inner text replaced by the translation.

    The match is returned unchanged when the inner text is empty or
    whitespace-only, or when the translator returns nothing usable, so the
    markup is never replaced by an empty string or the text "None".
    """
    tag_start, tag_content, tag_end = match.group(1, 2, 3)
    if not tag_content.strip():
        return match.group(0)
    translated_content = translator.translate(tag_content)
    if not translated_content:
        return match.group(0)
    return f"{tag_start}{translated_content}{tag_end}"


def _translate_html(html_content):
    """Apply every tag pattern in order and return the translated HTML."""
    for pattern in _tag_patterns:
        # re.sub with a callback rewrites each match in place, instead of
        # str.replace() rewriting every identical occurrence repeatedly.
        html_content = pattern.sub(_translate_match, html_content)
    return html_content


for filename in os.listdir(folder_path):
    if not filename.endswith((".html", ".htm")):
        continue

    with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
        html_content = file.read()

    html_content = _translate_html(html_content)

    # splitext keeps every dot except the extension, so "a.b.html" becomes
    # "a.b_ro.html" rather than "a_ro.html" (which could collide).
    new_filename = f"{os.path.splitext(filename)[0]}_ro.html"

    # Created lazily so no output folder appears when there is nothing to
    # translate; exist_ok avoids the check-then-create race.
    os.makedirs(translated_folder_path, exist_ok=True)
    with open(os.path.join(translated_folder_path, new_filename), 'w', encoding='utf-8') as file:
        file.write(html_content)

    translated_files_count += 1
    print(f"Se traduce: {filename}")
    print(f"Fisierul tradus: {new_filename}")
    print(f"Fișierul tradus #{translated_files_count}: {new_filename}")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement