Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- import re
- from googletrans import Translator
# Folder path
folder_path = r"c:\Folder3\2"

# HTML tags to translate
tags_to_translate = ['<title>.*?</title>', '<meta name="description" content=".*?">', '<p class="text_obisnuit2">.*?</p>', '<p class="text_obisnuit">.*?</p>']

# Words to check
words_to_check = ['the', 'you', 'which', 'view', 'because', 'here', 'have', 'this', 'two', 'one', 'three', 'four', 'five', 'six', 'seven', 'ten', 'had', 'then', 'see', 'saw', 'also', 'than', 'that', 'must', 'make', 'from']

# Sentence boundary: whitespace that follows '.', '!' or '?'.  The capturing
# group makes re.split() keep the separators, so the fragment can be
# reassembled with its original whitespace intact.
_SENTENCE_BREAK = re.compile(r'((?<=[.!?])\s+)')


def _count_marker_words(text, words):
    """Return how many distinct entries of *words* occur in *text*.

    Matching is on whole words and ignores case, so 'ten' is not found inside
    'content' and 'The' counts as 'the'.
    """
    tokens = set(re.findall(r'\w+', text.lower()))
    return sum(1 for word in {w.lower() for w in words} if word in tokens)


def _translate_fragment(fragment, translate, words, min_hits):
    """Translate the sentences of *fragment* that contain enough marker words.

    The fragment is returned unchanged unless it contains at least *min_hits*
    marker words overall; within it, only sentences that reach *min_hits* on
    their own are passed to *translate*.
    """
    if _count_marker_words(fragment, words) < min_hits:
        return fragment
    # Even indices are sentences, odd indices are the whitespace separators.
    pieces = _SENTENCE_BREAK.split(fragment)
    return "".join(
        translate(piece)
        if index % 2 == 0 and _count_marker_words(piece, words) >= min_hits
        else piece
        for index, piece in enumerate(pieces)
    )


def translate_html(html, translate, patterns=None, words=None, min_hits=3):
    """Return *html* with the matching tags translated sentence by sentence.

    Args:
        html: The HTML document as a string.
        translate: Callable taking a sentence and returning its translation.
        patterns: Regular expressions selecting the fragments to translate;
            defaults to ``tags_to_translate``.  They are applied in order with
            ``re.DOTALL``, each one on the result of the previous one.
        words: Marker words used to decide whether text should be translated;
            defaults to ``words_to_check``.
        min_hits: Minimum number of distinct marker words a fragment (and
            each sentence in it) must contain to be translated.

    Returns:
        The document with every qualifying sentence replaced in place.
    """
    if patterns is None:
        patterns = tags_to_translate
    if words is None:
        words = words_to_check

    def _replace(found):
        # NOTE(review): the whole match, markup included, goes through the
        # translator, so the first/last sentence carries the opening/closing
        # tag -- confirm the translation service leaves markup untouched.
        return _translate_fragment(found.group(0), translate, words, min_hits)

    for pattern in patterns:
        # A callable replacement is inserted literally (no backslash escapes).
        html = re.sub(pattern, _replace, html, flags=re.DOTALL)
    return html


def main():
    """Translate every ``.html`` file in ``folder_path`` to Russian in place."""
    translator = Translator()

    def to_russian(text):
        return translator.translate(text, dest='ru').text

    # Iterate over all HTML files in the folder
    for filename in os.listdir(folder_path):
        if not filename.endswith(".html"):
            continue
        path = os.path.join(folder_path, filename)
        with open(path, 'r', encoding='utf-8') as file:
            html_content = file.read()

        translated = translate_html(html_content, to_russian)

        # Write the translated HTML content back only if something changed
        if translated != html_content:
            with open(path, 'w', encoding='utf-8') as file:
                file.write(translated)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement