Advertisement
nicuf

new-regex-translate

Jul 3rd, 2023 (edited)
991
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.20 KB | None | 0 0
  1. import os
  2. import re
  3. from googletrans import Translator
  4.  
  5. translator = Translator()
  6.  
  7. # Folder path
  8. folder_path = r"c:\Folder3\2"
  9.  
  10. # HTML tags to translate
  11. tags_to_translate = ['<title>.*?</title>', '<meta name="description" content=".*?">', '<p class="text_obisnuit2">.*?</p>', '<p class="text_obisnuit">.*?</p>']
  12.  
  13. # Words to check
  14. words_to_check = ['the', 'you', 'which', 'view', 'because', 'here', 'have', 'this', 'two', 'one', 'three', 'four', 'five', 'six', 'seven', 'ten', 'had', 'then', 'see', 'saw', 'also', 'than', 'that', 'must', 'make', 'from']
  15.  
  16. # Iterate over all HTML files in the folder
  17. for filename in os.listdir(folder_path):
  18.     if filename.endswith(".html"):
  19.         with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
  20.             html_content = file.read()
  21.  
  22.             # Iterate over all tags to translate
  23.             for tag in tags_to_translate:
  24.                 matches = re.findall(tag, html_content, re.DOTALL)
  25.  
  26.                 # Translate each match
  27.                 for match in matches:
  28.                     # Check if the match contains at least three of the specified words
  29.                     if sum(word in match for word in words_to_check) >= 3:
  30.                         # Split the match into sentences
  31.                         sentences = re.split(r'(?<=[.!?])\s+', match)
  32.  
  33.                         # Translate each sentence that contains at least three of the specified words
  34.                         for sentence in sentences:
  35.                             if sum(word in sentence for word in words_to_check) >= 3:
  36.                                 translation = translator.translate(sentence, dest='ru').text
  37.  
  38.                                 # Replace the original sentence with the translation in the match
  39.                                 match = match.replace(sentence, translation)
  40.  
  41.                         # Replace the original match with the translated match in the HTML content
  42.                         html_content = html_content.replace(match, translation)
  43.  
  44.         # Write the translated HTML content back to the file
  45.         with open(os.path.join(folder_path, filename), 'w', encoding='utf-8') as file:
  46.             file.write(html_content)
  47.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement