Advertisement
nicuf

googletrans translate website

Jul 6th, 2023
109
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.74 KB | None | 0 0
  1. from googletrans import Translator
  2. import requests
  3. import os
  4. import re
  5. from html.parser import HTMLParser
  6.  
  7. translator = Translator()
  8.  
  9. files_from_folder = r"c:\\Folder3"
  10. use_translate_folder = True
  11. destination_language = 'ru'
  12. extension_file = ".html"
  13. directory = os.fsencode(files_from_folder)
  14.  
  15. class MyHTMLParser(HTMLParser):
  16. def __init__(self, *args, **kwargs):
  17. super().__init__(*args, **kwargs)
  18. self.translation = ""
  19. self.in_target_element = False
  20. self.self_closing_tags = ["area", "base", "br", "col", "command", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr"]
  21.  
  22. def handle_starttag(self, tag, attrs):
  23. if tag == "title":
  24. self.in_target_element = True
  25. if tag == "h3" and any(attr in [("class", "font-weight-normal"), ("class", "color-black")] for attr in attrs):
  26. self.in_target_element = True
  27. if tag == "p" and any(attr in [("class", "text_obisnuit"), ("class", "text_obisnuit2")] for attr in attrs):
  28. self.in_target_element = True
  29. if tag == "meta" and ("name", "description") in attrs:
  30. attr_dict = dict(attrs)
  31. if 'content' in attr_dict:
  32. translated_content = translator.translate(attr_dict['content'], dest=destination_language).text
  33. attr_dict['content'] = translated_content
  34. self.translation += '<meta {}>'.format(' '.join('{}="{}"'.format(k, v) for k, v in attr_dict.items()))
  35. else:
  36. self.translation += self.get_starttag_text()
  37.  
  38. def handle_endtag(self, tag):
  39. if self.in_target_element and tag in ["p", "title"]:
  40. self.in_target_element = False
  41. if tag not in self.self_closing_tags:
  42. self.translation += "</{}>".format(tag)
  43.  
  44. def handle_data(self, data):
  45. if self.in_target_element and data.strip() != '' and 'pastebin.com' not in data:
  46. try:
  47. data = translator.translate(data, dest=destination_language).text
  48. except:
  49. pass
  50. self.translation += data
  51.  
  52. def handle_comment(self, data):
  53. self.translation += f"<!--{data}-->"
  54.  
  55.  
  56. for file in os.listdir(directory):
  57. filename = os.fsdecode(file)
  58. # print(filename)
  59. if filename == 'y_key_e479323ce281e459.html' or filename == 'directory.html':
  60. continue
  61. if filename.endswith(extension_file):
  62. with open(os.path.join(files_from_folder, filename), 'r', encoding='utf-8-sig') as f:
  63. original_html = f.read()
  64.  
  65. if 'pastebin.com' in original_html:
  66. print(f"Skipping file {filename} because it contains 'pastebin.com'")
  67. continue
  68.  
  69.  
  70. original_html = original_html.replace('<!DOCTYPE html>', '', 1)
  71.  
  72. parser = MyHTMLParser()
  73. parser.feed(original_html)
  74. translated_html = parser.translation
  75.  
  76. translated_html = re.sub(r'<meta property="og:url" content="https://neculaifantanaru.com/en/', '<meta property="og:url" content="https://neculaifantanaru.com/' + destination_language + '/', translated_html)
  77. translated_html = re.sub(r'<link rel="canonical" href="https://neculaifantanaru.com/en/', '<link rel="canonical" href="https://neculaifantanaru.com/' + destination_language + '/', translated_html)
  78. translated_html = re.sub(r'<html lang="en">', '<html lang="' + destination_language + '">', translated_html)
  79. translated_html = re.sub(r'<meta http-equiv="Content-Language" content="en"/>', '<meta http-equiv="Content-Language" content="' + destination_language + '"/>', translated_html)
  80. translated_html = re.sub(r'<meta property="og:locale" content="en"', '<meta property="og:locale" content="' + destination_language + '"', translated_html)
  81. translated_html = re.sub(r'"url": "https://neculaifantanaru.com/en/', '"url": "https://neculaifantanaru.com/' + destination_language + '/', translated_html)
  82.  
  83. new_filename = f'{filename.split(".")[0]}_{destination_language}.html'
  84. if use_translate_folder:
  85. try:
  86. with open(os.path.join(files_from_folder + r'\translated', new_filename), 'w', encoding='utf-8-sig') as new_html:
  87. new_html.write('<!DOCTYPE html>' + translated_html)
  88. except:
  89. os.mkdir(files_from_folder + r'\translated')
  90. with open(os.path.join(files_from_folder + r'\translated', new_filename), 'w', encoding='utf-8-sig') as new_html:
  91. new_html.write('<!DOCTYPE html>' + translated_html)
  92. else:
  93. with open(os.path.join(files_from_folder, new_filename), 'w', encoding='utf-8-sig') as html:
  94. html.write('<!DOCTYPE html>' + translated_html)
  95. print(filename)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement