Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from googletrans import Translator
- import requests
- import os
- import re
- from html.parser import HTMLParser
# Shared googletrans client used for every translation request below.
translator = Translator()

# Where the source pages live and which file suffix marks a page to process.
files_from_folder = r"c:\\Folder3\1"
extension_file = ".html"
# Bytes form of the folder path; the main loop lists this and decodes each
# entry back to text with os.fsdecode.
directory = os.fsencode(files_from_folder)

# Language code handed to the translator and used in the output file names.
destination_language = 'ru'
# When true, results are written to a "translated" sub-folder instead of
# next to the source files.
use_translate_folder = True
class MyHTMLParser(HTMLParser):
    """Re-emit an HTML document with selected pieces of text translated.

    Everything fed to the parser is rebuilt into ``self.translation``.
    Text is sent to the translator only while ``in_target_element`` is set,
    which happens inside ``<title>``, inside ``<h3>`` whose ``class``
    attribute is exactly ``font-weight-normal`` or ``color-black``, and
    inside ``<p>`` whose ``class`` attribute is exactly ``text_obisnuit`` or
    ``text_obisnuit2``.  The ``content`` attribute of
    ``<meta name="description">`` is translated as well.

    This class defines no handler for declarations, so a doctype in the
    input does not appear in ``self.translation``.

    Args:
        *args: Forwarded unchanged to ``HTMLParser``.
        translate: Optional callable taking the source text and returning
            the translated text.  When omitted, the module-level
            ``translator`` and ``destination_language`` are used.
        **kwargs: Forwarded unchanged to ``HTMLParser``.
    """

    # Class values that make an <h3>/<p> a translation target.
    _TARGET_CLASSES = {
        "h3": ("font-weight-normal", "color-black"),
        "p": ("text_obisnuit", "text_obisnuit2"),
    }
    # Every tag that can switch translation on must also switch it off.
    _TARGET_END_TAGS = ("title", "h3", "p")

    def __init__(self, *args, translate=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.translation = ""
        self.in_target_element = False
        # Elements for which handle_endtag must not emit a closing tag.
        self.self_closing_tags = ["area", "base", "br", "col", "command", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr"]
        self._translate_fn = translate

    def _translate(self, text):
        """Return *text* translated, or *text* itself if translation fails.

        Only the stripped core is sent to the translator; the original
        leading and trailing whitespace is put back afterwards so words next
        to inline tags stay separated even if the backend trims its result.
        """
        core = text.strip()
        if not core:
            return text
        lead = text[:len(text) - len(text.lstrip())]
        trail = text[len(text.rstrip()):]
        try:
            if self._translate_fn is not None:
                result = self._translate_fn(core)
            else:
                result = translator.translate(core, dest=destination_language).text
        except Exception as exc:
            # Best effort: one failed request must not abort the whole
            # document, but the failure is reported instead of hidden.
            import sys
            print(f"Translation failed, keeping original text: {exc!r}", file=sys.stderr)
            return text
        if not isinstance(result, str):
            # A non-string result cannot be appended to the output.
            return text
        return lead + result + trail

    def handle_starttag(self, tag, attrs):
        """Copy a start tag to the output, flagging or translating targets."""
        wanted_classes = self._TARGET_CLASSES.get(tag, ())
        if tag == "title" or any(
            name == "class" and value in wanted_classes for name, value in attrs
        ):
            self.in_target_element = True

        if tag == "meta" and ("name", "description") in attrs:
            attr_dict = dict(attrs)
            if attr_dict.get("content"):
                from html import escape

                attr_dict["content"] = self._translate(attr_dict["content"])
                # The tag is rebuilt from parsed values, so each value must
                # be escaped again; valueless attributes stay bare names.
                rendered = " ".join(
                    name if value is None else f'{name}="{escape(value, quote=True)}"'
                    for name, value in attr_dict.items()
                )
                self.translation += f"<meta {rendered}>"
                return
        # Anything else is copied exactly as it appeared in the input.
        self.translation += self.get_starttag_text()

    def handle_endtag(self, tag):
        """Copy an end tag to the output and leave translation mode."""
        if self.in_target_element and tag in self._TARGET_END_TAGS:
            self.in_target_element = False
        if tag not in self.self_closing_tags:
            self.translation += f"</{tag}>"

    def handle_data(self, data):
        """Append text to the output, translating it inside a target element."""
        # NOTE(review): unless the caller passes convert_charrefs=False, the
        # base parser hands over text with character references already
        # decoded, and it is written back here without re-escaping.
        # TODO confirm whether the output needs to be re-escaped.
        if self.in_target_element and data.strip() and 'pastebin.com' not in data:
            data = self._translate(data)
        self.translation += data

    def handle_comment(self, data):
        """Copy a comment to the output unchanged."""
        self.translation += f"<!--{data}-->"
def _retarget_language_markup(html_text, language):
    """Return *html_text* with the site's English URL and locale markers set to *language*."""
    site = 'https://neculaifantanaru.com/'
    swaps = (
        (f'<meta property="og:url" content="{site}en/',
         f'<meta property="og:url" content="{site}{language}/'),
        (f'<link rel="canonical" href="{site}en/',
         f'<link rel="canonical" href="{site}{language}/'),
        ('<html lang="en">', f'<html lang="{language}">'),
        ('<meta http-equiv="Content-Language" content="en"/>',
         f'<meta http-equiv="Content-Language" content="{language}"/>'),
        ('<meta property="og:locale" content="en"',
         f'<meta property="og:locale" content="{language}"'),
        (f'"url": "{site}en/', f'"url": "{site}{language}/'),
    )
    # Plain substring replacement: the markers are literal text, so the
    # dots in the URLs must not act as regex wildcards.
    for english, localized in swaps:
        html_text = html_text.replace(english, localized)
    return html_text


# Translate every page in the source folder and write a "<name>_<lang>.html"
# copy, either into a "translated" sub-folder or next to the source file.
for file in os.listdir(directory):
    filename = os.fsdecode(file)
    # These two files are never translated.
    if filename in ('y_key_e479323ce281e459.html', 'directory.html'):
        continue
    if not filename.endswith(extension_file):
        continue

    with open(os.path.join(files_from_folder, filename), 'r', encoding='utf-8-sig') as f:
        original_html = f.read()
    if 'pastebin.com' in original_html:
        print(f"Skipping file {filename} because it contains 'pastebin.com'")
        continue

    # The doctype is removed before parsing and written back in front of
    # the parser output below.
    original_html = original_html.replace('<!DOCTYPE html>', '', 1)
    parser = MyHTMLParser()
    parser.feed(original_html)
    # Make the parser process any input it is still holding back.
    parser.close()
    translated_html = _retarget_language_markup(parser.translation, destination_language)

    # splitext keeps dots inside the base name ("a.b.html" -> "a.b"), so
    # different sources cannot collapse onto the same output file.
    new_filename = f'{os.path.splitext(filename)[0]}_{destination_language}.html'
    if use_translate_folder:
        output_folder = os.path.join(files_from_folder, 'translated')
        # Create the folder if needed; any other write error propagates
        # with its real cause.
        os.makedirs(output_folder, exist_ok=True)
    else:
        output_folder = files_from_folder
    with open(os.path.join(output_folder, new_filename), 'w', encoding='utf-8-sig') as new_html:
        new_html.write('<!DOCTYPE html>' + translated_html)
    print(filename)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement