Advertisement
Not a member of Pastebin yet? Sign Up — it unlocks many cool features!
- from bs4 import BeautifulSoup
- from bs4.formatter import HTMLFormatter
- import requests
- import re
- #import execjs
- from urllib import parse
- import json
class Py4Js():
    """Pure-Python port of Google Translate's ``tk`` request-token algorithm.

    The original implementation compiled an embedded JavaScript snippet with
    ``execjs`` — but the ``import execjs`` line at the top of this file is
    commented out, so instantiating the class raised NameError, and execjs is
    a third-party dependency anyway.  This port reproduces the JS ``TL``/``RL``
    functions exactly, including their 32-bit wrap-around semantics, with no
    external dependency.  The public interface (``getTk``) is unchanged.
    """

    # Seed constants lifted verbatim from the original JS snippet.
    _B = 406644          # "b"  in TL()
    _B1 = 3293161072     # "b1" in TL()
    _MASK32 = 0xFFFFFFFF

    def __init__(self):
        # Nothing to compile any more; kept for interface compatibility.
        pass

    @staticmethod
    def _rl(a, seq):
        """Port of the JS ``RL(a, b)`` mixing routine.

        ``seq`` is consumed in triples (combine-op, shift-dir, amount):
        letters 'a'..'z' encode shift amounts 10..35 and digits encode
        themselves; '+' in the middle slot means an unsigned 32-bit right
        shift (JS ``>>>``), anything else a left shift; '+' in the first
        slot means 32-bit addition, anything else XOR.

        Values are kept congruent mod 2**32 (add masks, xor/shift-left do
        not need to); the caller applies a final mask to recover the exact
        JS uint32 result.
        """
        for i in range(0, len(seq) - 2, 3):
            ch = seq[i + 2]
            d = ord(ch) - 87 if ch >= 'a' else int(ch)
            # JS '>>>' operates on the value as an unsigned 32-bit int.
            d = (a & 0xFFFFFFFF) >> d if seq[i + 1] == '+' else a << d
            a = (a + d) & 0xFFFFFFFF if seq[i] == '+' else a ^ d
        return a

    def getTk(self, text):
        """Return the ``tk`` token for ``text``, e.g. ``'372634.236526'``.

        The JS code first hand-rolls a UTF-8 encoder (including UTF-16
        surrogate-pair handling); ``str.encode('utf-8')`` produces the same
        byte sequence, with ``surrogatepass`` matching the JS behaviour for
        lone surrogates.
        """
        e = text.encode('utf-8', errors='surrogatepass')
        a = self._B
        for byte in e:
            a = self._rl(a + byte, "+-a^+6")
        a = self._rl(a, "+-3^+b+-f")
        # JS: a ^= b1; if (a < 0) a = (a & 0x7FFFFFFF) + 0x80000000
        # i.e. reinterpret the signed int32 as uint32 — a plain mask here.
        a = (a ^ self._B1) & self._MASK32
        a %= 1000000
        return str(a) + "." + str(a ^ self._B)
class Translate_as_google(object):
    """Minimal client for the (legacy) Google Translate web endpoint.

    to_language:   language code to translate into (e.g. 'fr')
    this_language: language code of the source text; 'auto' to auto-detect
    read:          when truthy, read_go() may be used to save a TTS audio file
    """

    def __init__(self, to_language, this_language='auto', read=False):
        self.this_language = this_language
        self.to_language = to_language
        self.read = read

    def open_url(self, url):
        """GET ``url`` with a browser User-Agent and return the response."""
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
        return requests.get(url=url, headers=headers, timeout=8)

    def buildUrl(self):
        """Assemble the translate_a/single request URL.

        Query parameters: sl = source language, tl = target language,
        tk = request token (set by translate()), q = URL-quoted text.
        """
        params = [
            'client=webapp',
            'sl=%s' % self.this_language,
            'tl=%s' % self.to_language,
            'hl=zh-CN',
            # Each dt= flag asks the endpoint for one response section.
            'dt=at', 'dt=bd', 'dt=ex', 'dt=ld', 'dt=md', 'dt=qca',
            'dt=rw', 'dt=rm', 'dt=ss', 'dt=t',
            'ie=UTF-8', 'oe=UTF-8',
            'clearbtn=1', 'otf=1', 'pc=1',
            'srcrom=0', 'ssel=0', 'tsel=0', 'kc=2',
            'tk=' + str(self.tk),
            'q=' + parse.quote(self.text),
        ]
        return 'http://translate.google.cn/translate_a/single?' + '&'.join(params)

    def read_go(self, args):
        """Fetch the TTS audio for the current text and save it to disk.

        args: (upload, return_language) — output file path and the language
        the audio should be spoken in.
        """
        upload, return_language = args[0], args[1]
        read_translate_url = 'http://translate.google.cn/translate_tts?ie=UTF-8&q=%s&tl=%s&total=1&idx=0&textlen=3&tk=%s&client=webapp&prev=input' % (
            self.text, return_language, self.tk)
        data = self.open_url(read_translate_url)  # full HTTP response
        with open(upload, 'wb') as f:
            f.write(data.content)

    def translate(self, text):
        """Translate ``text`` and return the joined result string.

        Raises ValueError when the text exceeds the endpoint's length limit.
        """
        self.text = text
        # Check the limit *before* doing any token work (the original
        # computed the token first, wasting it on oversized input).
        if len(self.text) > 4891:
            # BUG FIX: the original `raise ("...")` raised a plain string,
            # which is itself a TypeError in Python 3.
            raise ValueError("The length of the translation exceeds the limit!!!")
        js = Py4Js()
        self.tk = js.getTk(self.text)
        url = self.buildUrl()
        _result = self.open_url(url)
        data = _result.content.decode('utf-8')
        # Response shape: tmp[0] is a list of [translated, original, ...]
        # sentence fragments; join the non-empty translations with spaces.
        sentences = json.loads(data)[0]
        result = None
        for item in sentences:
            if item[0]:
                result = item[0] if result is None else result + " " + item[0]
        return result
class UnsortedAttributes(HTMLFormatter):
    """HTML formatter that emits tag attributes in their original order.

    BeautifulSoup's default formatter sorts attributes alphabetically on
    output; yielding ``tag.attrs`` untouched preserves the order in which
    the attributes were parsed.
    """

    def attributes(self, tag):
        # dict preserves insertion order, i.e. the source document's order.
        yield from tag.attrs.items()
# --- Folder / language configuration -------------------------------------
# Path to english folder
english_folder = r"c:\Folder1\5\en"
# Path to french folder
french_folder = r"c:\Folder1\5\fr"
source_language = 'en'
destination_language = 'fr'
# Only files with this extension are processed by the main loop below.
extension_file = ".html"
# When True, output is written to a 'parsed+translated' subfolder of
# french_folder instead of french_folder itself.
use_translate_folder = True
import os
# fsencode the paths so os.listdir yields bytes names (decoded per file
# with os.fsdecode in the main loop).
en_directory = os.fsencode(english_folder)
fr_directory = os.fsencode(french_folder)  # NOTE(review): unused in the visible code
def recursively_translate(node):
    """Translate every non-empty text child of ``node`` in place.

    Walks the bs4 tree depth-first: plain text children are replaced with
    their translation, element children are recursed into.  Translation
    failures are deliberately swallowed so one bad string cannot abort the
    whole page (best-effort behaviour, preserved from the original).

    NOTE(review): ``translator`` is not defined anywhere in this file — it is
    presumably a googletrans ``Translator`` instance created elsewhere;
    confirm before running.
    """
    for i, child in enumerate(node.contents):
        if isinstance(child, str):
            # Skip whitespace-only text nodes.
            if child.strip() != '':
                try:
                    node.contents[i].replaceWith(
                        translator.translate(
                            child,
                            src=source_language,
                            dest=destination_language,
                        ).text)
                except Exception:
                    # BUG FIX: was a bare `except:`, which also swallows
                    # KeyboardInterrupt/SystemExit.  Still best-effort.
                    pass
        elif child is not None:
            recursively_translate(child)
# --- Main pass: walk every English page and refresh its French twin ------
print('Going through english folder')
for file in os.listdir(en_directory):
    filename = os.fsdecode(file)  # listdir on a bytes path yields bytes names
    print(filename)
    # Skip the site-verification file and the index page.
    if filename == 'y_key_e479323ce281e459.html' or filename == 'directory.html':
        continue
    if filename.endswith(extension_file):
        # Read the English page; the handle name is rebound to its text.
        with open(os.path.join(english_folder, filename), encoding='utf-8') as html:
            html = html.read()
        # The English page links to its French counterpart as /fr/<name>".
        fr_file = re.search('/fr/(\S+)"', html)[1]
        try:
            # May raise FileNotFoundError when no French twin exists yet —
            # handled below by skipping the file.
            with open(os.path.join(french_folder, fr_file), encoding='utf-8') as fr_html:
                fr_html = fr_html.read()
            # Lift <title>, the description <meta> and the article body
            # (between the ARTICOL marker comments) from the English page...
            title = re.search('<title.+/title>', html)[0]
            meta = re.search('<meta name="description".+/>', html)[0]
            comment_body = re.search('<!-- ARTICOL START -->.+<!-- ARTICOL FINAL -->', html, flags=re.DOTALL)[0]
            # ...and splice them into the French page in place of its own.
            fr_html = re.sub('<!-- ARTICOL START -->.+<!-- ARTICOL FINAL -->', comment_body, fr_html, flags=re.DOTALL)
            fr_html = re.sub('<meta name="description".+/>', meta, fr_html)
            fr_html = re.sub('<title.+/title>', title, fr_html)
            parsed_html = fr_html  # NOTE(review): never read afterwards
            # Wrap in <pre> so whitespace survives parsing; the wrapper is
            # stripped again via soup[5:-6] before writing out.
            soup = BeautifulSoup('<pre>' + fr_html + '</pre>', 'html.parser')
            # Translate the page title(s).
            for title in soup.findAll('title'):
                recursively_translate(title)
            # Translate the meta description's content attribute
            # (best-effort; failures leave the original text in place).
            for meta in soup.findAll('meta', {'name':'description'}):
                try:
                    meta['content'] = translator.translate(meta['content'], src=source_language, dest=destination_language).text
                except:
                    pass
            # For every translatable element kind, translate only the
            # occurrences that sit between the ARTICOL START/FINAL markers
            # (position is checked against the serialized soup each time).
            for h1 in soup.findAll('h1', {'itemprop':'name'}, class_='den_articol'):
                begin_comment = str(soup).index('<!-- ARTICOL START -->')
                end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
                if begin_comment < str(soup).index(str(h1)) < end_comment:
                    recursively_translate(h1)
            for p in soup.findAll('p', class_='text_obisnuit'):
                begin_comment = str(soup).index('<!-- ARTICOL START -->')
                end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
                if begin_comment < str(soup).index(str(p)) < end_comment:
                    recursively_translate(p)
            for p in soup.findAll('p', class_='text_obisnuit2'):
                begin_comment = str(soup).index('<!-- ARTICOL START -->')
                end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
                if begin_comment < str(soup).index(str(p)) < end_comment:
                    recursively_translate(p)
            for span in soup.findAll('span', class_='text_obisnuit2'):
                begin_comment = str(soup).index('<!-- ARTICOL START -->')
                end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
                if begin_comment < str(soup).index(str(span)) < end_comment:
                    recursively_translate(span)
            for li in soup.findAll('li', class_='text_obisnuit'):
                begin_comment = str(soup).index('<!-- ARTICOL START -->')
                end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
                if begin_comment < str(soup).index(str(li)) < end_comment:
                    recursively_translate(li)
            for a in soup.findAll('a', class_='linkMare'):
                begin_comment = str(soup).index('<!-- ARTICOL START -->')
                end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
                if begin_comment < str(soup).index(str(a)) < end_comment:
                    recursively_translate(a)
            for h4 in soup.findAll('h4', class_='text_obisnuit2'):
                begin_comment = str(soup).index('<!-- ARTICOL START -->')
                end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
                if begin_comment < str(soup).index(str(h4)) < end_comment:
                    recursively_translate(h4)
            for h5 in soup.findAll('h5', class_='text_obisnuit2'):
                begin_comment = str(soup).index('<!-- ARTICOL START -->')
                end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
                if begin_comment < str(soup).index(str(h5)) < end_comment:
                    recursively_translate(h5)
        except FileNotFoundError:
            continue
        print(f'{fr_file} parsed and translated')
        # Serialize with original attribute order; decode back to str.
        soup = soup.encode(formatter=UnsortedAttributes()).decode('utf-8')
        if use_translate_folder:
            try:
                # soup[5:-6] strips the '<pre>'/'</pre>' wrapper added above.
                with open(os.path.join(french_folder+r'\parsed+translated', 'parsed+translated_'+fr_file), 'w', encoding='utf-8') as new_html:
                    new_html.write(soup[5:-6])
            except:
                # Output folder missing — create it, then retry the write.
                os.mkdir(french_folder+r'\parsed+translated')
                with open(os.path.join(french_folder+r'\parsed+translated', 'parsed+translated_'+fr_file), 'w', encoding='utf-8') as new_html:
                    new_html.write(soup[5:-6])
        else:
            with open(os.path.join(french_folder, 'parsed+translated_'+fr_file), 'w', encoding='utf-8') as html:
                html.write(soup[5:-6])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement