nicuf

Python Translate all txt/html files without API

Sep 29th, 2021 (edited)
281
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.83 KB | None | 0 0
# Explanation here:
#
# ENGLISH: https://neculaifantanaru.com/en/example-python-google-translate-any-text-html-file.html
# ROMANIAN: https://neculaifantanaru.com/ro/example-python-google-translate-any-text-html-file.html
  5.  
  6.  
  7. #!/usr/bin/env python
  8. # -*- encoding: utf-8 -*-
  9. '''
  10. @File    :  google_trans.py
  11. @Time    :  2020/5/15 9:29
  12. @Author  :  hxluo
  13. @Version :  1.0
  14. @Contact :  465801795@qq.com
  15. @Desc    :  google translate
  16.  
  17. '''
  18. # import lib
  19. import requests
  20. import json
  21. from bs4 import BeautifulSoup
  22. import execjs
  23. from urllib import parse
  24. import re
  25. import os
  26.  
  27. class Py4Js():
  28.  
  29.     def __init__(self):
  30.         self.ctx = execjs.compile("""
  31.        function TL(a) {
  32.        var k = "";
  33.        var b = 406644;
  34.        var b1 = 3293161072;
  35.  
  36.        var jd = ".";
  37.        var $b = "+-a^+6";
  38.        var Zb = "+-3^+b+-f";
  39.  
  40.        for (var e = [], f = 0, g = 0; g < a.length; g++) {
  41.            var m = a.charCodeAt(g);
  42.            128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023),
  43.            e[f++] = m >> 18 | 240,
  44.            e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224,
  45.            e[f++] = m >> 6 & 63 | 128),
  46.            e[f++] = m & 63 | 128)
  47.        }
  48.        a = b;
  49.        for (f = 0; f < e.length; f++) a += e[f],
  50.        a = RL(a, $b);
  51.        a = RL(a, Zb);
  52.        a ^= b1 || 0;
  53.        0 > a && (a = (a & 2147483647) + 2147483648);
  54.        a %= 1E6;
  55.        return a.toString() + jd + (a ^ b)
  56.    };
  57.  
  58.    function RL(a, b) {
  59.        var t = "a";
  60.        var Yb = "+";
  61.        for (var c = 0; c < b.length - 2; c += 3) {
  62.            var d = b.charAt(c + 2),
  63.            d = d >= t ? d.charCodeAt(0) - 87 : Number(d),
  64.            d = b.charAt(c + 1) == Yb ? a >>> d: a << d;
  65.            a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d
  66.        }
  67.        return a
  68.    }
  69.    """)
  70.  
  71.     def getTk(self, text):
  72.         return self.ctx.call("TL", text)
  73. class Translate_as_google(object):
  74.     def __init__(self, to_language, this_language='auto', read=False):
  75.         '''
  76.            to_language:要翻译成的语言
  77.            this_language:要转换的文字,默认为auto自动
  78.            read:在指定位置生成text的朗读文件
  79.        '''
  80.         self.this_language = this_language
  81.         self.to_language = to_language
  82.         self.read = read
  83.  
  84.     def open_url(self, url):
  85.         '''请求'''
  86.         headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
  87.         req = requests.get(url=url, headers=headers , timeout=8)
  88.  
  89.         return req
  90.  
  91.     def buildUrl(self):
  92.         '''封装请求url
  93.            sl:要转换的文字 tl:转换的结果类型 q要输入的文字'''
  94.         baseUrl = 'http://translate.google.cn/translate_a/single'
  95.         baseUrl += '?client=webapp&'
  96.         baseUrl += 'sl=%s&' % self.this_language
  97.         baseUrl += 'tl=%s&' % self.to_language
  98.         baseUrl += 'hl=zh-CN&'
  99.         baseUrl += 'dt=at&'
  100.         baseUrl += 'dt=bd&'
  101.         baseUrl += 'dt=ex&'
  102.         baseUrl += 'dt=ld&'
  103.         baseUrl += 'dt=md&'
  104.         baseUrl += 'dt=qca&'
  105.         baseUrl += 'dt=rw&'
  106.         baseUrl += 'dt=rm&'
  107.         baseUrl += 'dt=ss&'
  108.         baseUrl += 'dt=t&'
  109.         baseUrl += 'ie=UTF-8&'
  110.         baseUrl += 'oe=UTF-8&'
  111.         baseUrl += 'clearbtn=1&'
  112.         baseUrl += 'otf=1&'
  113.         baseUrl += 'pc=1&'
  114.         baseUrl += 'srcrom=0&'
  115.         baseUrl += 'ssel=0&'
  116.         baseUrl += 'tsel=0&'
  117.         baseUrl += 'kc=2&'
  118.         baseUrl += 'tk=' + str(self.tk) + '&'
  119.         baseUrl += 'q=' + parse.quote(self.text)
  120.         return baseUrl
  121.  
  122.     def read_go(self, args):
  123.         '''朗读截取
  124.        upload:下载到路径及文件名称
  125.        return_language:返回的语言类型
  126.        '''
  127.         upload, return_language = args[0], args[1]
  128.         read_translate_url = 'http://translate.google.cn/translate_tts?ie=UTF-8&q=%s&tl=%s&total=1&idx=0&textlen=3&tk=%s&client=webapp&prev=input' % (
  129.             self.text, return_language, self.tk)
  130.         data = self.open_url(read_translate_url) #请求的返回所有数据
  131.         with open(upload, 'wb') as f:
  132.             f.write(data.content)
  133.  
  134.     def translate(self,text):
  135.         '''翻译截取'''
  136.         self.text = text
  137.         js = Py4Js()
  138.         self.tk = js.getTk(self.text)
  139.  
  140.         if len(self.text) > 4891:
  141.             raise ("The length of the translation exceeds the limit!!!")
  142.         url = self.buildUrl()
  143.         # print(url)
  144.         _result = self.open_url(url)
  145.         data = _result.content.decode('utf-8')
  146.  
  147.         tmp = json.loads(data)
  148.         jsonArray = tmp[0]
  149.         result = None
  150.         for jsonItem in jsonArray:
  151.             if jsonItem[0]:
  152.                 if result:
  153.                     result = result + " " + jsonItem[0]
  154.                 else:
  155.                     result = jsonItem[0]
  156.         return result
  157.  
  158. if __name__ == '__main__':
  159.  
  160.     source = 'en' # put the language from the text file
  161.     target = 'ro' # put the language in which you want to translate
  162.     directory = r'c:\Folder1\translated\test'
  163.     for filename in os.listdir(directory):
  164.         if filename.endswith(".txt") or filename.endswith(".html"):
  165.             with open(os.path.join(directory, filename), encoding='utf-8') as f:
  166.                 file_text = f.read()
  167.                 ts = Translate_as_google(target, source)
  168.                 translated_text = ts.translate(file_text)
  169.                 print(translated_text)
  170.                 with open(os.path.join(directory+r'\translated', filename.split('.')[0] + '_{}'.format(target)) + '.' + filename.split('.')[1], 'w', encoding='utf-8') as f:
  171.                     f.write(translated_text)
  172.         else:
  173.             continue
  174.  
  175.  
  176.  
  177.  
Add Comment
Please, Sign In to add comment