nicuf

Translate website with Python and Google Translate

Dec 8th, 2021 (edited)
343
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.65 KB | None | 0 0
# Explanations (English):
# https://neculaifantanaru.com/en/how-to-python-code-google-translate-website.html
# ---------------------
  5.  
  6.  
  7. from bs4 import BeautifulSoup
  8. from bs4.formatter import HTMLFormatter
  9. import requests
  10. import sys
  11. import os
  12.  
  13. class UnsortedAttributes(HTMLFormatter):
  14.     def attributes(self, tag):
  15.         for k, v in tag.attrs.items():
  16.             yield k, v
  17.  
  18. files_from_folder = r"c:\Folder2\translated"
  19. use_translate_folder = True
  20. destination_language = 'vi'  #aici schimbi limba in care vrei sa traduci
  21. extension_file = ".html"
  22. directory = os.fsencode(files_from_folder)
  23.  
  24. def translate(text, target_language):
  25.     url = "https://translate.google.com/translate_a/single"
  26.     headers = {
  27.         "Host": "translate.google.com",
  28.         "Accept": "*/*",
  29.         "Cookie": "",
  30.         "User-Agent": "GoogleTranslate/5.9.59004 (iPhone; iOS 10.2; ja; iPhone9,1)",
  31.         "Accept-Language": "fr",
  32.         "Accept-Encoding": "gzip, deflate",
  33.         "Connection": "keep-alive",
  34.         }
  35.     sentence = text
  36.     params = {
  37.         "client": "it",
  38.         "dt": ["t", "rmt", "bd", "rms", "qca", "ss", "md", "ld", "ex"],
  39.         "otf": "2",
  40.         "dj": "1",
  41.         "q": sentence,
  42.         "hl": "ja",
  43.         "ie": "UTF-8",
  44.         "oe": "UTF-8",
  45.         "sl": "en",
  46.         "tl": target_language,
  47.         }
  48.  
  49.     res = requests.get(
  50.         url=url,
  51.         headers=headers,
  52.         params=params,
  53.         )
  54.  
  55.     res = res.json()
  56.  
  57.     paragraph = ''
  58.     for i in range(0, len(res["sentences"])):
  59.         paragraph += res["sentences"][i]["trans"]
  60.  
  61.     return paragraph
  62.  
  63. def recursively_translate(node):
  64.     for x in range(len(node.contents)):
  65.         if isinstance(node.contents[x], str):
  66.             if node.contents[x].strip() != '':
  67.                 try:
  68.                     node.contents[x].replaceWith(translate(text=node.contents[x], target_language=destination_language))
  69.                 except:
  70.                     pass
  71.         elif node.contents[x] != None:
  72.             recursively_translate(node.contents[x])
  73.  
  74. for file in os.listdir(directory):
  75.     filename = os.fsdecode(file)
  76.     print(filename)
  77.     if filename == 'y_key_e479323ce281e459.html' or filename == 'directory.html': #ignore this 2 files
  78.         continue
  79.     if filename.endswith(extension_file):
  80.         with open(os.path.join(files_from_folder, filename), encoding='utf-8') as html:
  81.             soup = BeautifulSoup('<pre>' + html.read() + '</pre>', 'html.parser')
  82.             for title in soup.findAll('title'):
  83.                 recursively_translate(title)
  84.  
  85.             for meta in soup.findAll('meta', {'name':'description'}):
  86.                 try:
  87.                     meta['content'] = translate(text=meta['content'], target_language=destination_language)
  88.                 except:
  89.                     pass
  90.  
  91.             for h1 in soup.findAll('h1', {'itemprop':'name'}, class_='den_articol'):
  92.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  93.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  94.                 if begin_comment < str(soup).index(str(h1)) < end_comment:
  95.                     recursively_translate(h1)
  96.  
  97.             for p in soup.findAll('p', class_='text_obisnuit'):
  98.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  99.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  100.                 if begin_comment < str(soup).index(str(p)) < end_comment:
  101.                     recursively_translate(p)
  102.  
  103.             for p in soup.findAll('p', class_='text_obisnuit2'):
  104.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  105.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  106.                 if begin_comment < str(soup).index(str(p)) < end_comment:
  107.                     recursively_translate(p)
  108.  
  109.             for span in soup.findAll('span', class_='text_obisnuit2'):
  110.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  111.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  112.                 if begin_comment < str(soup).index(str(span)) < end_comment:
  113.                     recursively_translate(span)
  114.  
  115.             for li in soup.findAll('li', class_='text_obisnuit'):
  116.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  117.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  118.                 if begin_comment < str(soup).index(str(li)) < end_comment:
  119.                     recursively_translate(li)
  120.  
  121.             for a in soup.findAll('a', class_='linkMare'):
  122.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  123.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  124.                 if begin_comment < str(soup).index(str(a)) < end_comment:
  125.                     recursively_translate(a)
  126.  
  127.             for h4 in soup.findAll('h4', class_='text_obisnuit2'):
  128.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  129.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  130.                 if begin_comment < str(soup).index(str(h4)) < end_comment:
  131.                     recursively_translate(h4)
  132.  
  133.             for h5 in soup.findAll('h5', class_='text_obisnuit2'):
  134.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  135.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  136.                 if begin_comment < str(soup).index(str(h5)) < end_comment:
  137.                     recursively_translate(h5)
  138.  
  139.             for h1 in soup.findAll('h1', {'itemprop':'name'}, class_='den_webinar'):
  140.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  141.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  142.                 if begin_comment < str(soup).index(str(h1)) < end_comment:
  143.                     recursively_translate(h1)
  144.  
  145.         print(f'{filename} translated')
  146.         soup = soup.encode(formatter=UnsortedAttributes()).decode('utf-8')
  147.         new_filename = f'{filename.split(".")[0]}_{destination_language}.html'
  148.         if use_translate_folder:
  149.             try:
  150.                 with open(os.path.join(files_from_folder+r'\translated', new_filename), 'w', encoding='utf-8') as new_html:
  151.                     new_html.write(soup[5:-6])
  152.             except:
  153.                 os.mkdir(files_from_folder+r'\translated')
  154.                 with open(os.path.join(files_from_folder+r'\translated', new_filename), 'w', encoding='utf-8') as new_html:
  155.                     new_html.write(soup[5:-6])
  156.         else:
  157.             with open(os.path.join(files_from_folder, new_filename), 'w', encoding='utf-8') as html:
  158.                 html.write(soup[5:-6])
  159.  
Add Comment
Please, Sign In to add comment