nicuf

G.TR.API

Jun 22nd, 2021 (edited)
824
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
# Romanian: https://neculaifantanaru.com/example-google-translate-api-key-python-code-beautifulsoup.html
# English: https://neculaifantanaru.com/en/example-google-translate-api-key-python-code-beautifulsoup.html
  3.  
  4.  
  5. from bs4 import BeautifulSoup
  6. from bs4.formatter import HTMLFormatter
  7. import requests
  8. import json
  9. import os
  10. import six
  11. from google.cloud import translate_v2 as translate
  12.  
  13. class UnsortedAttributes(HTMLFormatter):
  14.     def attributes(self, tag):
  15.         for k, v in tag.attrs.items():
  16.             yield k, v
  17.  
  18. def translate_text(target, text):
  19.     """Translates text into the target language.
  20.  
  21.    Target must be an ISO 639-1 language code.
  22.    See https://g.co/cloud/translate/v2/translate-reference#supported_languages
  23.    """
  24.  
  25.     os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "secret.json"
  26.  
  27.     translate_client = translate.Client()
  28.  
  29.     if isinstance(text, six.binary_type):
  30.         text = text.decode("utf-8")
  31.  
  32.     # Text can also be a sequence of strings, in which case this method
  33.     # will return a sequence of results for each text.
  34.     result = translate_client.translate(text, target_language=target)
  35.  
  36.     return result["translatedText"]
  37.  
# Folder whose entries are listed and translated by the loop further down.
files_from_folder = r"C:\test"
# Not referenced anywhere else in the visible code.
source_language = 'EN'

# When True, output files go into a "translated" subfolder of
# files_from_folder instead of next to the input files.
use_translate_folder = False

# Passed to translate_text as the target language and used as the suffix of
# every output filename.
destination_language = 'ZH'

# Only filenames ending with this suffix are processed.
extension_file = ".html"

import os  # repeated import; os is already imported near the top of the script

# Bytes form of the folder path, handed to os.listdir below.
directory = os.fsencode(files_from_folder)
  50.  
  51. def recursively_translate(node):
  52.     for x in range(len(node.contents)):
  53.         if isinstance(node.contents[x], str):
  54.             if node.contents[x].strip() != '':
  55.                 try:
  56.                     newtext = translate_text(destination_language, node.contents[x])
  57.                     node.contents[x].replaceWith(newtext)
  58.                 except:
  59.                     pass
  60.         elif node.contents[x] != None:
  61.             recursively_translate(node.contents[x])
  62.    
  63. for file in os.listdir(directory):
  64.     filename = os.fsdecode(file)
  65.     print(filename)
  66.     if filename == 'y_key_e479323ce281e459.html' or filename == 'TS_4fg4_tr78.html':
  67.         continue
  68.     if filename.endswith(extension_file):
  69.         with open(os.path.join(files_from_folder, filename), encoding='utf-8') as html:
  70.             soup = BeautifulSoup('<pre>' + html.read() + '</pre>', 'html.parser')
  71.             for title in soup.findAll('title'):
  72.                 recursively_translate(title)
  73.                
  74.             for meta in soup.findAll('meta', {'name':'description'}):
  75.                 try:
  76.                     newtext = translate_text(destination_language, meta['content'])
  77.                     meta['content'] = newtext
  78.                 except:
  79.                     pass
  80.  
  81.             for h1 in soup.findAll('h1', {'itemprop':'name'}, class_='den_articol'):
  82.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  83.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  84.                 if begin_comment < str(soup).index(str(h1)) < end_comment:
  85.                     recursively_translate(h1)
  86.  
  87.             for p in soup.findAll('p', class_='text_obisnuit'):
  88.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  89.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  90.                 if begin_comment < str(soup).index(str(p)) < end_comment:
  91.                     recursively_translate(p)
  92.                
  93.             for p in soup.findAll('p', class_='text_obisnuit2'):
  94.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  95.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  96.                 if begin_comment < str(soup).index(str(p)) < end_comment:
  97.                     recursively_translate(p)
  98.  
  99.             for span in soup.findAll('span', class_='text_obisnuit2'):
  100.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  101.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  102.                 if begin_comment < str(soup).index(str(span)) < end_comment:
  103.                     recursively_translate(span)
  104.  
  105.             for li in soup.findAll('li', class_='text_obisnuit'):
  106.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  107.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  108.                 if begin_comment < str(soup).index(str(li)) < end_comment:
  109.                     recursively_translate(li)
  110.  
  111.             for a in soup.findAll('a', class_='linkMare'):
  112.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  113.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  114.                 if begin_comment < str(soup).index(str(a)) < end_comment:
  115.                     recursively_translate(a)
  116.  
  117.             for h4 in soup.findAll('h4', class_='text_obisnuit2'):
  118.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  119.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  120.                 if begin_comment < str(soup).index(str(h4)) < end_comment:
  121.                     recursively_translate(h4)
  122.  
  123.             for h5 in soup.findAll('h5', class_='text_obisnuit2'):
  124.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  125.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  126.                 if begin_comment < str(soup).index(str(h5)) < end_comment:
  127.                     recursively_translate(h5)
  128.  
  129.         print(f'{filename} translated')
  130.         soup = soup.encode(formatter=UnsortedAttributes()).decode('utf-8')
  131.         new_filename = f'{filename.split(".")[0]}_{destination_language}.html'
  132.         if use_translate_folder:
  133.             try:
  134.                 with open(os.path.join(files_from_folder+r'\translated', new_filename), 'w', encoding='utf-8') as new_html:
  135.                     new_html.write(soup[5:-6])
  136.             except:
  137.                 os.mkdir(files_from_folder+r'\translated')
  138.                 with open(os.path.join(files_from_folder+r'\translated', new_filename), 'w', encoding='utf-8') as new_html:
  139.                     new_html.write(soup[5:-6])
  140.         else:
  141.             with open(os.path.join(files_from_folder, new_filename), 'w', encoding='utf-8') as html:
  142.                 html.write(soup[5:-6])
  143.  
RAW Paste Data