nicuf

deepL

Jun 9th, 2021 (edited)
928
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.68 KB | None | 0 0
  # English:  https://neculaifantanaru.com/en/deepl-api-key-python-code-text-google-translation-beautifulsoup-languages-translate.html
  # Romanian: https://neculaifantanaru.com/deepl-api-python-code-text-google-translation-beautifulsoup.html
  3.  
  4.  
  5. from bs4 import BeautifulSoup
  6. from bs4.formatter import HTMLFormatter
  7. from googletrans import Translator
  8. import requests
  9. import json
  10.  
  # Disabled smoke test for the DeepL endpoint: flip the condition to True
  # (after putting a real auth key in the payload) to translate the single
  # word 'hello' and print the result.
  if False:
      smoke_payload = {
          'auth_key': 'PUT HERE YOUR NUMBER:fx',
          'text': 'hello',
          'source_lang': 'EN',
          'target_lang': 'ZH',  # translates into Chinese
      }
      smoke_response = requests.post('https://api-free.deepl.com/v2/translate',
                                     data=smoke_payload)
      test = smoke_response.content

      print(json.loads(test)['translations'][0]['text'])

  # googletrans client created at import time; no use of it is visible in
  # this part of the file.
  translator = Translator()
  22.  
  class UnsortedAttributes(HTMLFormatter):
      """Formatter that emits a tag's attributes in their stored order."""

      def attributes(self, tag):
          """Yield each ``(name, value)`` pair of ``tag.attrs`` without re-sorting."""
          yield from tag.attrs.items()
  27.  
  # --- Translation settings ---------------------------------------------------
  # Base path holding the pages to translate, for example a folder with your
  # website written in English. Change it to your own location.
  files_from_folder = r"c:\test"

  # Language codes sent to the translation API.
  source_language = 'EN'  # translates from English
  destination_language = 'ZH'  # translates into Chinese

  # When True the results are written to a "translated" sub-folder of the
  # source folder; when False they are written next to the source files.
  use_translate_folder = False

  # Only files whose name ends with this suffix are processed.
  extension_file = ".html"

  import os

  # Bytes form of the source folder path, used for the directory listing.
  directory = os.fsencode(files_from_folder)
  40.  
  def recursively_translate(node):
      """Translate, in place, every non-blank text child found under ``node``.

      Each direct child of ``node`` that is a string with non-whitespace
      content is sent to the DeepL free API (from ``source_language`` to
      ``destination_language``) and replaced in the tree by the translated
      text. Any other non-None child is walked recursively.

      Translation is best-effort: when the request fails or the response does
      not have the expected shape, a message is printed and the original text
      is left untouched.

      Args:
          node: a parsed element exposing ``contents`` (a list of children);
              string children must support ``replace_with``.
      """
      # NOTE(security): the fallback key below was committed to source and
      # should be treated as compromised. Set DEEPL_AUTH_KEY in the
      # environment and remove the literal once every caller does so.
      auth_key = os.environ.get('DEEPL_AUTH_KEY',
                                '8ac87458-dd7e-528c-efd4-e2cf646b3a96:fx')

      # Walk a snapshot: replace_with() edits node.contents while we iterate.
      for child in list(node.contents):
          if isinstance(child, str):
              if not child.strip():
                  continue  # whitespace-only text: nothing to translate
              try:
                  response = requests.post(
                      'https://api-free.deepl.com/v2/translate',
                      data={'auth_key': auth_key,
                            'text': child,
                            'source_lang': source_language,
                            'target_lang': destination_language,
                            },
                      # Without a timeout a stalled connection blocks forever.
                      timeout=30,
                  )
                  translated = json.loads(response.content)['translations'][0]['text']
              except (requests.RequestException, ValueError, KeyError,
                      IndexError, TypeError) as error:
                  # Network failure, non-JSON body, or an error payload that
                  # has no 'translations' entry: keep the source text.
                  print(f'Translation failed, keeping original text: {error!r}')
                  continue
              child.replace_with(translated)
          elif child is not None:
              recursively_translate(child)
  57.    
  # Files in the source folder that are never translated.
  _SKIPPED_FILES = {'y_key_e479323ce281e459.html', 'TS_4fg4_tr78.html'}

  # HTML comments delimiting the article; only elements whose markup is
  # rendered between them are translated.
  _ARTICLE_START = '<!-- ARTICOL START -->'
  _ARTICLE_END = '<!-- ARTICOL FINAL -->'

  # (tag name, attribute filter, CSS class) of the article elements to
  # translate, in processing order.
  _ARTICLE_TARGETS = (
      ('h1', {'itemprop': 'name'}, 'den_articol'),
      ('p', {}, 'text_obisnuit'),
      ('p', {}, 'text_obisnuit2'),
      ('span', {}, 'text_obisnuit2'),
      ('li', {}, 'text_obisnuit'),
      ('a', {}, 'linkMare'),
      ('h4', {}, 'text_obisnuit2'),
      ('h5', {}, 'text_obisnuit2'),
  )


  def _translate_plain_text(text):
      """Return the DeepL translation of ``text``, or None when it cannot be obtained."""
      # NOTE(security): the fallback key below was committed to source and
      # should be treated as compromised. Set DEEPL_AUTH_KEY in the
      # environment and remove the literal once every caller does so.
      auth_key = os.environ.get('DEEPL_AUTH_KEY',
                                '8ac87458-dd7e-528c-efd4-e2cf646b3a96:fx')
      try:
          response = requests.post(
              'https://api-free.deepl.com/v2/translate',
              data={'auth_key': auth_key,
                    'text': text,
                    'source_lang': source_language,
                    'target_lang': destination_language,
                    },
              # Without a timeout a stalled connection blocks forever.
              timeout=30,
          )
          return json.loads(response.content)['translations'][0]['text']
      except (requests.RequestException, ValueError, KeyError,
              IndexError, TypeError) as error:
          print(f'Translation failed, keeping original text: {error!r}')
          return None


  def _is_inside_article(soup, element):
      """Tell whether ``element``'s markup first occurs between the article markers."""
      # The tree changes after every translation, so it is serialised afresh
      # for each element (once per element rather than three times).
      rendered = str(soup)
      begin = rendered.find(_ARTICLE_START)
      end = rendered.find(_ARTICLE_END)
      position = rendered.find(str(element))
      return begin != -1 and end != -1 and begin < position < end


  # Name suffix of files produced by this script for the current language.
  _translated_suffix = f'_{destination_language}{extension_file}'

  for file in os.listdir(directory):
      filename = os.fsdecode(file)
      print(filename)
      if filename in _SKIPPED_FILES or not filename.endswith(extension_file):
          continue
      if filename.endswith(_translated_suffix):
          # Output of an earlier run in the same folder: translating it again
          # would only produce "<name>_XX_XX" copies.
          continue

      with open(os.path.join(files_from_folder, filename), encoding='utf-8') as source_html:
          # The page is wrapped in <pre>...</pre> for parsing; the wrapper is
          # sliced off again before the result is written.
          soup = BeautifulSoup('<pre>' + source_html.read() + '</pre>', 'html.parser')

      for title in soup.find_all('title'):
          recursively_translate(title)

      for meta in soup.find_all('meta', {'name': 'description'}):
          description = meta.get('content')
          if not description:
              continue
          translated_description = _translate_plain_text(description)
          if translated_description is not None:
              meta['content'] = translated_description

      markup = str(soup)
      if _ARTICLE_START in markup and _ARTICLE_END in markup:
          # Elements already handled, tracked by identity so that a target
          # nested in another target (e.g. a <span> inside a translated <p>)
          # is not sent to the API a second time.
          translated_ids = set()
          for tag_name, attribute_filter, css_class in _ARTICLE_TARGETS:
              for element in soup.find_all(tag_name, attribute_filter, class_=css_class):
                  if id(element) in translated_ids or any(
                          id(ancestor) in translated_ids for ancestor in element.parents):
                      continue
                  if _is_inside_article(soup, element):
                      recursively_translate(element)
                      translated_ids.add(id(element))
      else:
          # Previously a missing marker raised ValueError and aborted the run.
          print(f'{filename}: article markers not found, article body left untranslated')

      rendered = soup.encode(formatter=UnsortedAttributes()).decode('utf-8')
      # Strip only the known extension so names containing extra dots survive.
      stem = filename[:len(filename) - len(extension_file)]
      new_filename = f'{stem}_{destination_language}{extension_file}'

      if use_translate_folder:
          output_folder = os.path.join(files_from_folder, 'translated')
          os.makedirs(output_folder, exist_ok=True)
      else:
          output_folder = files_from_folder

      with open(os.path.join(output_folder, new_filename), 'w', encoding='utf-8') as new_html:
          # [5:-6] removes the '<pre>' prefix and '</pre>' suffix added above.
          new_html.write(rendered[5:-6])
      print(f'{filename} translated')
  143.  
Add Comment
Please, Sign In to add comment