nicuf

deep

Jun 9th, 2021
449
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
import json
import os

import requests
from bs4 import BeautifulSoup
from bs4.element import Comment
from bs4.formatter import HTMLFormatter
from googletrans import Translator
  6.  
  7. if False:
  8.     test = requests.post('https://api-free.deepl.com/v2/translate',
  9.                     data={'auth_key':'PUT HERE YOUR NUMBER:fx',
  10.                           'text':'hello',
  11.                           'source_lang':'EN',
  12.                           'target_lang':'ZH'  #translates into Chinesse
  13.                           }).content
  14.  
  15.     print(json.loads(test)['translations'][0]['text'])
  16.  
  17. translator = Translator()
  18.  
  19. class UnsortedAttributes(HTMLFormatter):
  20.     def attributes(self, tag):
  21.         for k, v in tag.attrs.items():
  22.             yield k, v
  23.  
  24. files_from_folder = r"c:\test" #Change with your basic Path, for example a Folder with your website written in English
  25. source_language = 'EN'  #translates from English
  26.  
  27. use_translate_folder = False
  28.  
  29. destination_language = 'ZH'  #translates into Chinesse
  30.  
  31. extension_file = ".html"
  32.  
  33. import os
  34.  
  35. directory = os.fsencode(files_from_folder)
  36.  
  37. def recursively_translate(node):
  38.     for x in range(len(node.contents)):
  39.         if isinstance(node.contents[x], str):
  40.             if node.contents[x].strip() != '':
  41.                 try:
  42.                     newtext = requests.post('https://api-free.deepl.com/v2/translate',
  43.                     data={'auth_key':'8ac87458-dd7e-528c-efd4-e2cf646b3a96:fx',
  44.                           'text':node.contents[x],
  45.                           'source_lang':source_language,
  46.                           'target_lang':destination_language
  47.                           }).content
  48.                     node.contents[x].replaceWith(json.loads(newtext)['translations'][0]['text'])
  49.                 except:
  50.                     pass
  51.         elif node.contents[x] != None:
  52.             recursively_translate(node.contents[x])
  53.    
  54. for file in os.listdir(directory):
  55.     filename = os.fsdecode(file)
  56.     print(filename)
  57.     if filename == 'y_key_e479323ce281e459.html' or filename == 'TS_4fg4_tr78.html':
  58.         continue
  59.     if filename.endswith(extension_file):
  60.         with open(os.path.join(files_from_folder, filename), encoding='utf-8') as html:
  61.             soup = BeautifulSoup('<pre>' + html.read() + '</pre>', 'html.parser')
  62.             for title in soup.findAll('title'):
  63.                 recursively_translate(title)
  64.                
  65.             for meta in soup.findAll('meta', {'name':'description'}):
  66.                 try:
  67.                     newtext = requests.post('https://api-free.deepl.com/v2/translate',
  68.                     data={'auth_key':'8ac87458-dd7e-528c-efd4-e2cf646b3a96:fx',
  69.                           'text':meta['content'],
  70.                           'source_lang':source_language,
  71.                           'target_lang':destination_language
  72.                           }).content
  73.                     meta['content'] = json.loads(newtext)['translations'][0]['text']
  74.                 except:
  75.                     pass
  76.  
  77.             for h1 in soup.findAll('h1', {'itemprop':'name'}, class_='den_articol'):
  78.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  79.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  80.                 if begin_comment < str(soup).index(str(h1)) < end_comment:
  81.                     recursively_translate(h1)
  82.  
  83.             for p in soup.findAll('p', class_='text_obisnuit'):
  84.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  85.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  86.                 if begin_comment < str(soup).index(str(p)) < end_comment:
  87.                     recursively_translate(p)
  88.                
  89.             for p in soup.findAll('p', class_='text_obisnuit2'):
  90.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  91.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  92.                 if begin_comment < str(soup).index(str(p)) < end_comment:
  93.                     recursively_translate(p)
  94.  
  95.             for span in soup.findAll('span', class_='text_obisnuit2'):
  96.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  97.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  98.                 if begin_comment < str(soup).index(str(span)) < end_comment:
  99.                     recursively_translate(span)
  100.  
  101.             for li in soup.findAll('li', class_='text_obisnuit'):
  102.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  103.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  104.                 if begin_comment < str(soup).index(str(li)) < end_comment:
  105.                     recursively_translate(li)
  106.  
  107.             for a in soup.findAll('a', class_='linkMare'):
  108.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  109.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  110.                 if begin_comment < str(soup).index(str(a)) < end_comment:
  111.                     recursively_translate(a)
  112.  
  113.             for h4 in soup.findAll('h4', class_='text_obisnuit2'):
  114.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  115.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  116.                 if begin_comment < str(soup).index(str(h4)) < end_comment:
  117.                     recursively_translate(h4)
  118.  
  119.             for h5 in soup.findAll('h5', class_='text_obisnuit2'):
  120.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  121.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  122.                 if begin_comment < str(soup).index(str(h5)) < end_comment:
  123.                     recursively_translate(h5)
  124.  
  125.         print(f'{filename} translated')
  126.         soup = soup.encode(formatter=UnsortedAttributes()).decode('utf-8')
  127.         new_filename = f'{filename.split(".")[0]}_{destination_language}.html'
  128.         if use_translate_folder:
  129.             try:
  130.                 with open(os.path.join(files_from_folder+r'\translated', new_filename), 'w', encoding='utf-8') as new_html:
  131.                     new_html.write(soup[5:-6])
  132.             except:
  133.                 os.mkdir(files_from_folder+r'\translated')
  134.                 with open(os.path.join(files_from_folder+r'\translated', new_filename), 'w', encoding='utf-8') as new_html:
  135.                     new_html.write(soup[5:-6])
  136.         else:
  137.             with open(os.path.join(files_from_folder, new_filename), 'w', encoding='utf-8') as html:
  138.                 html.write(soup[5:-6])
  139.  
RAW Paste Data