Advertisement
nicuf

google-trans

Jun 9th, 2021 (edited)
1,160
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.41 KB | None | 0 0
# English: https://neculaifantanaru.com/en/python-code-text-google-translate-website-translation-beautifulsoup-library.html
# Romanian: https://neculaifantanaru.com/python-code-text-google-translate-website-translation-beautifulsoup.html
  3.  
  4.  
  5. from bs4 import BeautifulSoup
  6. from bs4.formatter import HTMLFormatter
  7. from googletrans import Translator
  8. import requests
  9.  
  10. translator = Translator()
  11.  
  12. class UnsortedAttributes(HTMLFormatter):
  13.     def attributes(self, tag):
  14.         for k, v in tag.attrs.items():
  15.             yield k, v
  16.  
  17. files_from_folder = r"e:\Carte\BB\17 - Site Leadership\Principal"
  18.  
  19. use_translate_folder = False
  20.  
  21. destination_language = 'ceb'
  22.  
  23. extension_file = ".html"
  24.  
  25. import os
  26.  
  27. directory = os.fsencode(files_from_folder)
  28.  
  29. def recursively_translate(node):
  30.     for x in range(len(node.contents)):
  31.         if isinstance(node.contents[x], str):
  32.             if node.contents[x].strip() != '':
  33.                 try:
  34.                     node.contents[x].replaceWith(translator.translate(node.contents[x], dest=destination_language).text)
  35.                 except:
  36.                     pass
  37.         elif node.contents[x] != None:
  38.             recursively_translate(node.contents[x])
  39.  
  40. for file in os.listdir(directory):
  41.     filename = os.fsdecode(file)
  42.     print(filename)
  43.     if filename == 'y_key_e479323ce281e459.html' or filename == 'TS_4fg4_tr78.html': #ignore this 2 files
  44.         continue
  45.     if filename.endswith(extension_file):
  46.         with open(os.path.join(files_from_folder, filename), encoding='utf-8') as html:
  47.             soup = BeautifulSoup('<pre>' + html.read() + '</pre>', 'html.parser')
  48.             for title in soup.findAll('title'):
  49.                 recursively_translate(title)
  50.  
  51.             for meta in soup.findAll('meta', {'name':'description'}):
  52.                 try:
  53.                     meta['content'] = translator.translate(meta['content'], dest=destination_language).text
  54.                 except:
  55.                     pass
  56.  
  57.             for h1 in soup.findAll('h1', {'itemprop':'name'}, class_='den_articol'):
  58.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  59.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  60.                 if begin_comment < str(soup).index(str(h1)) < end_comment:
  61.                     recursively_translate(h1)
  62.  
  63.             for p in soup.findAll('p', class_='text_obisnuit'):
  64.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  65.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  66.                 if begin_comment < str(soup).index(str(p)) < end_comment:
  67.                     recursively_translate(p)
  68.  
  69.             for p in soup.findAll('p', class_='text_obisnuit2'):
  70.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  71.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  72.                 if begin_comment < str(soup).index(str(p)) < end_comment:
  73.                     recursively_translate(p)
  74.  
  75.             for span in soup.findAll('span', class_='text_obisnuit2'):
  76.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  77.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  78.                 if begin_comment < str(soup).index(str(span)) < end_comment:
  79.                     recursively_translate(span)
  80.  
  81.             for li in soup.findAll('li', class_='text_obisnuit'):
  82.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  83.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  84.                 if begin_comment < str(soup).index(str(li)) < end_comment:
  85.                     recursively_translate(li)
  86.  
  87.             for a in soup.findAll('a', class_='linkMare'):
  88.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  89.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  90.                 if begin_comment < str(soup).index(str(a)) < end_comment:
  91.                     recursively_translate(a)
  92.  
  93.             for h4 in soup.findAll('h4', class_='text_obisnuit2'):
  94.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  95.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  96.                 if begin_comment < str(soup).index(str(h4)) < end_comment:
  97.                     recursively_translate(h4)
  98.  
  99.             for h5 in soup.findAll('h5', class_='text_obisnuit2'):
  100.                 begin_comment = str(soup).index('<!-- ARTICOL START -->')
  101.                 end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
  102.                 if begin_comment < str(soup).index(str(h5)) < end_comment:
  103.                     recursively_translate(h5)
  104.  
  105.         print(f'{filename} translated')
  106.         soup = soup.encode(formatter=UnsortedAttributes()).decode('utf-8')
  107.         new_filename = f'{filename.split(".")[0]}.html'
  108.         if use_translate_folder:
  109.             try:
  110.                 with open(os.path.join(files_from_folder+r'\translated', new_filename), 'w', encoding='utf-8') as new_html:
  111.                     new_html.write(soup[5:-6])
  112.             except:
  113.                 os.mkdir(files_from_folder+r'\translated')
  114.                 with open(os.path.join(files_from_folder+r'\translated', new_filename), 'w', encoding='utf-8') as new_html:
  115.                     new_html.write(soup[5:-6])
  116.         else:
  117.             with open(os.path.join(files_from_folder, new_filename), 'w', encoding='utf-8') as html:
  118.                 html.write(soup[5:-6])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement