Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Explanations:
- ENGLISH: https://neculaifantanaru.com/en/how-to-python-code-google-translate-website.html
- ---------------------
- from bs4 import BeautifulSoup
- from bs4.formatter import HTMLFormatter
- import requests
- import sys
- import os
class UnsortedAttributes(HTMLFormatter):
    """HTML formatter that keeps tag attributes in their original order
    instead of BeautifulSoup's default alphabetical sorting on output."""

    def attributes(self, tag):
        # Emit attributes exactly as stored on the tag, in insertion order.
        yield from tag.attrs.items()
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
files_from_folder = r"c:\Folder2\translated"  # folder with the source HTML files
use_translate_folder = True                   # write results into a "translated" subfolder
destination_language = 'vi'                   # change this to the language you want to translate into
extension_file = ".html"                      # only files with this extension are processed
directory = os.fsencode(files_from_folder)    # bytes form for os.listdir
def translate(text, target_language, timeout=10):
    """Translate *text* into *target_language* via Google Translate's
    unofficial ``translate_a/single`` endpoint.

    Parameters
    ----------
    text : str
        Source text; the request hard-codes ``sl=en`` (English source).
    target_language : str
        Target language code, e.g. ``'vi'``.
    timeout : float, optional
        Request timeout in seconds. New, backward-compatible parameter:
        the original call had no timeout, so a stalled connection could
        hang the whole script indefinitely.

    Returns
    -------
    str
        All translated sentence fragments concatenated together.

    Raises
    ------
    Exception
        Propagates network errors and malformed-response KeyErrors to
        the caller (callers wrap this in try/except).
    """
    url = "https://translate.google.com/translate_a/single"
    # Headers imitate the iOS Google Translate app; the endpoint is
    # picky about the User-Agent it will serve.
    headers = {
        "Host": "translate.google.com",
        "Accept": "*/*",
        "Cookie": "",
        "User-Agent": "GoogleTranslate/5.9.59004 (iPhone; iOS 10.2; ja; iPhone9,1)",
        "Accept-Language": "fr",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
    }
    params = {
        "client": "it",
        "dt": ["t", "rmt", "bd", "rms", "qca", "ss", "md", "ld", "ex"],
        "otf": "2",
        "dj": "1",
        "q": text,
        "hl": "ja",
        "ie": "UTF-8",
        "oe": "UTF-8",
        "sl": "en",
        "tl": target_language,
    }
    res = requests.get(url=url, headers=headers, params=params, timeout=timeout)
    data = res.json()
    # Join the translated fragments in one pass instead of the original
    # index loop with quadratic string concatenation.
    return ''.join(sentence["trans"] for sentence in data["sentences"])
def recursively_translate(node):
    """Translate every non-empty text child of *node* in place, recursing
    into child tags.

    Index-based iteration is deliberate: ``replaceWith`` swaps the entry
    inside ``node.contents`` while keeping the list the same length, so
    positions remain valid during the walk.
    """
    for i in range(len(node.contents)):
        child = node.contents[i]
        if isinstance(child, str):
            if child.strip() != '':
                try:
                    child.replaceWith(
                        translate(text=child,
                                  target_language=destination_language))
                except Exception:
                    # Best effort: keep the original text if the request
                    # fails. Narrowed from a bare `except:` so Ctrl-C
                    # (KeyboardInterrupt) still stops the script.
                    pass
        elif child is not None:  # `is not None`, not `!= None`
            recursively_translate(child)
# (tag name, extra attrs or None, CSS class) for every element type that
# should be translated when it appears between the article markers.
# Replaces nine copy-pasted findAll loops with one data-driven pass.
_ARTICLE_SELECTORS = [
    ('h1', {'itemprop': 'name'}, 'den_articol'),
    ('p', None, 'text_obisnuit'),
    ('p', None, 'text_obisnuit2'),
    ('span', None, 'text_obisnuit2'),
    ('li', None, 'text_obisnuit'),
    ('a', None, 'linkMare'),
    ('h4', None, 'text_obisnuit2'),
    ('h5', None, 'text_obisnuit2'),
    ('h1', {'itemprop': 'name'}, 'den_webinar'),
]


def _inside_article(soup, tag):
    """True when *tag*'s markup lies between the ARTICOL START and
    ARTICOL FINAL comments.

    Recomputed for every tag (as the original code did) because each
    translation mutates the soup and shifts every string index.
    """
    html_text = str(soup)
    begin_comment = html_text.index('<!-- ARTICOL START -->')
    end_comment = html_text.index('<!-- ARTICOL FINAL -->')
    return begin_comment < html_text.index(str(tag)) < end_comment


for file in os.listdir(directory):
    filename = os.fsdecode(file)
    print(filename)
    if filename in ('y_key_e479323ce281e459.html', 'directory.html'):
        continue  # ignore these two special files
    if not filename.endswith(extension_file):
        continue
    with open(os.path.join(files_from_folder, filename), encoding='utf-8') as html:
        # The <pre> wrapper stops html.parser from reflowing whitespace;
        # it is stripped again before writing (the [5:-6] slice below).
        soup = BeautifulSoup('<pre>' + html.read() + '</pre>', 'html.parser')

    # <title> is translated wherever it appears.
    for title in soup.findAll('title'):
        recursively_translate(title)

    # Meta descriptions are plain attribute text, translated directly.
    for meta in soup.findAll('meta', {'name': 'description'}):
        try:
            meta['content'] = translate(text=meta['content'],
                                        target_language=destination_language)
        except Exception:
            # Best effort: leave the description untranslated on failure.
            pass

    # All article-body elements, restricted to the marked article region.
    for tag_name, attrs, css_class in _ARTICLE_SELECTORS:
        if attrs is None:
            matches = soup.findAll(tag_name, class_=css_class)
        else:
            matches = soup.findAll(tag_name, attrs, class_=css_class)
        for tag in matches:
            if _inside_article(soup, tag):
                recursively_translate(tag)

    # NOTE(review): the original printed the literal '(unknown)' here,
    # which looks like paste damage of an f-string interpolation.
    print(f'{filename} translated')

    out_html = soup.encode(formatter=UnsortedAttributes()).decode('utf-8')
    new_filename = f'{filename.split(".")[0]}_{destination_language}.html'
    if use_translate_folder:
        out_dir = files_from_folder + r'\translated'
        # Replaces the original try/except-mkdir dance, which would also
        # retry (and crash) on unrelated write errors.
        os.makedirs(out_dir, exist_ok=True)
        out_path = os.path.join(out_dir, new_filename)
    else:
        out_path = os.path.join(files_from_folder, new_filename)
    with open(out_path, 'w', encoding='utf-8') as new_html:
        new_html.write(out_html[5:-6])  # strip the <pre>...</pre> wrapper
Add Comment
Please, Sign In to add comment