Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from googletrans import Translator as GoogleTranslator
- from conf import staticConfigs
- from time import sleep
- import backoff
class Translator:
    """Translate text from English with the googletrans client.

    Content longer than ``max_chunk_size`` characters is split into chunks;
    every chunk is translated with its own retry loop and the translated
    pieces are concatenated in order.
    """

    def __init__(self):
        """Load the static configuration and create the googletrans client."""
        self.conf = staticConfigs()
        self.client = GoogleTranslator()
        # Pause applied after every successful query; the value comes from conf.
        self.sleep_in_between_translations_seconds = self.conf.sleep_in_between_translations_seconds
        self.source_language = "en"
        # Upper bound, in characters, for the text sent in a single query.
        self.max_chunk_size = 4000

    def __createChunks(self, corpus):
        """Split ``corpus`` into pieces of at most ``max_chunk_size`` characters.

        A piece ends right after the last whitespace character found inside
        its window so that words are not cut in half; when the window holds
        no usable whitespace the piece is cut at the size limit. Joining the
        returned pieces with ``""`` reproduces ``corpus`` exactly.

        Args:
            corpus: The string to split.

        Returns:
            A list of non-empty strings (empty list for an empty corpus).

        Raises:
            ValueError: If ``max_chunk_size`` is not positive.
        """
        size = self.max_chunk_size
        if size <= 0:
            raise ValueError('max_chunk_size must be positive, got {}'.format(size))

        total = len(corpus)
        chunks = []
        start = 0
        while start < total:
            end = min(start + size, total)
            if end < total:
                # Only the last piece may end wherever the text ends; every
                # other piece prefers a whitespace boundary inside its window.
                cut = max(corpus.rfind(ws, start, end) for ws in (" ", "\n", "\t", "\r"))
                if cut > start:
                    end = cut + 1  # keep the whitespace with the left piece
            chunks.append(corpus[start:end])
            start = end
        return chunks

    def __sleepBetweenQuery(self):
        """Announce and perform the configured pause between queries."""
        print('Sleeping for {}s after translation query..'.format(self.sleep_in_between_translations_seconds))
        sleep(self.sleep_in_between_translations_seconds)

    # NOTE(review): no max_value is passed to backoff.expo, so the wait between
    # attempts keeps growing across the 150 tries -- confirm this is intended.
    @backoff.on_exception(backoff.expo, Exception, max_tries=150)
    def __translateChunk(self, text, dest_language_code):
        """Translate one chunk, retrying with exponential backoff on failure.

        The retry wraps a single query so that a failure never forces chunks
        that were already translated to be sent again.

        Args:
            text: The chunk to translate.
            dest_language_code: Target language code handed to the client.

        Returns:
            The ``text`` attribute of the client's translation result.

        Raises:
            Exception: Whatever the client raised, once the retries are
                exhausted.
        """
        try:
            return self.client.translate(text, dest=dest_language_code, src=self.source_language).text
        except Exception as e:
            print(e)
            # Bare raise keeps the original traceback for the backoff handler.
            raise

    def Translate(self, content, dest_language_code):
        """Translate ``content`` from the source language to ``dest_language_code``.

        Args:
            content: The text to translate. Empty content (``""`` or ``None``)
                yields ``""`` without issuing a query.
            dest_language_code: Target language code handed to the client.

        Returns:
            The translated text; for chunked input, the translated chunks
            concatenated in their original order.

        Raises:
            Exception: Whatever the client raised for a chunk after its
                retries were exhausted.
        """
        print('Attempting to translate to lang={}'.format(dest_language_code))
        if not content:
            # Nothing to translate: skip the network round trip and the sleep.
            return ""

        if len(content) > self.max_chunk_size:
            print('Warning: Content is longer than allowed size of {}, breaking into chunks'.format(self.max_chunk_size))

        translated_parts = []
        # Content within the limit comes back from __createChunks as one piece,
        # so short and long inputs share the same path.
        for chunk in self.__createChunks(content):
            translated_parts.append(self.__translateChunk(chunk, dest_language_code))
            self.__sleepBetweenQuery()
        return "".join(translated_parts)
Add Comment
Please, Sign In to add comment