Guest User

Untitled

a guest
Feb 19th, 2018
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.93 KB | None | 0 0
  1. import os
  2. import sys
  3. import time
  4. import argparse
  5. from google.cloud import translate
  6.  
  7. #####progress bar start #####
  8. def format_interval(t):
  9. mins, s = divmod(int(t), 60)
  10. h, m = divmod(mins, 60)
  11. if h:
  12. return '%d:%02d:%02d' % (h, m, s)
  13. else:
  14. return '%02d:%02d' % (m, s)
  15.  
  16.  
  17. def format_meter(n, total, elapsed):
  18. if n > total:
  19. total = None
  20. elapsed_str = format_interval(elapsed)
  21. rate = '%5.2f' % (n / elapsed) if elapsed else '?'
  22. if total:
  23. frac = float(n) / total
  24. N_BARS = 10
  25. bar_length = int(frac * N_BARS)
  26. bar = '#' * bar_length + '-' * (N_BARS - bar_length)
  27. percentage = '%3d%%' % (frac * 100)
  28. left_str = format_interval(elapsed / n * (total - n)) if n else '?'
  29. return '|%s| %d/%d %s [elapsed: %s left: %s, %s iters/sec]' % (bar, n, total, percentage, elapsed_str, left_str, rate)
  30. else:
  31. return '%d [elapsed: %s, %s iters/sec]' % (n, elapsed_str, rate)
  32.  
  33.  
  34. class StatusPrinter(object):
  35. def __init__(self, file):
  36. self.file = file
  37. self.last_printed_len = 0
  38.  
  39. def print_status(self, s):
  40. self.file.write('\r' + s + ' ' * max(self.last_printed_len - len(s), 0))
  41. self.file.flush()
  42. self.last_printed_len = len(s)
  43.  
  44.  
  45. def tqdm(iterable, desc='', total=None, leave=False, file=sys.stderr, mininterval=0.5, miniters=1):
  46. if total is None:
  47. try:
  48. total = len(iterable)
  49. except TypeError:
  50. total = None
  51.  
  52. prefix = desc + ': ' if desc else ''
  53. sp = StatusPrinter(file)
  54. sp.print_status(prefix + format_meter(0, total, 0))
  55. start_t = last_print_t = time.time()
  56. last_print_n = 0
  57. n = 0
  58. for obj in iterable:
  59. yield obj
  60. n += 1
  61. if n - last_print_n >= miniters:
  62. cur_t = time.time()
  63. if cur_t - last_print_t >= mininterval:
  64. sp.print_status(prefix + format_meter(n, total, cur_t - start_t))
  65. last_print_n = n
  66. last_print_t = cur_t
  67. if not leave:
  68. sp.print_status('')
  69. sys.stdout.write('\r')
  70. else:
  71. if last_print_n < n:
  72. cur_t = time.time()
  73. sp.print_status(prefix + format_meter(n, total, cur_t - start_t))
  74. file.write('\n')
  75. #####progress bar end #####
  76.  
  77. #####split file by new line and store in memory#####
  78. def load_file(file_path, split_on='\n'):
  79. # function takes the text file path, read it, split it on passed split_on (default is \n [newline])
  80. # and returns the list acquired
  81.  
  82. # open the file in read mode, encoding utf-8
  83. fp = open(file=file_path, encoding='utf-8', errors='ignore')
  84. # read the data from the file
  85. _data = fp.read()
  86. # close the file
  87. fp.close()
  88. # split the data over split_on
  89. _data = _data.strip().split(split_on)
  90. # return the list acquired above
  91. return _data
  92.  
  93.  
  94. def handle_data(data, split_on='.'):
  95. # function receives text, split it over split_on (default is .) and returns the list acquired
  96. # split and return the data over split_on
  97. return data.split(split_on)
  98.  
  99. #####translate and build output#####
  100. if __name__ == '__main__':
  101. parser = argparse.ArgumentParser(description='Argument Parser For The Script.')
  102. parser.add_argument('input_file', help='Input text file name/path')
  103. parser.add_argument('output_file', help='Output text file name/path')
  104.  
  105. args = parser.parse_args()
  106. # set the creds.json in the current directory as the GOOGLE_APPLICATION_CREDENTIALS of the system
  107. os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(os.curdir, 'creds.json')
  108.  
  109. # create the translate client
  110. translate_client = translate.Client()
  111. # create an empty list called translated
  112. translated = list()
  113. # call load_file function with the input_file to get the list of sentences (splitted on \n)
  114. # put it in tqdm (to show the progress, and for each d in the result
  115. for d in tqdm(load_file(args.input_file), desc='Input Text Loop'):
  116. # try to append the result of the translation of d to the translated list
  117. try:
  118. translated.append(translate_client.translate(d, target_language='en')['translatedText'])
  119. # if it fails
  120. except Exception as e:
  121. # create empty _temp list
  122. _tmp = list()
  123. # call the handle_data function wih d to split it over . and for each _d in that
  124. for _d in handle_data(d):
  125. # try
  126. try:
  127. # append the result of the translation of _d to the _tmp list
  128. _tmp.append(translate_client.translate(_d, target_language='en')['translatedText'])
  129. except:
  130. continue
  131. # join the results with .and append to the translated list
  132. translated.append('.'.join(_tmp))
  133. # at last open the outputfile in write mode, join the translated over \n and arite it to the file
  134. open(file=args.output_file, mode='w', encoding='utf-8', errors='ignore').write('\n'.join(translated))
Add Comment
Please, Sign In to add comment