Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- from __future__ import print_function
- import collections
- import itertools
- import io
- try:
- import cPickle as pickle
- except ImportError:
- import pickle
- import os
- import re
- import sys
# Plain-text dictionary that is parsed when no pickle cache exists yet.
TRANSFILE_ORIG = '/usr/share/trans/de-en'
# Pickle cache of the parsed dictionary; a relative path, so it is resolved
# against the current working directory.
TRANSFILE_PICKLE = 'transfile.pkl'
# Text encoding used when reading the plain-text dictionary.
ENCODING = 'utf-8'

# One parsed dictionary line: the list of source-language words and the list
# of their translations.
TranslationEntry = collections.namedtuple(
    'TranslationEntry', 'originals, translations'
)
class Translator(object):
    """Look up translations in a dictionary that is cached as a pickle file.

    The word mapping is either supplied by the caller or loaded on
    construction: from the pickle cache when that file exists, otherwise by
    parsing the plain-text dictionary.  Used as a context manager, the
    instance writes the mapping back to the pickle cache on exit when
    ``autosave`` is true.
    """

    # Matches the ``{...}`` and ``[...]`` annotations that are removed from a
    # dictionary line before it is split into words.  Compiled once instead
    # of being looked up for every line.
    _ANNOTATION_RE = re.compile(r'{.*?}|\[.*?\]')

    def __init__(self, translations=None, encoding=ENCODING, autosave=True):
        """Create a translator.

        Args:
            translations: Mapping of lower-cased word -> translation.  When
                it is ``None`` or empty, the mapping is obtained from
                ``load_translations()``.
            encoding: Encoding used to read the plain-text dictionary.
            autosave: Whether ``__exit__`` writes the pickle cache.
        """
        # ``None`` replaces the former mutable ``{}`` default; any falsy
        # value still triggers a load, exactly as before.
        self.encoding = encoding
        self.autosave = autosave
        self.translations = (
            translations if translations else self.load_translations()
        )

    def __enter__(self):
        """Return the translator itself for use in a ``with`` statement."""
        return self

    def __exit__(self, *unused):
        """Write the pickle cache if ``autosave`` is set.

        Returns ``None``, so an exception raised inside the ``with`` body
        is not suppressed.
        """
        if self.autosave:
            self.save_translations()

    def translate(self, word):
        """Return the translation stored for ``word``, or ``None``.

        The lookup is case-insensitive because keys are stored lower-cased.
        """
        return self.translations.get(word.lower())

    def load_translations(self, pickle_source=TRANSFILE_PICKLE,
                          text_source=TRANSFILE_ORIG):
        """Return the word mapping from the pickle cache or the text file.

        Args:
            pickle_source: Path of the pickle cache; used when it exists.
            text_source: Path of the plain-text dictionary; parsed when the
                pickle cache does not exist.

        Returns:
            The mapping of lower-cased word -> translation.
        """
        if os.path.exists(pickle_source):
            # NOTE(security): unpickling can execute arbitrary code.  Only
            # point ``pickle_source`` at a file this program wrote itself.
            with io.open(pickle_source, 'rb') as infile:
                return pickle.load(infile)

        with io.open(text_source, 'rt', encoding=self.encoding) as infile:
            print('Parsing translation file...')
            translations = self.parse_translations(infile)
            print('...done\n')
        return translations

    def save_translations(self, filename=TRANSFILE_PICKLE):
        """Pickle the current word mapping to ``filename``."""
        with io.open(filename, 'wb') as outfile:
            pickle.dump(self.translations, outfile, pickle.HIGHEST_PROTOCOL)

    def parse_translations(self, data):
        """Build the word mapping from an iterable of dictionary lines.

        Lines starting with ``#`` and blank lines are skipped.  A line that
        cannot be split into exactly two ``::``-separated halves is reported
        on stderr and skipped.  Words are paired positionally; surplus words
        on the longer side are dropped by ``zip``.  When a word occurs more
        than once, the last occurrence wins.

        Args:
            data: Iterable of text lines.

        Returns:
            A dict mapping each lower-cased original word to its
            translation.
        """
        translations = {}
        for line in data:
            # Blank lines carry no entry; skipping them avoids a spurious
            # "Unable to parse" message for each one.
            if line.startswith('#') or not line.strip():
                continue
            cleaned_line = self._ANNOTATION_RE.sub('', line)
            try:
                entry = self.parse_translation_entry(cleaned_line)
            except ValueError:
                print(
                    'Unable to parse {!r}'.format(line),
                    file=sys.stderr)
                continue
            for orig, trans in zip(entry.originals, entry.translations):
                # A word that consisted only of annotations is empty after
                # cleaning; do not register it under the key ''.
                if orig:
                    translations[orig.lower()] = trans
        return translations

    def parse_translation_entry(self, entry):
        """Split one cleaned line into a ``TranslationEntry``.

        Raises:
            ValueError: If ``entry`` does not contain exactly one ``::``.
        """
        originals, translations = entry.split('::')
        return TranslationEntry(
            self._get_words(originals), self._get_words(translations)
        )

    def _get_words(self, s):
        """Return the stripped words of ``s``, split on ``;`` and ``|``."""
        groups = (group.split('|') for group in s.split(';'))
        words = itertools.chain.from_iterable(groups)
        return [word.strip() for word in words]
def main():
    """Translate the single word given on the command line.

    Prints the translation to stdout.  A usage message or a "not found"
    message goes to stderr instead.

    Returns:
        0 when a translation was printed, 1 when the word has no
        translation, 2 when the number of arguments is wrong.
    """
    if len(sys.argv) != 2:
        print(
            'Usage: {} word-to-translate'.format(sys.argv[0]),
            file=sys.stderr
        )
        return 2

    with Translator() as tr:
        translation = tr.translate(sys.argv[1])
        if translation is None:
            # Report a miss explicitly instead of printing the literal
            # text "None" as if it were a translation.
            print(
                'No translation found for {!r}'.format(sys.argv[1]),
                file=sys.stderr
            )
            return 1
        print(translation)
    return 0
if __name__ == '__main__':
    # Pass whatever main() returns to the shell as the exit status;
    # sys.exit(None) exits with status 0.
    sys.exit(main())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement