Advertisement
Guest User

Untitled

a guest
Apr 28th, 2017
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.94 KB | None | 0 0
  1. # coding: utf8
  2. from pathlib import Path
  3. import json
  4. import zipfile
  5.  
  6. languages = ["ast", "bg", "ca", "cs", "en", "et", "fr", "gl", "de", "hu", "ga", "gv",
  7.              "it", "fa", "pl", "pt", "ro", "gd", "sk", "sl", "es", "sv", "uk", "cy"]
  8.  
  9. directory = Path("lemmatization")
  10. for language in languages:
  11.     p = directory/"lemmatization-{}.txt".format(language)
  12.     file = p.open('r', encoding='utf-8')
  13.     array = [e.strip().split("\t") for e in file]
  14.     try :
  15.         lookup = {t[1]: t[0] for t in array}
  16.     except:
  17.         print(language)
  18.     p = directory/"lemmatization-{}.py".format(language)
  19.     p.open("w", encoding='utf-8').write("# coding: utf8\n")
  20.     p.open("a", encoding='utf-8').write("from __future__ import unicode_literals\n\n")
  21.     content = json.dumps(lookup, sort_keys=True, indent=4,
  22.                          separators=(',', ': '), ensure_ascii=False)
  23.     p.open("a", encoding='utf-8').write("LOOK_UP = " + content)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement