Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# coding: utf8
"""Convert tab-separated lemmatization lists into Python lookup modules.

For every language code in ``languages`` the script reads
``lemmatization-<code>.txt`` from ``directory`` and writes
``lemmatization-<code>.py`` next to it.  The generated module defines a
single ``LOOK_UP`` dict mapping the second tab-separated field of each
input line to the first one (presumably inflected form -> lemma; confirm
against the data files).
"""
from pathlib import Path
import json
import zipfile

languages = ["ast", "bg", "ca", "cs", "en", "et", "fr", "gl", "de", "hu", "ga", "gv",
             "it", "fa", "pl", "pt", "ro", "gd", "sk", "sl", "es", "sv", "uk", "cy"]
directory = Path("lemmatization")


def _read_lookup(source):
    """Parse one lemmatization list.

    Args:
        source: ``Path`` of a UTF-8 text file with tab-separated fields.

    Returns:
        A ``(lookup, skipped)`` tuple.  ``lookup`` maps field 2 to field 1
        of every usable line (a later line wins when a key repeats, and any
        fields after the second are ignored).  ``skipped`` counts the lines
        that had fewer than two fields.
    """
    lookup = {}
    skipped = 0
    with source.open("r", encoding="utf-8") as handle:
        for line in handle:
            fields = line.strip().split("\t")
            if len(fields) < 2:
                # Blank or malformed line: drop just this line instead of
                # losing the whole language to an IndexError.
                skipped += 1
                continue
            lookup[fields[1]] = fields[0]
    return lookup, skipped


def _write_module(target, lookup):
    """Write ``lookup`` to ``target`` as a Python module defining LOOK_UP."""
    content = json.dumps(lookup, sort_keys=True, indent=4,
                         separators=(',', ': '), ensure_ascii=False)
    # A single handle, closed by the context manager, guarantees the header
    # lines reach the file before the table and nothing is left unflushed.
    with target.open("w", encoding="utf-8") as handle:
        handle.write("# coding: utf8\n")
        handle.write("from __future__ import unicode_literals\n\n")
        handle.write("LOOK_UP = " + content)


def main(language_codes=None, source_dir=None):
    """Generate one lookup module per language.

    Args:
        language_codes: iterable of language codes; defaults to the
            module-level ``languages`` list.
        source_dir: ``Path`` holding the ``.txt`` inputs and receiving the
            ``.py`` outputs; defaults to the module-level ``directory``.

    Raises:
        OSError: if an input file cannot be read or an output file cannot
            be written (e.g. ``FileNotFoundError`` for a missing list).
    """
    if language_codes is None:
        language_codes = languages
    if source_dir is None:
        source_dir = directory

    for language in language_codes:
        source = source_dir / "lemmatization-{}.txt".format(language)
        # The table is rebuilt from scratch for every language, so a problem
        # in one input can never leak another language's entries into its
        # output file.
        lookup, skipped = _read_lookup(source)
        if skipped:
            print("{}: skipped {} malformed line(s)".format(language, skipped))
        target = source_dir / "lemmatization-{}.py".format(language)
        _write_module(target, lookup)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement