Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import codecs
import re
# Default location of the dictionary file read by PolishDict.
DICT_PATH = "dict/polishMorfologyDict.dic"


class PolishDict:
    """Text lookup over a dictionary file that is loaded fully into memory.

    The file is read once at construction time; every lookup is a regular
    expression search over that text.
    """

    __content = ""

    def __init__(self, path=None, encoding="windows-1252"):
        """Read the dictionary file into memory.

        Args:
            path: File to load. ``None`` (the default) means the module-level
                ``DICT_PATH``, looked up at call time.
            encoding: Text encoding used to decode the file.
                NOTE(review): the default is kept from the original code;
                Polish text is often stored as cp1250 / ISO-8859-2, so
                confirm this against the real dictionary file.

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the file is not valid in ``encoding``.
        """
        if path is None:
            path = DICT_PATH
        # Context manager so the handle is closed even if read() fails.
        with open(path, encoding=encoding) as handle:
            self.__content = handle.read()

    def findInDict(self, wanted_word):
        """Look up the first occurrence of ``wanted_word`` in the dictionary.

        The match runs from the first occurrence of ``wanted_word`` to the end
        of that line, and the matched text is passed to
        ``normalizeFoundText``.

        Args:
            wanted_word: Literal text to search for.

        Returns:
            The result of ``normalizeFoundText`` for the matched text, or
            ``""`` when ``wanted_word`` is empty or does not occur.
        """
        if not wanted_word:
            # An empty pattern would match the first line of the file.
            return ""
        # Escape the query so characters such as '.' or '(' are taken
        # literally instead of being interpreted as regex syntax.
        searched = re.search(re.escape(wanted_word) + r'.*', self.__content)
        if searched:
            return self.normalizeFoundText(searched.group())
        return ""

    def normalizeFoundText(self, word):
        """Reduce a matched dictionary line to its (possibly trimmed) head word.

        The head word is the leading run of ``\\w`` characters. Unless the
        form returned by ``searchWordForm`` is exactly ``"N"``, the first
        digit found in ``word`` is the number of characters cut from the end
        of the head word.

        Args:
            word: Text of a dictionary entry, starting at the matched word.

        Returns:
            The head word, trimmed as described, or ``None`` when ``word``
            does not start with a word character.
        """
        word_form = self.searchWordForm(word)
        head = re.search(r'^\w+', word)
        if not head:
            return None
        wanted_word = head.group()
        if word_form != "N":
            digit = re.search(r'\d', word)
            # No digit means nothing to trim (previously crashed on None).
            trim = int(digit.group()) if digit else 0
            # Guard against trim == 0: s[:-0] is s[:0], the empty string.
            if trim:
                wanted_word = wanted_word[:-trim]
        return wanted_word

    def searchWordForm(self, string):
        """Extract the text between the first ``,`` and the last ``/``.

        Args:
            string: Text of a dictionary entry.

        Returns:
            The text strictly between the first comma and the last slash that
            follows it, or ``None`` when no such span exists.
        """
        found = re.search(r',.*/', string)
        if found:
            # Drop the delimiting ',' and '/' themselves.
            return found.group()[1:-1]
        return None
# Demo run: load the dictionary from DICT_PATH and print the lookup result
# for the sample word "lewy".
Dict = PolishDict()
print(Dict.findInDict("lewy"))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement