Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from pymorphy2 import MorphAnalyzer
- from pymorphy2.units.by_analogy import KnownPrefixAnalyzer, UnknownPrefixAnalyzer
- morph = MorphAnalyzer()
def cut_prefix(morph, word):
    """Try to strip a prefix from *word* and return the lemma of the rest.

    The word is run through ``KnownPrefixAnalyzer`` and then
    ``UnknownPrefixAnalyzer``, both built around *morph*.  Only the first
    parse produced by each analyzer is examined.

    Args:
        morph: Analyzer object handed to both prefix analyzers; it must
            also provide ``normal_forms()``.
        word: The word to process.  A lower-cased copy is passed to the
            analyzers alongside the original spelling.

    Returns:
        The first normal form of the de-prefixed word (a string) as soon
        as one analyzer yields a usable parse.  Otherwise a flat list
        holding the individual fields of every first parse that was seen
        (empty when no analyzer produced anything).

    Sample results recorded with this function:
        "приукрасила"     -> 'украсить'
        "спортлото"       -> 'лото'
        "мегаабракадабра" -> 'абракадабра'
        "подловил"        -> 'ловить'
    """
    # Both analyzers are created up front, before any parsing happens.
    known_unit = KnownPrefixAnalyzer(morph)
    unknown_unit = UnknownPrefixAnalyzer(morph)

    lowered = word.lower()
    # The same set object is shared by both parse() calls.
    seen_parses = set()
    leftovers = []

    for unit in (known_unit, unknown_unit):
        parses = unit.parse(word, lowered, seen_parses)
        if parses:
            first = parses[0]
            # NOTE(review): this flattens the parse's fields into the list
            # rather than appending the parse as a single item -- confirm
            # that callers of the fallback value expect this shape.
            leftovers += first
            # Presumably field 4 is the analyzer "methods stack" and the
            # second element of its first entry is the word with the prefix
            # removed -- TODO confirm against the pymorphy2 parse layout.
            if len(first) >= 5 and first[4] and first[4][0]:
                stem = first[4][0][1]
                return morph.normal_forms(stem)[0]

    return leftovers
- In [126]: cut_prefix(morph, "приукрасила")
- Out[126]: 'украсить'
- In [127]: cut_prefix(morph, "спортлото")
- Out[127]: 'лото'
- In [128]: cut_prefix(morph, "мегаабракадабра")
- Out[128]: 'абракадабра'
- In [129]: cut_prefix(morph, "подловил")
- Out[129]: 'ловить'
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement