Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- public static List<String> normalize(String line) throws IOException {
- List<String> normalwords = new ArrayList<String>();
- line = line.toLowerCase();
- CharArraySet set = RussianAnalyzer.getDefaultStopSet();
- RussianLuceneMorphology luceneMorph = new RussianLuceneMorphology();
- String words[] = line.split(" ");
- for (int i=0;i<words.length;i++) {
- String w = words[i].replaceAll("[^а-я]", "");
- if (!w.equals("")) {
- List<String> wordNormalForms = luceneMorph.getNormalForms(w);
- String normalWord = wordNormalForms.get(0);
- if (!set.contains(normalWord)) {
- normalwords.add(normalWord);
- }
- }
- }
- return normalwords;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement