Advertisement
Guest User

Untitled

a guest
Oct 1st, 2014
266
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 0.67 KB | None | 0 0
  1.        public static List<String> normalize(String line) throws IOException {
  2.         List<String> normalwords = new ArrayList<String>();
  3.         line = line.toLowerCase();
  4.         CharArraySet set = RussianAnalyzer.getDefaultStopSet();
  5.         RussianLuceneMorphology luceneMorph = new RussianLuceneMorphology();
  6.         String words[] = line.split(" ");
  7.          for (int i=0;i<words.length;i++) {
  8.             String w =  words[i].replaceAll("[^а-я]", "");
  9.             if (!w.equals("")) {
  10.                 List<String> wordNormalForms = luceneMorph.getNormalForms(w);
  11.                 String normalWord = wordNormalForms.get(0);
  12.                 if (!set.contains(normalWord)) {
  13.                     normalwords.add(normalWord);
  14.                 }
  15.             }
  16.          }
  17.         return normalwords;
  18.        }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement