Advertisement
gaelikun

Lucene 2.0 Analyzer

Mar 23rd, 2014
177
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 1.19 KB | None | 0 0
  1. package org.mywebapp.index;
  2.  
  3. import org.apache.lucene.analysis.*;
  4. import org.apache.lucene.analysis.standard.StandardFilter;
  5. import org.apache.lucene.analysis.standard.StandardTokenizer;
  6.  
  7. import java.io.Reader;
  8. import java.util.Set;
  9.  
  10. public class MyWebAppAnalyzer
  11.         extends Analyzer {
  12.     private static Set _stopSet;
  13.     private static final String[] SMART_STOP_WORDS = {
  14. // About 500 stop words go here: "word1", "word2", etc.
  15.     };
  16.  
  17.     public MyWebAppAnalyzer() {
  18.         this(SMART_STOP_WORDS);
  19.     }
  20.  
  21.     public MyWebAppAnalyzer(String[] stopWords) {
  22.         _stopSet = StopFilter.makeStopSet(stopWords);
  23.     }
  24.  
  25.     public TokenStream tokenStream(String string, Reader reader) {
  26.         return new PorterStemFilter(
  27.                 new ISOLatin1AccentFilter(
  28.                         new StopFilter(
  29.                                 new LowerCaseFilter(
  30.                                         new StandardFilter(
  31.                                                 new StandardTokenizer(reader)
  32.                                         )
  33.                                 ),
  34.                                 _stopSet
  35.                         )
  36.                 )
  37.         );
  38.     }
  39. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement