Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package org.mywebapp.index;
- import org.apache.lucene.analysis.*;
- import org.apache.lucene.analysis.standard.StandardFilter;
- import org.apache.lucene.analysis.standard.StandardTokenizer;
- import java.io.Reader;
- import java.util.Set;
- public class MyWebAppAnalyzer
- extends Analyzer {
- private static Set _stopSet;
- private static final String[] SMART_STOP_WORDS = {
- // About 500 stop words go here: "word1", "word2", etc.
- };
- public MyWebAppAnalyzer() {
- this(SMART_STOP_WORDS);
- }
- public MyWebAppAnalyzer(String[] stopWords) {
- _stopSet = StopFilter.makeStopSet(stopWords);
- }
- public TokenStream tokenStream(String string, Reader reader) {
- return new PorterStemFilter(
- new ISOLatin1AccentFilter(
- new StopFilter(
- new LowerCaseFilter(
- new StandardFilter(
- new StandardTokenizer(reader)
- )
- ),
- _stopSet
- )
- )
- );
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement