import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similar.MoreLikeThis;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
/**
 * Small self-contained demonstration of Lucene's {@link MoreLikeThis}.
 *
 * <p>The program builds an in-memory index of eight "books" (each with an
 * analyzed, stored {@code title} and {@code content} field), asks
 * {@code MoreLikeThis} for documents similar to the first one, and prints the
 * titles of the hits to standard output.
 */
public class Main {

    /** Fields that MoreLikeThis inspects when building the similarity query. */
    private static final String[] MLT_FIELDS = { "title", "content" };

    /** Upper bound on the number of hits requested from the searcher. */
    private static final int MAX_HITS = 10;

    /**
     * Builds the demo index, runs the MoreLikeThis query and prints the
     * matching titles.
     *
     * @param args ignored
     * @throws CorruptIndexException if the index is corrupt
     * @throws LockObtainFailedException if the index write lock cannot be obtained
     * @throws IOException on any other low-level index error
     */
    public static void main(String[] args) throws CorruptIndexException,
            LockObtainFailedException, IOException {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        Directory index = new RAMDirectory();
        try {
            populateIndex(index, analyzer);
            printTitlesSimilarToFirstDocument(index);
        } finally {
            // Release the directory even when indexing or searching fails.
            index.close();
        }
    }

    /**
     * Writes the eight sample books into {@code index}. The first two books
     * are deliberate duplicates of each other.
     *
     * @param index directory that receives the documents
     * @param analyzer analyzer used to tokenize the {@code title} and
     *        {@code content} fields
     * @throws IOException if the writer cannot be opened, written or closed
     */
    private static void populateIndex(Directory index, StandardAnalyzer analyzer)
            throws IOException {
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36,
                analyzer);
        IndexWriter w = new IndexWriter(index, config);
        try {
            addBook(w, "My first Bratwurst", "'Amazing' - Financial Times");
            addBook(w, "My first Bratwurst", "'Amazing' - Financial Times");// Duplicate
            addBook(w, "Financial Studies - Part 1",
                    "Rainer Bratwurst with his first book");
            addBook(w, "Financial Studies - Part 2",
                    "Author Rainer Bratwurst does not so well in this one");
            addBook(w, "Financial Studies - Part 3",
                    "Rainer Bratwurst my favorite author");
            addBook(w, "Amazing Times",
                    "Author Rainer Bratwurst strikes again. First choice for my Kids.");
            addBook(w, "Amazing Times - Back again",
                    "This one is not like part 1 from author Rainer Bratwurst");
            addBook(w, "Why Times New Roman is Amazing",
                    "An essay I wrote in my first year at college");
        } finally {
            // Always close the writer so it is not left open when an
            // addBook call fails part-way through.
            w.close();
        }
    }

    /**
     * Builds a MoreLikeThis query for document 0 ("My first Bratwurst") and
     * prints the title of every hit, one per line.
     *
     * <p>Output recorded by the original author of this demo:
     * <pre>
     * Amazing Times
     * Why Times New Roman is Amazing
     * Financial Studies - Part 1
     * Financial Studies - Part 3
     * Amazing Times - Back again
     * Financial Studies - Part 2
     * </pre>
     * Note that the duplicate of document 0 is not among the hits.
     * NOTE(review): presumably MoreLikeThis pools the terms of all configured
     * fields and queries each term against a single field only, so a term
     * taken from the title may be searched in the content field (and vice
     * versa) - confirm against the MoreLikeThis implementation.
     *
     * @param index directory holding the previously written documents
     * @throws IOException if the index cannot be read or searched
     */
    private static void printTitlesSimilarToFirstDocument(Directory index)
            throws IOException {
        IndexReader ir = IndexReader.open(index);
        try {
            MoreLikeThis mlt = new MoreLikeThis(ir);
            mlt.setFieldNames(MLT_FIELDS);
            // Thresholds of 1 keep every term of this tiny corpus eligible.
            mlt.setMinTermFreq(1);
            mlt.setMinDocFreq(1);

            Query q = mlt.like(0);
            IndexSearcher is = new IndexSearcher(ir);
            try {
                TopDocs result = is.search(q, MAX_HITS);
                for (ScoreDoc sd : result.scoreDocs) {
                    Document document = ir.document(sd.doc);
                    System.out.println(document.get("title"));
                }
            } finally {
                is.close();
            }
        } finally {
            // The reader was opened here, so it is closed here as well.
            ir.close();
        }
    }

    /**
     * Adds one book to the index as a document with two stored, analyzed
     * fields: {@code title} and {@code content}.
     *
     * @param w open writer that receives the document
     * @param title book title
     * @param content descriptive text for the book
     * @throws IOException if the document cannot be written
     */
    private static void addBook(IndexWriter w, String title, String content)
            throws IOException {
        Document doc = new Document();
        doc.add(new Field("title", title, Store.YES, Index.ANALYZED));
        doc.add(new Field("content", content, Store.YES, Index.ANALYZED));
        w.addDocument(doc);
    }
}