View difference between Paste ID: fwdENb3F and NCTmPTya
SHOW: | | - or go back to the newest paste.
1
import java.io.IOException;
2
3
import org.apache.lucene.analysis.standard.StandardAnalyzer;
4
import org.apache.lucene.document.Document;
5
import org.apache.lucene.document.Field;
6
import org.apache.lucene.document.Field.Index;
7
import org.apache.lucene.document.Field.Store;
8
import org.apache.lucene.index.CorruptIndexException;
9
import org.apache.lucene.index.IndexReader;
10
import org.apache.lucene.index.IndexWriter;
11
import org.apache.lucene.index.IndexWriterConfig;
12
import org.apache.lucene.search.IndexSearcher;
13
import org.apache.lucene.search.Query;
14
import org.apache.lucene.search.ScoreDoc;
15
import org.apache.lucene.search.TopDocs;
16
import org.apache.lucene.search.similar.MoreLikeThis;
17
import org.apache.lucene.store.Directory;
18
import org.apache.lucene.store.LockObtainFailedException;
19
import org.apache.lucene.store.RAMDirectory;
20
import org.apache.lucene.util.Version;
21
22
public class Main {
23
	/**
	 * Demo entry point: indexes a handful of books in memory and prints the
	 * titles that a MoreLikeThis query considers similar to the first one.
	 * <p>
	 * Steps visible in this listing:
	 * <ol>
	 * <li>create a StandardAnalyzer, a RAMDirectory and an IndexWriterConfig
	 * (all with Version.LUCENE_36);</li>
	 * <li>add seven books through addBook and close the writer;</li>
	 * <li>open an IndexReader on the directory and configure MoreLikeThis for
	 * the "title" and "content" fields;</li>
	 * <li>build the query with mlt.like(0), run a top-10 search and print a
	 * title for every ScoreDoc.</li>
	 * </ol>
	 * NOTE(review): this text looks like a diff view of two revisions - lines
	 * holding only a number are presumably line markers, with "-" for the old
	 * revision and "+" for the reformatted one. The declarations of "is" (used
	 * by is.search) and "title" (printed in the loop) do not appear in this
	 * listing; presumably they were lost from the capture - confirm against the
	 * original paste.
	 *
	 * @param args command-line arguments; not read in the visible code
	 * @throws CorruptIndexException declared on the signature
	 * @throws LockObtainFailedException declared on the signature
	 * @throws IOException declared on the signature
	 */
	public static void main(String[] args) throws CorruptIndexException,
24-
	public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException{
24+
			LockObtainFailedException, IOException {
25-
		//Setting up the index
25+
		// Setting up the index
26
		StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
27
		Directory index = new RAMDirectory();
28-
		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
28+
		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36,
29
				analyzer);
30-
		//Populate the index
30+
31
		// Populate the index
32
		IndexWriter w = new IndexWriter(index, config);
33-
		addBook(w, "My first Bratwurst", "'Amazing' - Financial Times"); //identical to the first one
33+
34-
		addBook(w, "Financial Studies - Part 1", "Rainer Bratwurst with his first book");
34+
		addBook(w, "My first Bratwurst", "'Amazing' - Financial Times");// Duplicate
35-
		addBook(w, "Financial Studies - Part 2", "Author Rainer Bratwurst does not so well in this one");
35+
		addBook(w, "Financial Studies - Part 1",
36-
		addBook(w, "Financial Studies - Part 3", "Rainer Bratwurst my favorite author");
36+
				"Rainer Bratwurst with his first book");
37-
		addBook(w, "Amazing Times", "Author Rainer Bratwurst strikes again. First choice for my Kids.");
37+
		addBook(w, "Financial Studies - Part 2",
38-
		addBook(w, "Amazing Times - Back again", "This one is not like part 1 from author Rainer Bratwurst");
38+
				"Author Rainer Bratwurst does not so well in this one");
39-
		addBook(w, "Why Times New Roman is Amazing", "An essay I wrote in my first year at college");
39+
		addBook(w, "Financial Studies - Part 3",
40
				"Rainer Bratwurst my favorite author");
41-
		
41+
		addBook(w, "Amazing Times",
42-
		//Setting up MoreLikeThis
42+
				"Author Rainer Bratwurst strikes again. First choice for my Kids.");
43
		addBook(w, "Amazing Times - Back again",
44
				"This one is not like part 1 from author Rainer Bratwurst");
45-
		mlt.setFieldNames(new String[]{"title", "content"});
45+
		addBook(w, "Why Times New Roman is Amazing",
46
				"An essay I wrote in my first year at college");
47
		w.close();
48-
		
48+
49-
		//Build the MLT-Query for the first document in the index and execute it
49+
		// Setting up MoreLikeThis
50
		IndexReader ir = IndexReader.open(index);
51
		MoreLikeThis mlt = new MoreLikeThis(ir);
52
		mlt.setFieldNames(new String[] { "title", "content" });
53-
		for(ScoreDoc sd : result.scoreDocs){
53+
54-
			//Print all titles similar to "My first Bratwurst" - the duplicate is not found
54+
55
56
		// Build the MLT-Query for the first document in the index and execute
57
		// it
58
		Query q = mlt.like(0);
59-
		
59+
60
		TopDocs result = is.search(q, 10);
61-
	private static void addBook(IndexWriter w, String title, String content) throws IOException {
61+
		for (ScoreDoc sd : result.scoreDocs) {
62-
		  Document doc = new Document();
62+
			// Print all titles similar to "My first Bratwurst" - the duplicate
63-
		  doc.add(new Field("title", title, Store.YES, Index.ANALYZED));
63+
			// is not found
64-
		  doc.add(new Field("content", content, Store.YES, Index.ANALYZED));
64+
65-
		  w.addDocument(doc);
65+
66
			System.out.println(title);
67
			// Result:
68
			//		  
69
			// Amazing Times
70
			// Why Times New Roman is Amazing
71
			// Financial Studies - Part 1
72
			// Financial Studies - Part 3
73
			// Amazing Times - Back again
74
			// Financial Studies - Part 2
75
		}
76
77
	}
78
79
	/**
	 * Adds one book to the index as a two-field document.
	 * <p>
	 * Builds a new Document holding a "title" field and a "content" field,
	 * both created with Store.YES and Index.ANALYZED, and hands it to the
	 * writer. The writer is neither committed nor closed here.
	 * <p>
	 * NOTE(review): the bare numbers between the statements are presumably
	 * line markers from the diff view, not part of the program.
	 *
	 * @param w       writer that receives the new document
	 * @param title   text stored under the "title" field
	 * @param content text stored under the "content" field
	 * @throws IOException declared on the signature; presumably propagated
	 *                     from w.addDocument - confirm against the Lucene API
	 */
	private static void addBook(IndexWriter w, String title, String content)
80
			throws IOException {
81
		Document doc = new Document();
82
		doc.add(new Field("title", title, Store.YES, Index.ANALYZED));
83
		doc.add(new Field("content", content, Store.YES, Index.ANALYZED));
84
		w.addDocument(doc);
85
	}
86
87
}