// View difference between Paste ID: fwdENb3F (/fwdENb3F) and NCTmPTya (/NCTmPTya)

import java.io.IOException;
1		import java.io.IOException;
2
3		import org.apache.lucene.analysis.standard.StandardAnalyzer;
4		import org.apache.lucene.document.Document;
5		import org.apache.lucene.document.Field;
6		import org.apache.lucene.document.Field.Index;
7		import org.apache.lucene.document.Field.Store;
8		import org.apache.lucene.index.CorruptIndexException;
9		import org.apache.lucene.index.IndexReader;
10		import org.apache.lucene.index.IndexWriter;
11		import org.apache.lucene.index.IndexWriterConfig;
12		import org.apache.lucene.search.IndexSearcher;
13		import org.apache.lucene.search.Query;
14		import org.apache.lucene.search.ScoreDoc;
15		import org.apache.lucene.search.TopDocs;
16		import org.apache.lucene.search.similar.MoreLikeThis;
17		import org.apache.lucene.store.Directory;
18		import org.apache.lucene.store.LockObtainFailedException;
19		import org.apache.lucene.store.RAMDirectory;
20		import org.apache.lucene.util.Version;
21
22		public class Main {
23		public static void main(String[] args) throws CorruptIndexException,
24	-	public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException{
24	+	LockObtainFailedException, IOException {
25	-	//Setting up the index
25	+	// Setting up the index
26		StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
27		Directory index = new RAMDirectory();
28	-	IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
28	+	IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36,
29		analyzer);
30	-	//Populate the index
30	+
31		// Populate the index
32		IndexWriter w = new IndexWriter(index, config);
33	-	addBook(w, "My first Bratwurst", "'Amazing' - Financial Times"); //identical to the first one
33	+
34	-	addBook(w, "Financial Studies - Part 1", "Rainer Bratwurst with his first book");
34	+	addBook(w, "My first Bratwurst", "'Amazing' - Financial Times");// Duplicate
35	-	addBook(w, "Financial Studies - Part 2", "Author Rainer Bratwurst does not so well in this one");
35	+	addBook(w, "Financial Studies - Part 1",
36	-	addBook(w, "Financial Studies - Part 3", "Rainer Bratwurst my favorite author");
36	+	"Rainer Bratwurst with his first book");
37	-	addBook(w, "Amazing Times", "Author Rainer Bratwurst strikes again. First choice for my Kids.");
37	+	addBook(w, "Financial Studies - Part 2",
38	-	addBook(w, "Amazing Times - Back again", "This one is not like part 1 from author Rainer Bratwurst");
38	+	"Author Rainer Bratwurst does not so well in this one");
39	-	addBook(w, "Why Times New Roman is Amazing", "An essay I wrote in my first year at college");
39	+	addBook(w, "Financial Studies - Part 3",
40		"Rainer Bratwurst my favorite author");
41	-
41	+	addBook(w, "Amazing Times",
42	-	//Setting up MoreLikeThis
42	+	"Author Rainer Bratwurst strikes again. First choice for my Kids.");
43		addBook(w, "Amazing Times - Back again",
44		"This one is not like part 1 from author Rainer Bratwurst");
45	-	mlt.setFieldNames(new String[]{"title", "content"});
45	+	addBook(w, "Why Times New Roman is Amazing",
46		"An essay I wrote in my first year at college");
47		w.close();
48	-
48	+
49	-	//Build the MLT-Query for the first document in the index and execute it
49	+	// Setting up MoreLikeThis
50		IndexReader ir = IndexReader.open(index);
51		MoreLikeThis mlt = new MoreLikeThis(ir);
52		mlt.setFieldNames(new String[] { "title", "content" });
53	-	for(ScoreDoc sd : result.scoreDocs){
53	+
54	-	//Print all titles similar to "My first Bratwurst" - the duplicate is not found
54	+
55
56		// Build the MLT-Query for the first document in the index and execute
57		// it
58		Query q = mlt.like(0);
59	-
59	+
60		TopDocs result = is.search(q, 10);
61	-	private static void addBook(IndexWriter w, String title, String content) throws IOException {
61	+	for (ScoreDoc sd : result.scoreDocs) {
62	-	Document doc = new Document();
62	+	// Print all titles similar to "My first Bratwurst" - the duplicate
63	-	doc.add(new Field("title", title, Store.YES, Index.ANALYZED));
63	+	// is not found
64	-	doc.add(new Field("content", content, Store.YES, Index.ANALYZED));
64	+
65	-	w.addDocument(doc);
65	+
66		System.out.println(title);
67		// Result:
68		//
69		// Amazing Times
70		// Why Times New Roman is Amazing
71		// Financial Studies - Part 1
72		// Financial Studies - Part 3
73		// Amazing Times - Back again
74		// Financial Studies - Part 2
75		}
76
77		}
78
79		private static void addBook(IndexWriter w, String title, String content)
80		throws IOException {
81		Document doc = new Document();
82		doc.add(new Field("title", title, Store.YES, Index.ANALYZED));
83		doc.add(new Field("content", content, Store.YES, Index.ANALYZED));
84		w.addDocument(doc);
85		}
86
87		}