/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package queriestrec;
import java.io.File;
import java.io.FileReader;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
/**
 * Small demonstration of indexing and searching plain-text files with Lucene.
 *
 * <p>Every regular file of a directory is added to an in-memory index, one
 * query is run against the indexed file contents, and the best-matching
 * documents are printed to standard output.
 *
 * @author Celso.araujo
 */
public class LuceneTrec {
    // Public constants kept for callers; this class itself indexes under the
    // field names "title" and "content" declared below.
    public static final String FIELD_PATH = "path";
    public static final String FIELD_CONTENTS = "contents";

    /** Stored field holding the canonical path of the indexed file. */
    private static final String TITLE_FIELD = "title";
    /** Indexed (but not stored) field holding the tokenized file contents. */
    private static final String CONTENT_FIELD = "content";
    /** Directory indexed by the no-argument {@link #run()}. */
    private static final String DEFAULT_DIRECTORY = "C:\\teste";
    /** Query text used by the no-argument {@link #run()}. */
    private static final String DEFAULT_QUERY = "computer";
    /** Maximum number of hits collected and printed per search. */
    private static final int MAX_HITS = 10;
    /** Maximum number of terms indexed per field. */
    private static final int MAX_FIELD_LENGTH = 25000;

    /**
     * Indexes the files of the default directory and prints the hits for the
     * default query.
     *
     * @throws Exception if the directory cannot be listed, a file cannot be
     *         read, or indexing, query parsing or searching fails
     */
    public static void run() throws Exception {
        run(new File(DEFAULT_DIRECTORY), DEFAULT_QUERY);
    }

    /**
     * Indexes every regular file directly inside {@code dir} into an in-memory
     * index, then searches the file contents for {@code queryText} and prints
     * up to {@value #MAX_HITS} hits to standard output.
     *
     * @param dir directory whose regular files are indexed; subdirectories are skipped
     * @param queryText query in Lucene query-parser syntax, matched against the
     *        file contents unless the query names another field explicitly
     * @throws Exception if {@code dir} cannot be listed, a file cannot be
     *         read, or indexing, query parsing or searching fails
     */
    public static void run(File dir, String queryText) throws Exception {
        // listFiles() returns null (not an empty array) when the path does not
        // exist, is not a directory, or cannot be read.
        File[] files = dir.listFiles();
        if (files == null) {
            throw new java.io.IOException("Cannot list files in directory: " + dir);
        }

        Analyzer analyzer = new SnowballAnalyzer(Version.LUCENE_30, "English");
        // RAM index storage; an FSDirectory could be used instead for an on-disk index.
        Directory directory = new RAMDirectory();
        try {
            IndexWriter writer = new IndexWriter(directory, analyzer, true,
                    new IndexWriter.MaxFieldLength(MAX_FIELD_LENGTH));
            try {
                for (File file : files) {
                    // A FileReader cannot be opened on a directory, so only
                    // regular files are indexed.
                    if (file.isFile()) {
                        addFile(writer, file);
                    }
                }
            } finally {
                writer.close();
            }

            IndexSearcher searcher = new IndexSearcher(directory);
            try {
                // The default field must be the one that holds the text to
                // search; it is not the query string itself.
                QueryParser parser = new QueryParser(Version.LUCENE_30, CONTENT_FIELD, analyzer);
                printHits(searcher, parser.parse(queryText));
            } finally {
                searcher.close();
            }
        } finally {
            directory.close();
        }
    }

    /** Adds one file to the index: its canonical path as "title", its text as "content". */
    private static void addFile(IndexWriter writer, File file) throws Exception {
        Document document = new Document();
        document.add(new Field(TITLE_FIELD, file.getCanonicalPath(),
                Field.Store.YES, Field.Index.ANALYZED));
        // NOTE: FileReader decodes with the platform default charset.
        Reader reader = new FileReader(file);
        try {
            document.add(new Field(CONTENT_FIELD, reader));
            // The reader is consumed while the document is being added, so it
            // must stay open until addDocument returns.
            writer.addDocument(document);
        } finally {
            reader.close();
        }
    }

    /** Runs the query and prints the stored fields of the top hits to standard output. */
    private static void printHits(IndexSearcher searcher, Query query) throws Exception {
        TopScoreDocCollector collector = TopScoreDocCollector.create(MAX_HITS, true);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (ScoreDoc hit : hits) {
            // hit.doc is Lucene's internal document number; it may change
            // after documents are deleted from the index.
            Document hitDoc = searcher.doc(hit.doc);
            System.out.println("Title: " + hitDoc.get(TITLE_FIELD));
            // Reader-valued fields are indexed but not stored, so no content
            // value can be retrieved here and this prints "null".
            System.out.println("Content: " + hitDoc.get(CONTENT_FIELD));
            System.out.println();
        }
    }
}