Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
- package queriestrec;
- import java.io.File;
- import java.io.FileReader;
- import java.io.Reader;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.TopScoreDocCollector;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.RAMDirectory;
- import org.apache.lucene.util.Version;
- /**
- *
- * @author Celso.araujo
- */
- public class LuceneTrec {
- public static final String FIELD_PATH = "path";
- public static final String FIELD_CONTENTS = "contents";
- public static void run() throws Exception{
- Analyzer analyzer = new SnowballAnalyzer(Version.LUCENE_30, "English");
- Directory directory = new RAMDirectory(); // RAM index storage
- //Directory directory = FSDirectory.open(new File("/path/to/index")); // disk index storage
- IndexWriter writer = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
- /*Document doc = new Document();
- String title = "title";
- doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED)); // adding title field
- String content = "content";
- doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED)); // adding content field
- writer.addDocument(doc); // writing new document to the index*/
- File dir = new File("C:\\teste");
- File[] files = dir.listFiles();
- for (File file : files) {
- Document document = new Document();
- String path = file.getCanonicalPath();
- //System.out.println(path)
- String title = "title";
- document.add(new Field("title", path, Field.Store.YES, Field.Index.ANALYZED));
- Reader reader = new FileReader(file);
- document.add(new Field("content", reader));
- writer.addDocument(document);
- }
- writer.close();
- IndexSearcher searcher = new IndexSearcher(directory);
- QueryParser parser = new QueryParser(Version.LUCENE_30, "computer", analyzer);
- Query query = parser.parse("computer");
- TopScoreDocCollector collector = TopScoreDocCollector.create(10, true);
- searcher.search(query, collector);
- ScoreDoc[] hits = collector.topDocs().scoreDocs;
- // `i` is just a number of document in Lucene. Note, that this number may change after document deletion
- for (int i = 0; i < hits.length; i++) {
- Document hitDoc = searcher.doc(hits[i].doc); // getting actual document
- System.out.println("Title: " + hitDoc.get("title"));
- System.out.println("Content: " + hitDoc.get("content"));
- System.out.println();
- }
- searcher.close();
- directory.close();
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement