Pastebin launched a little side project called VERYVIRAL.com, check it out ;-) Want more features on Pastebin? Sign Up, it's FREE!
Guest

search txt files

By: a guest on Nov 4th, 2010  |  syntax: Java 5  |  size: 3.38 KB  |  views: 622  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. /*
  2.  * To change this template, choose Tools | Templates
  3.  * and open the template in the editor.
  4.  */
  5. package queriestrec;
  6.  
  7.  
  8. import java.io.File;
  9. import java.io.FileReader;
  10. import java.io.Reader;
  11. import org.apache.lucene.analysis.Analyzer;
  12. import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
  13. import org.apache.lucene.document.Document;
  14. import org.apache.lucene.document.Field;
  15. import org.apache.lucene.index.IndexWriter;
  16. import org.apache.lucene.queryParser.QueryParser;
  17. import org.apache.lucene.search.IndexSearcher;
  18. import org.apache.lucene.search.Query;
  19. import org.apache.lucene.search.ScoreDoc;
  20. import org.apache.lucene.search.TopScoreDocCollector;
  21. import org.apache.lucene.store.Directory;
  22. import org.apache.lucene.store.RAMDirectory;
  23. import org.apache.lucene.util.Version;
  24.  
  25. /**
  26.  *
  27.  * @author Celso.araujo
  28.  */
  29. public class LuceneTrec {
  30.  
  31.         public static final String FIELD_PATH = "path";
  32.         public static final String FIELD_CONTENTS = "contents";
  33.  
  34.         public static void run() throws Exception{
  35.  
  36.             Analyzer analyzer = new SnowballAnalyzer(Version.LUCENE_30, "English");
  37.             Directory directory = new RAMDirectory();   // RAM index storage
  38.             //Directory directory = FSDirectory.open(new File("/path/to/index"));  // disk index storage
  39.  
  40.  
  41.             IndexWriter writer = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
  42.  
  43.  
  44.             /*Document doc = new Document();
  45.             String title = "title";
  46.             doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));  // adding title field
  47.             String content = "content";
  48.             doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED)); // adding content field
  49.             writer.addDocument(doc);  // writing new document to the index*/
  50.  
  51.  
  52.             File dir = new File("C:\\teste");
  53.             File[] files = dir.listFiles();
  54.  
  55.             for (File file : files) {
  56.  
  57.                 Document document = new Document();
  58.                 String path = file.getCanonicalPath();
  59.  
  60.                 //System.out.println(path)
  61.  
  62.                 String title = "title";
  63.                 document.add(new Field("title", path, Field.Store.YES, Field.Index.ANALYZED));
  64.  
  65.                 Reader reader = new FileReader(file);
  66.  
  67.                 document.add(new Field("content", reader));
  68.  
  69.  
  70.                 writer.addDocument(document);
  71.  
  72.  
  73.             }
  74.  
  75.  
  76.  
  77.             writer.close();
  78.  
  79.             IndexSearcher searcher = new IndexSearcher(directory);
  80.             QueryParser parser = new QueryParser(Version.LUCENE_30, "computer", analyzer);
  81.             Query query = parser.parse("computer");
  82.             TopScoreDocCollector collector = TopScoreDocCollector.create(10, true);
  83.             searcher.search(query, collector);
  84.  
  85.             ScoreDoc[] hits = collector.topDocs().scoreDocs;
  86.             // `i` is just a number of document in Lucene. Note, that this number may change after document deletion
  87.             for (int i = 0; i < hits.length; i++) {
  88.                 Document hitDoc = searcher.doc(hits[i].doc);  // getting actual document
  89.                 System.out.println("Title: " + hitDoc.get("title"));
  90.                 System.out.println("Content: " + hitDoc.get("content"));
  91.                 System.out.println();
  92.             }
  93.  
  94.  
  95.             searcher.close();
  96.             directory.close();
  97.  
  98.  
  99.  
  100.  
  101.  
  102.  
  103.  
  104.  
  105.         }
  106.  
  107.  
  108.  
  109.  }
clone this paste RAW Paste Data