Advertisement
Guest User

Untitled

a guest
Apr 26th, 2018
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 3.35 KB | None | 0 0
  1. import org.apache.lucene.index.IndexWriter;
  2. import org.apache.lucene.index.IndexWriterConfig;
  3. import org.apache.lucene.analysis.standard.StandardAnalyzer;
  4. import org.apache.lucene.document.Document;
  5. import org.apache.lucene.document.Field;
  6. import org.apache.lucene.document.LongPoint;
  7. import org.apache.lucene.document.StringField;
  8. import org.apache.lucene.document.TextField;
  9. import org.apache.commons.io.FileUtils;
  10. import org.apache.lucene.store.FSDirectory;
  11. import org.apache.lucene.store.Directory;
  12. import java.io.File;
  13. import java.io.FileFilter;
  14. import java.io.IOException;
  15. import java.nio.file.Paths;
  16. import java.io.FileReader;
  17.  
  18. public class Indexer {
  19.  
  20.     public static void main(String[] args) throws Exception {
  21.         if (args.length != 4) {
  22.             throw new IllegalArgumentException("Usage: java " + Indexer.class.getName() + " <index dir> <data dir>");
  23.         }
  24.         String indexDir = args[0];
  25.         String dataDir = args[1];
  26.  
  27.         String indexDir500 = args[2];
  28.         String dataDir500 = args[3];
  29.  
  30.         // prvi indeks
  31.         long start = System.currentTimeMillis();
  32.         Indexer indexer = new Indexer(indexDir);
  33.         int numIndexed;
  34.         try {
  35.             numIndexed = indexer.index(dataDir, new TextFilesFilter());
  36.         } finally {
  37.             indexer.close();
  38.         }
  39.         long end = System.currentTimeMillis();
  40.  
  41.         System.out.println("Indexing " + numIndexed + " files took " + (end - start) + " milliseconds");
  42.  
  43.         File f = new File(indexDir);
  44.         double sizekb = FileUtils.sizeOfDirectory(f) / 1024;
  45.         System.out.println("Index size is " + sizekb + " KB");
  46.  
  47.         // drugi indeks
  48.         start = System.currentTimeMillis();
  49.         Indexer indexer500 = new Indexer(indexDir500);
  50.  
  51.         try {
  52.             numIndexed = indexer500.index(dataDir500, new TextFilesFilter());
  53.         } finally {
  54.             indexer500.close();
  55.         }
  56.         end = System.currentTimeMillis();
  57.  
  58.         System.out.println("Indexing " + numIndexed + " files took " + (end - start) + " milliseconds");
  59.  
  60.         f = new File(indexDir500);
  61.         sizekb = FileUtils.sizeOfDirectory(f) / 1024;
  62.         System.out.println("Index size is " + sizekb + " KB");
  63.  
  64.     }
  65.  
  66.     private IndexWriter writer;
  67.  
  68.     public Indexer(String indexDir) throws IOException {
  69.         Directory dir = FSDirectory.open(Paths.get(indexDir));
  70.         writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
  71.     }
  72.  
  73.     public void close() throws IOException {
  74.         writer.close();
  75.     }
  76.  
  77.     public int index(String dataDir, FileFilter filter) throws Exception {
  78.  
  79.         File[] files = new File(dataDir).listFiles();
  80.  
  81.         for (File f : files) {
  82.             if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
  83.                     && (filter == null || filter.accept(f))) {
  84.                 indexFile(f);
  85.             }
  86.         }
  87.  
  88.         return writer.numDocs();
  89.     }
  90.  
  91.     private static class TextFilesFilter implements FileFilter {
  92.         public boolean accept(File path) {
  93.             return path.getName().toLowerCase().endsWith(".txt");
  94.         }
  95.     }
  96.  
  97.     protected Document getDocument(File f) throws Exception {
  98.         Document doc = new Document();
  99.         doc.add(new TextField("contents", new FileReader(f)));
  100.         doc.add(new StringField("filename", f.getName(), Field.Store.YES));
  101.         doc.add(new StringField("fullpath", f.getCanonicalPath(), Field.Store.YES));
  102.         doc.add(new LongPoint("filesize", f.length()));
  103.         return doc;
  104.     }
  105.  
  106.     private void indexFile(File f) throws Exception {
  107.         // System.out.println("Indexing " + f.getCanonicalPath());
  108.         Document doc = getDocument(f);
  109.         writer.addDocument(doc);
  110.  
  111.     }
  112. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement