Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.IndexWriterConfig;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.LongPoint;
- import org.apache.lucene.document.StringField;
- import org.apache.lucene.document.TextField;
- import org.apache.commons.io.FileUtils;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.store.Directory;
- import java.io.File;
- import java.io.FileFilter;
- import java.io.IOException;
- import java.nio.file.Paths;
- import java.io.FileReader;
- public class Indexer {
- public static void main(String[] args) throws Exception {
- if (args.length != 4) {
- throw new IllegalArgumentException("Usage: java " + Indexer.class.getName() + " <index dir> <data dir>");
- }
- String indexDir = args[0];
- String dataDir = args[1];
- String indexDir500 = args[2];
- String dataDir500 = args[3];
- // prvi indeks
- long start = System.currentTimeMillis();
- Indexer indexer = new Indexer(indexDir);
- int numIndexed;
- try {
- numIndexed = indexer.index(dataDir, new TextFilesFilter());
- } finally {
- indexer.close();
- }
- long end = System.currentTimeMillis();
- System.out.println("Indexing " + numIndexed + " files took " + (end - start) + " milliseconds");
- File f = new File(indexDir);
- double sizekb = FileUtils.sizeOfDirectory(f) / 1024;
- System.out.println("Index size is " + sizekb + " KB");
- // drugi indeks
- start = System.currentTimeMillis();
- Indexer indexer500 = new Indexer(indexDir500);
- try {
- numIndexed = indexer500.index(dataDir500, new TextFilesFilter());
- } finally {
- indexer500.close();
- }
- end = System.currentTimeMillis();
- System.out.println("Indexing " + numIndexed + " files took " + (end - start) + " milliseconds");
- f = new File(indexDir500);
- sizekb = FileUtils.sizeOfDirectory(f) / 1024;
- System.out.println("Index size is " + sizekb + " KB");
- }
- private IndexWriter writer;
- public Indexer(String indexDir) throws IOException {
- Directory dir = FSDirectory.open(Paths.get(indexDir));
- writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
- }
- public void close() throws IOException {
- writer.close();
- }
- public int index(String dataDir, FileFilter filter) throws Exception {
- File[] files = new File(dataDir).listFiles();
- for (File f : files) {
- if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
- && (filter == null || filter.accept(f))) {
- indexFile(f);
- }
- }
- return writer.numDocs();
- }
- private static class TextFilesFilter implements FileFilter {
- public boolean accept(File path) {
- return path.getName().toLowerCase().endsWith(".txt");
- }
- }
- protected Document getDocument(File f) throws Exception {
- Document doc = new Document();
- doc.add(new TextField("contents", new FileReader(f)));
- doc.add(new StringField("filename", f.getName(), Field.Store.YES));
- doc.add(new StringField("fullpath", f.getCanonicalPath(), Field.Store.YES));
- doc.add(new LongPoint("filesize", f.length()));
- return doc;
- }
- private void indexFile(File f) throws Exception {
- // System.out.println("Indexing " + f.getCanonicalPath());
- Document doc = getDocument(f);
- writer.addDocument(doc);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement