Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io.BufferedReader;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.InputStreamReader;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.StoredField;
- import org.apache.lucene.document.TextField;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.IndexWriterConfig;
- import org.apache.lucene.index.NoMergePolicy;
- import org.apache.lucene.index.IndexWriterConfig.OpenMode;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.util.Version;
- public class MergeSetup {
- public static void main(String args[]) throws Exception {
- long startTime = System.currentTimeMillis();
- BufferedReader r = new BufferedReader(
- new InputStreamReader(
- new FileInputStream("/home/rmuir/Downloads/allCountries.txt"), "UTF-8"));
- Directory d = FSDirectory.open(new File("/data/indices/geonamesMerged"));
- Analyzer a = new StandardAnalyzer(Version.LUCENE_CURRENT);
- IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT, a);
- iwc.setMergePolicy(NoMergePolicy.INSTANCE);
- iwc.setOpenMode(OpenMode.CREATE);
- //iwc.setRAMBufferSizeMB(10);
- IndexWriter iw = new IndexWriter(d, iwc);
- Document doc = new Document();
- Field[] fields = new Field[19];
- for (int i = 0; i < fields.length; i++) {
- fields[i] = new TextField("field" + i, "", Field.Store.NO);
- doc.add(fields[i]);
- }
- Field stored = new StoredField("_source", "");
- doc.add(stored);
- String line = null;
- int docno = 0;
- while ((line = r.readLine()) != null) {
- String tokens[] = line.split("\t");
- if (tokens.length != 19) {
- throw new RuntimeException();
- }
- for (int i = 0; i < tokens.length; i++) {
- fields[i].setStringValue(tokens[i]);
- }
- stored.setStringValue(line);
- iw.addDocument(doc);
- }
- iw.shutdown();
- d.close();
- r.close();
- long endTime = System.currentTimeMillis();
- System.out.println("indexed in: " + (endTime - startTime));
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement