Advertisement
Guest User

Untitled

a guest
Jul 23rd, 2014
165
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.24 KB | None | 0 0
  1. import java.io.BufferedReader;
  2. import java.io.File;
  3. import java.io.FileInputStream;
  4. import java.io.InputStreamReader;
  5.  
  6. import org.apache.lucene.analysis.Analyzer;
  7. import org.apache.lucene.analysis.standard.StandardAnalyzer;
  8. import org.apache.lucene.document.Document;
  9. import org.apache.lucene.document.Field;
  10. import org.apache.lucene.document.StoredField;
  11. import org.apache.lucene.document.TextField;
  12. import org.apache.lucene.index.IndexWriter;
  13. import org.apache.lucene.index.IndexWriterConfig;
  14. import org.apache.lucene.index.NoMergePolicy;
  15. import org.apache.lucene.index.IndexWriterConfig.OpenMode;
  16. import org.apache.lucene.store.Directory;
  17. import org.apache.lucene.store.FSDirectory;
  18. import org.apache.lucene.util.Version;
  19.  
  20.  
  21. public class MergeSetup {
  22. public static void main(String args[]) throws Exception {
  23. long startTime = System.currentTimeMillis();
  24. BufferedReader r = new BufferedReader(
  25. new InputStreamReader(
  26. new FileInputStream("/home/rmuir/Downloads/allCountries.txt"), "UTF-8"));
  27. Directory d = FSDirectory.open(new File("/data/indices/geonamesMerged"));
  28. Analyzer a = new StandardAnalyzer(Version.LUCENE_CURRENT);
  29. IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT, a);
  30. iwc.setMergePolicy(NoMergePolicy.INSTANCE);
  31. iwc.setOpenMode(OpenMode.CREATE);
  32. //iwc.setRAMBufferSizeMB(10);
  33. IndexWriter iw = new IndexWriter(d, iwc);
  34. Document doc = new Document();
  35. Field[] fields = new Field[19];
  36. for (int i = 0; i < fields.length; i++) {
  37. fields[i] = new TextField("field" + i, "", Field.Store.NO);
  38. doc.add(fields[i]);
  39. }
  40. Field stored = new StoredField("_source", "");
  41. doc.add(stored);
  42. String line = null;
  43. int docno = 0;
  44. while ((line = r.readLine()) != null) {
  45. String tokens[] = line.split("\t");
  46. if (tokens.length != 19) {
  47. throw new RuntimeException();
  48. }
  49. for (int i = 0; i < tokens.length; i++) {
  50. fields[i].setStringValue(tokens[i]);
  51. }
  52. stored.setStringValue(line);
  53. iw.addDocument(doc);
  54. }
  55. iw.shutdown();
  56. d.close();
  57. r.close();
  58. long endTime = System.currentTimeMillis();
  59. System.out.println("indexed in: " + (endTime - startTime));
  60. }
  61. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement