Advertisement
Guest User

Untitled

a guest
Apr 26th, 2019
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 4.21 KB | None | 0 0
  1. package com.expleague.sensearch.snippet.experiments.pool;
  2.  
  3. import com.expleague.commons.random.FastRandom;
  4. import com.expleague.ml.data.tools.DataTools;
  5. import com.expleague.ml.data.tools.Pool;
  6. import com.expleague.ml.meta.DataSetMeta;
  7. import com.expleague.ml.meta.impl.JsonDataSetMeta;
  8. import com.expleague.sensearch.AppModule;
  9. import com.expleague.sensearch.Page;
  10. import com.expleague.sensearch.Page.SegmentType;
  11. import com.expleague.sensearch.index.Index;
  12. import com.expleague.ml.data.tools.Pool.Builder;
  13. import com.expleague.sensearch.query.BaseQuery;
  14. import com.expleague.sensearch.query.Query;
  15. import com.expleague.sensearch.snippet.experiments.naturalquestions.Data;
  16. import com.expleague.sensearch.snippet.features.AccumulatorFeatureSet;
  17. import com.expleague.sensearch.snippet.features.QPASItem;
  18. import com.expleague.sensearch.snippet.features.TargetFeatureSet;
  19. import com.expleague.sensearch.snippet.passage.Passage;
  20. import com.fasterxml.jackson.databind.ObjectMapper;
  21. import com.google.inject.Guice;
  22. import com.google.inject.Inject;
  23. import com.google.inject.Injector;
  24. import java.io.IOException;
  25. import java.nio.file.Files;
  26. import java.nio.file.Path;
  27. import java.nio.file.Paths;
  28. import java.util.Arrays;
  29. import java.util.Date;
  30. import java.util.List;
  31. import java.util.Optional;
  32. import java.util.concurrent.atomic.AtomicInteger;
  33. import java.util.stream.Collectors;
  34.  
  35. public class SnippetPoolBuilder {
  36.  
  37.   private final Index index;
  38.  
  39.   @Inject
  40.   public SnippetPoolBuilder(Index index) {
  41.     this.index = index;
  42.   }
  43.  
  44.   public static void main(String[] args) throws IOException {
  45.     Injector injector = Guice.createInjector(new AppModule());
  46.     injector.getInstance(SnippetPoolBuilder.class).build(Paths.get("snippet.pool"));
  47.   }
  48.  
  49.   private void build(Path path) {
  50.     FastRandom rand = new FastRandom();
  51.     DataSetMeta meta =
  52.         new JsonDataSetMeta(
  53.             "Google", "sensearch", new Date(), QPASItem.class, rand.nextBase64String(32));
  54.     AccumulatorFeatureSet featureSet = new AccumulatorFeatureSet(index);
  55.     TargetFeatureSet targetFeatureSet = new TargetFeatureSet(index);
  56.  
  57.     Builder<QPASItem> poolBuilder = Pool.builder(meta, featureSet, targetFeatureSet);
  58.  
  59.     AtomicInteger status = new AtomicInteger();
  60.     try {
  61.       byte[] jsonData = Files.readAllBytes(
  62.           Paths.get("./src/main/java/com/expleague/sensearch/snippet/experiments/data.json"));
  63.       ObjectMapper objectMapper = new ObjectMapper();
  64.       Data[] datas = objectMapper.readValue(jsonData, Data[].class);
  65.  
  66.       Arrays.stream(datas)
  67.           //.parallel()
  68.           .forEach(data -> {
  69.             if (status.get() % 10 == 0) {
  70.               System.err.println(status + " datas completed");
  71.             }
  72.  
  73.             status.incrementAndGet();
  74.             Query query = BaseQuery.create(data.getQuery(), index);
  75.             Optional<Page> page = index
  76.                 .allDocuments()
  77.                 .filter(x -> x.content(SegmentType.SECTION_TITLE).equals(data.getTitle()))
  78.                 .findFirst();
  79.             if (page.isPresent()) {
  80.               List<Passage> passages = page.get()
  81.                   .sentences(SegmentType.SUB_BODY)
  82.                   .map(x -> new Passage(x, index.parse(x).collect(Collectors.toList()), page.get()))
  83.                   .collect(Collectors.toList());
  84.               for (int i = 0; i < passages.size(); i++) {
  85.                 passages.get(i).setId(i);
  86.               }
  87.  
  88.              // synchronized (poolBuilder) {
  89.               System.out.println("--------------------");
  90.               //System.out.println(data.getLong_answer());
  91.               //System.out.println();
  92.               //System.out.println(page.get().content(SegmentType.SUB_BODY));
  93.               System.out.println(passages.size());
  94.               for (Passage passage : passages) {
  95.                   poolBuilder.accept(new QPASItem(query, passage));
  96.                   poolBuilder.advance();
  97.                 }
  98.               System.out.println("--------------------");
  99.             //  }
  100.             }
  101.           });
  102.  
  103.       Pool<QPASItem> pool = poolBuilder.create();
  104.       DataTools.writePoolTo(pool, Files.newBufferedWriter(path));
  105.     } catch (IOException e) {
  106.       e.printStackTrace();
  107.     }
  108.  
  109.   }
  110. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement