Advertisement
Guest User

Untitled

a guest
Sep 16th, 2019
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 9.23 KB | None | 0 0
  /**
   * Seeks this enum to exactly {@code target} by walking the terms index ({@code fr.index})
   * one target byte at a time and then scanning the block of the deepest frame reached.
   *
   * <p>Flow, as implemented below:
   * <ol>
   *   <li>If an earlier seek left state behind ({@code currentFrame != staticFrame}), the cached
   *       {@code arcs[]} / {@code stack[]} entries are re-used for the leading bytes that the
   *       current {@code term} and {@code target} share, up to {@code validIndexPrefix};
   *       otherwise the root frame is pushed from the index's first arc.</li>
   *   <li>The remaining target bytes are looked up with {@code findTargetArc}; a frame is
   *       pushed each time a final arc is followed.</li>
   *   <li>When no arc matches, or the target is used up, {@code scanToFloorFrame} is applied to
   *       the current frame; if that frame has terms, its block is loaded and scanned with
   *       {@code scanToTerm}.</li>
   * </ol>
   *
   * @param target the term to look up; its bytes are read from {@code target.bytes} starting at
   *     {@code target.offset}, for {@code target.length} bytes
   * @return {@code true} if the block scan reports {@code SeekStatus.FOUND}, or if
   *     {@code target} equals the current term and {@code termExists} is set; {@code false}
   *     otherwise
   * @throws IllegalStateException if {@code fr.index} is {@code null}
   * @throws IOException declared by this method; presumably raised by the index/block reads it
   *     delegates to (not visible from this method alone)
   */
  1.   @Override
  2.   public boolean seekExact(BytesRef target) throws IOException {
  3.  
  4.     if (fr.index == null) {
  5.       throw new IllegalStateException("terms index was not loaded");
  6.     }
  7.  
         // term is written below via setByteAt at indexes up to target.length - 1 and then
         // setLength(1+targetUpto); presumably grow() reserves that capacity -- confirm.
  8.     term.grow(1 + target.length);
  9.  
          // NOTE: clearEOF() is called inside an assert, so it only executes when assertions
          // are enabled (-ea).
  10.     assert clearEOF();
  11.  
  12.     // if (DEBUG) {
  13.     //   System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
  14.     //   printSeekState(System.out);
  15.     // }
  16.  
          // arc:        index arc reached so far while matching target bytes
          // targetUpto: number of leading target bytes already matched in the index
          // output:     sum (FST_OUTPUTS.add) of the outputs of the arcs followed so far
  17.     FST.Arc<BytesRef> arc;
  18.     int targetUpto;
  19.     BytesRef output;
  20.  
          // Default value; replaced below by lastFrame.ord when the target sorts before the
          // current term, or by -1 when there is no previous seek state.
  21.     targetBeforeCurrentLength = currentFrame.ord;
  22.  
  23.     if (currentFrame != staticFrame) {
  24.  
  25.       // We are already seek'd; find the common
  26.       // prefix of new seek term vs current term and
  27.       // re-use the corresponding seek state.  For
  28.       // example, if app first seeks to foobar, then
  29.       // seeks to foobaz, we can re-use the seek state
  30.       // for the first 5 bytes.
  31.  
  32.       // if (DEBUG) {
  33.       //   System.out.println("  re-use current seek state validIndexPrefix=" + validIndexPrefix);
  34.       // }
  35.  
            // Restart from the cached root arc (arcs[0]) and root frame (stack[0]).
  36.       arc = arcs[0];
  37.       assert arc.isFinal();
  38.       output = arc.output;
  39.       targetUpto = 0;
  40.          
  41.       SegmentTermsEnumFrame lastFrame = stack[0];
  42.       assert validIndexPrefix <= term.length();
  43.  
            // Only the first validIndexPrefix bytes of term have cached arcs that may be re-used.
  44.       final int targetLimit = Math.min(target.length, validIndexPrefix);
  45.  
            // cmp is (current term byte - target byte), both unsigned: negative means the
            // current term sorts before the target, positive means after.
  46.       int cmp = 0;
  47.  
  48.       // TODO: reverse vLong byte order for better FST
  49.       // prefix output sharing
  50.  
  51.       // First compare up to valid seek frames:
  52.       while (targetUpto < targetLimit) {
  53.         cmp = (term.byteAt(targetUpto)&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
  54.         // if (DEBUG) {
  55.         //    System.out.println("    cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")"   + " arc.output=" + arc.output + " output=" + output);
  56.         // }
  57.         if (cmp != 0) {
  58.           break;
  59.         }
              // Bytes match: take the cached arc for this byte (slot 1+targetUpto), add its
              // output, and step to the next cached frame whenever the arc is final -- the same
              // rule the index walk further down uses when it calls pushFrame.
  60.         arc = arcs[1+targetUpto];
  61.         assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
  62.         if (arc.output != BlockTreeTermsReader.NO_OUTPUT) {
  63.           output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output);
  64.         }
  65.         if (arc.isFinal()) {
  66.           lastFrame = stack[1+lastFrame.ord];
  67.         }
  68.         targetUpto++;
  69.       }
  70.  
  71.       if (cmp == 0) {
  72.         final int targetUptoMid = targetUpto;
  73.  
  74.         // Second compare the rest of the term, but
  75.         // don't save arc/output/frame; we only do this
  76.         // to find out if the target term is before,
  77.         // equal or after the current term
  78.         final int targetLimit2 = Math.min(target.length, term.length());
  79.         while (targetUpto < targetLimit2) {
  80.           cmp = (term.byteAt(targetUpto)&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
  81.           // if (DEBUG) {
  82.           //    System.out.println("    cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
  83.           // }
  84.           if (cmp != 0) {
  85.             break;
  86.           }
  87.           targetUpto++;
  88.         }
  89.  
              // All shared bytes are equal: the shorter of the two sorts first.
  90.         if (cmp == 0) {
  91.           cmp = term.length() - target.length;
  92.         }
              // Restore targetUpto: the index walk below must resume at the end of the
              // re-usable prefix, not where this comparison-only loop stopped.
  93.         targetUpto = targetUptoMid;
  94.       }
  95.  
  96.       if (cmp < 0) {
  97.         // Common case: target term is after current
  98.         // term, ie, app is seeking multiple terms
  99.         // in sorted order
  100.         // if (DEBUG) {
  101.         //   System.out.println("  target is after current (shares prefixLen=" + targetUpto + "); frame.ord=" + lastFrame.ord);
  102.         // }
  103.         currentFrame = lastFrame;
  104.  
  105.       } else if (cmp > 0) {
  106.         // Uncommon case: target term
  107.         // is before current term; this means we can
  108.         // keep the currentFrame but we must rewind it
  109.         // (so we scan from the start)
  110.         targetBeforeCurrentLength = lastFrame.ord;
  111.         // if (DEBUG) {
  112.         //   System.out.println("  target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord);
  113.         // }
  114.         currentFrame = lastFrame;
  115.         currentFrame.rewind();
  116.       } else {
  117.         // Target is exactly the same as current term
  118.         assert term.length() == target.length;
  119.         if (termExists) {
  120.           // if (DEBUG) {
  121.           //   System.out.println("  target is same as current; return true");
  122.           // }
  123.           return true;
  124.         } else {
  125.           // if (DEBUG) {
  126.           //   System.out.println("  target is same as current but term doesn't exist");
  127.           // }
                 // No return here: currentFrame is left as it was and control falls through
                 // to the index walk below, resuming at targetUpto.
  128.         }
  129.         //validIndexPrefix = currentFrame.depth;
  130.         //term.length = target.length;
  131.         //return termExists;
  132.       }
  133.  
  134.     } else {
  135.  
             // No previous seek state: start from the index's first arc, loaded into arcs[0].
  136.       targetBeforeCurrentLength = -1;
  137.       arc = fr.index.getFirstArc(arcs[0]);
  138.  
  139.       // Empty string prefix must have an output (block) in the index!
  140.       assert arc.isFinal();
  141.       assert arc.output != null;
  142.  
  143.       // if (DEBUG) {
  144.       //   System.out.println("    no seek state; push root frame");
  145.       // }
  146.  
  147.       output = arc.output;
  148.  
             // NOTE(review): this branch is only entered when currentFrame == staticFrame, and
             // currentFrame is reassigned by pushFrame just below, so this assignment looks
             // redundant -- confirm before removing.
  149.       currentFrame = staticFrame;
  150.  
  151.       //term.length = 0;
  152.       targetUpto = 0;
  153.       currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0);
  154.     }
  155.  
  156.     // if (DEBUG) {
  157.     //   System.out.println("  start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
  158.     // }
  159.  
  160.     // We are done sharing the common prefix with the incoming target and where we are currently seek'd; now continue walking the index:
  161.     while (targetUpto < target.length) {
  162.  
  163.       final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
  164.  
             // Look up the arc leaving `arc` labelled with the next target byte (unsigned).
             // getArc(1+targetUpto) presumably hands back the reusable slot that the re-use
             // path above reads as arcs[1+targetUpto] -- confirm against getArc.
  165.       final FST.Arc<BytesRef> nextArc = fr.index.findTargetArc(targetLabel, arc, getArc(1+targetUpto), fstReader);
  166.  
  167.       if (nextArc == null) {
  168.  
  169.         // Index is exhausted
  170.         // if (DEBUG) {
  171.         //   System.out.println("    index: index exhausted label=" + ((char) targetLabel) + " " + toHex(targetLabel));
  172.         // }
  173.            
               // validIndexPrefix bounds how many leading bytes the next seek may re-use (it is
               // read as targetLimit at the top of this method).
  174.         validIndexPrefix = currentFrame.prefix;
  175.         //validIndexPrefix = targetUpto;
  176.  
  177.         currentFrame.scanToFloorFrame(target);
  178.  
               // Frame has no terms: report not-found without loading its block. term is left
               // as the matched prefix plus the one target byte that had no arc.
  179.         if (!currentFrame.hasTerms) {
  180.           termExists = false;
  181.           term.setByteAt(targetUpto, (byte) targetLabel);
  182.           term.setLength(1+targetUpto);
  183.           // if (DEBUG) {
  184.           //   System.out.println("  FAST NOT_FOUND term=" + brToString(term));
  185.           // }
  186.           return false;
  187.         }
  188.  
  189.         currentFrame.loadBlock();
  190.  
  191.         final SeekStatus result = currentFrame.scanToTerm(target, true);            
  192.         if (result == SeekStatus.FOUND) {
  193.           // if (DEBUG) {
  194.           //   System.out.println("  return FOUND term=" + term.utf8ToString() + " " + term);
  195.           // }
  196.           return true;
  197.         } else {
  198.           // if (DEBUG) {
  199.           //   System.out.println("  got " + result + "; return NOT_FOUND term=" + brToString(term));
  200.           // }
  201.           return false;
  202.         }
  203.       } else {
  204.         // Follow this arc
  205.         arc = nextArc;
  206.         term.setByteAt(targetUpto, (byte) targetLabel);
  207.         // Aggregate output as we go:
  208.         assert arc.output != null;
  209.         if (arc.output != BlockTreeTermsReader.NO_OUTPUT) {
  210.           output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output);
  211.         }
  212.  
  213.         // if (DEBUG) {
  214.         //   System.out.println("    index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
  215.         // }
  216.         targetUpto++;
  217.  
               // A final arc gets its own frame, pushed with the accumulated output plus the
               // arc's nextFinalOutput and the number of target bytes matched so far.
  218.         if (arc.isFinal()) {
  219.           //if (DEBUG) System.out.println("    arc is final!");
  220.           currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto);
  221.           //if (DEBUG) System.out.println("    curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
  222.         }
  223.       }
  224.     }
  225.  
           // Reached only when every target byte had a matching arc in the index.
  226.     //validIndexPrefix = targetUpto;
  227.     validIndexPrefix = currentFrame.prefix;
  228.  
  229.     currentFrame.scanToFloorFrame(target);
  230.  
  231.     // Target term is entirely contained in the index:
  232.     if (!currentFrame.hasTerms) {
  233.       termExists = false;
  234.       term.setLength(targetUpto);
  235.       // if (DEBUG) {
  236.       //   System.out.println("  FAST NOT_FOUND term=" + brToString(term));
  237.       // }
  238.       return false;
  239.     }
  240.  
  241.     currentFrame.loadBlock();
  242.  
  243.     final SeekStatus result = currentFrame.scanToTerm(target, true);            
  244.     if (result == SeekStatus.FOUND) {
  245.       // if (DEBUG) {
  246.       //   System.out.println("  return FOUND term=" + term.utf8ToString() + " " + term);
  247.       // }
  248.       return true;
  249.     } else {
  250.       // if (DEBUG) {
  251.       //   System.out.println("  got result " + result + "; return NOT_FOUND term=" + term.utf8ToString());
  252.       // }
  253.  
  254.       return false;
  255.     }
  256.   }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement