Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- @Override
- public boolean seekExact(BytesRef target) throws IOException {
- if (fr.index == null) {
- throw new IllegalStateException("terms index was not loaded");
- }
- term.grow(1 + target.length);
- assert clearEOF();
- // if (DEBUG) {
- // System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
- // printSeekState(System.out);
- // }
- FST.Arc<BytesRef> arc;
- int targetUpto;
- BytesRef output;
- targetBeforeCurrentLength = currentFrame.ord;
- if (currentFrame != staticFrame) {
- // We are already seek'd; find the common
- // prefix of new seek term vs current term and
- // re-use the corresponding seek state. For
- // example, if app first seeks to foobar, then
- // seeks to foobaz, we can re-use the seek state
- // for the first 5 bytes.
- // if (DEBUG) {
- // System.out.println(" re-use current seek state validIndexPrefix=" + validIndexPrefix);
- // }
- arc = arcs[0];
- assert arc.isFinal();
- output = arc.output;
- targetUpto = 0;
- SegmentTermsEnumFrame lastFrame = stack[0];
- assert validIndexPrefix <= term.length();
- final int targetLimit = Math.min(target.length, validIndexPrefix);
- int cmp = 0;
- // TODO: reverse vLong byte order for better FST
- // prefix output sharing
- // First compare up to valid seek frames:
- while (targetUpto < targetLimit) {
- cmp = (term.byteAt(targetUpto)&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
- // if (DEBUG) {
- // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + " output=" + output);
- // }
- if (cmp != 0) {
- break;
- }
- arc = arcs[1+targetUpto];
- assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
- if (arc.output != BlockTreeTermsReader.NO_OUTPUT) {
- output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output);
- }
- if (arc.isFinal()) {
- lastFrame = stack[1+lastFrame.ord];
- }
- targetUpto++;
- }
- if (cmp == 0) {
- final int targetUptoMid = targetUpto;
- // Second compare the rest of the term, but
- // don't save arc/output/frame; we only do this
- // to find out if the target term is before,
- // equal or after the current term
- final int targetLimit2 = Math.min(target.length, term.length());
- while (targetUpto < targetLimit2) {
- cmp = (term.byteAt(targetUpto)&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
- // if (DEBUG) {
- // System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
- // }
- if (cmp != 0) {
- break;
- }
- targetUpto++;
- }
- if (cmp == 0) {
- cmp = term.length() - target.length;
- }
- targetUpto = targetUptoMid;
- }
- if (cmp < 0) {
- // Common case: target term is after current
- // term, ie, app is seeking multiple terms
- // in sorted order
- // if (DEBUG) {
- // System.out.println(" target is after current (shares prefixLen=" + targetUpto + "); frame.ord=" + lastFrame.ord);
- // }
- currentFrame = lastFrame;
- } else if (cmp > 0) {
- // Uncommon case: target term
- // is before current term; this means we can
- // keep the currentFrame but we must rewind it
- // (so we scan from the start)
- targetBeforeCurrentLength = lastFrame.ord;
- // if (DEBUG) {
- // System.out.println(" target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord);
- // }
- currentFrame = lastFrame;
- currentFrame.rewind();
- } else {
- // Target is exactly the same as current term
- assert term.length() == target.length;
- if (termExists) {
- // if (DEBUG) {
- // System.out.println(" target is same as current; return true");
- // }
- return true;
- } else {
- // if (DEBUG) {
- // System.out.println(" target is same as current but term doesn't exist");
- // }
- }
- //validIndexPrefix = currentFrame.depth;
- //term.length = target.length;
- //return termExists;
- }
- } else {
- targetBeforeCurrentLength = -1;
- arc = fr.index.getFirstArc(arcs[0]);
- // Empty string prefix must have an output (block) in the index!
- assert arc.isFinal();
- assert arc.output != null;
- // if (DEBUG) {
- // System.out.println(" no seek state; push root frame");
- // }
- output = arc.output;
- currentFrame = staticFrame;
- //term.length = 0;
- targetUpto = 0;
- currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0);
- }
- // if (DEBUG) {
- // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
- // }
- // We are done sharing the common prefix with the incoming target and where we are currently seek'd; now continue walking the index:
- while (targetUpto < target.length) {
- final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
- final FST.Arc<BytesRef> nextArc = fr.index.findTargetArc(targetLabel, arc, getArc(1+targetUpto), fstReader);
- if (nextArc == null) {
- // Index is exhausted
- // if (DEBUG) {
- // System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " + toHex(targetLabel));
- // }
- validIndexPrefix = currentFrame.prefix;
- //validIndexPrefix = targetUpto;
- currentFrame.scanToFloorFrame(target);
- if (!currentFrame.hasTerms) {
- termExists = false;
- term.setByteAt(targetUpto, (byte) targetLabel);
- term.setLength(1+targetUpto);
- // if (DEBUG) {
- // System.out.println(" FAST NOT_FOUND term=" + brToString(term));
- // }
- return false;
- }
- currentFrame.loadBlock();
- final SeekStatus result = currentFrame.scanToTerm(target, true);
- if (result == SeekStatus.FOUND) {
- // if (DEBUG) {
- // System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term);
- // }
- return true;
- } else {
- // if (DEBUG) {
- // System.out.println(" got " + result + "; return NOT_FOUND term=" + brToString(term));
- // }
- return false;
- }
- } else {
- // Follow this arc
- arc = nextArc;
- term.setByteAt(targetUpto, (byte) targetLabel);
- // Aggregate output as we go:
- assert arc.output != null;
- if (arc.output != BlockTreeTermsReader.NO_OUTPUT) {
- output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output);
- }
- // if (DEBUG) {
- // System.out.println(" index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
- // }
- targetUpto++;
- if (arc.isFinal()) {
- //if (DEBUG) System.out.println(" arc is final!");
- currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto);
- //if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
- }
- }
- }
- //validIndexPrefix = targetUpto;
- validIndexPrefix = currentFrame.prefix;
- currentFrame.scanToFloorFrame(target);
- // Target term is entirely contained in the index:
- if (!currentFrame.hasTerms) {
- termExists = false;
- term.setLength(targetUpto);
- // if (DEBUG) {
- // System.out.println(" FAST NOT_FOUND term=" + brToString(term));
- // }
- return false;
- }
- currentFrame.loadBlock();
- final SeekStatus result = currentFrame.scanToTerm(target, true);
- if (result == SeekStatus.FOUND) {
- // if (DEBUG) {
- // System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term);
- // }
- return true;
- } else {
- // if (DEBUG) {
- // System.out.println(" got result " + result + "; return NOT_FOUND term=" + term.utf8ToString());
- // }
- return false;
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement