Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- Perform query tree:
- - got tree with IdxBlockReader at leafs (terms)
- - produce matched document ID, one by one
- """
- import idxblock
- import qparser
- class NodePerformer:
- def __init__(self, left=None, right=None):
- self.left = left
- self.right = right
- class TermNodePerformer(NodePerformer):
- def __init__(self, node):
- NodePerformer.__init__(self)
- assert (isinstance(node.idxblock, idxblock.IdxBlockReader))
- self.reader = node.idxblock
- def goto_id(self, id):
- return self.reader.goto_id(id)
- def evaluate(self):
- return self.reader.cur_doc()
- class EmptyNodePerformer(NodePerformer):
- """
- special case for non-existing blocks for terms
- """
- def __init__(self):
- NodePerformer.__init__(self)
- def goto_id(self, id):
- pass
- def evaluate(self):
- return idxblock.OMEGA_ID
- class AndNodePerformer(NodePerformer):
- def __init__(self, left, right):
- NodePerformer.__init__(self, left, right)
- def evaluate(self):
- lval = self.left.evaluate()
- rval = self.right.evaluate()
- while lval != rval and lval != idxblock.OMEGA_ID and rval != idxblock.OMEGA_ID:
- if lval < rval:
- self.left.goto_id(rval)
- lval = self.left.evaluate()
- else:
- self.right.goto_id(lval)
- rval = self.right.evaluate()
- if lval == idxblock.OMEGA_ID or rval == idxblock.OMEGA_ID:
- return idxblock.OMEGA_ID
- assert lval == rval
- return lval
- def goto_id(self, id):
- self.left.goto_id(id)
- self.right.goto_id(id)
- class OrNodePerformer(NodePerformer):
- def __init__(self, left, right):
- NodePerformer.__init__(self, left, right)
- def evaluate(self):
- lval = self.left.evaluate()
- rval = self.right.evaluate()
- if lval == idxblock.OMEGA_ID:
- return rval
- if rval == idxblock.OMEGA_ID:
- return lval
- return min(lval, rval)
- def goto_id(self, id):
- self.left.goto_id(id)
- self.right.goto_id(id)
- class NegationNodePerformer:
- """
- Negation node - provide numbers which are absent in `operand` node
- For example:
- - [0, 1, 3] => [2, 4, ...]
- - [3] => [0, 1, 2, 4, ...]
- - [] => [0, 1, ...]
- """
- def __init__(self, operand):
- self.operand = operand
- self.virtual_doc_id = idxblock.ALPHA_ID
- def evaluate(self):
- while self.virtual_doc_id == self.operand.evaluate():
- self.virtual_doc_id += 1
- self.operand.goto_id(self.virtual_doc_id)
- return self.virtual_doc_id
- def goto_id(self, id):
- self.operand.goto_id(id)
- self.virtual_doc_id = id
- class BadTreeError(Exception):
- pass
- class QTreePerformer:
- def __init__(self, tree):
- assert (isinstance(tree, qparser.QtreeTypeInfo))
- self.last_doc_id = idxblock.ALPHA_ID
- self.root = QTreePerformer._deep_copy(tree)
- @staticmethod
- def _deep_copy(subroot):
- if subroot.is_operator:
- if subroot.value == '&':
- return AndNodePerformer(QTreePerformer._deep_copy(subroot.left),
- QTreePerformer._deep_copy(subroot.right))
- if subroot.value == '|':
- return OrNodePerformer(QTreePerformer._deep_copy(subroot.left),
- QTreePerformer._deep_copy(subroot.right))
- if subroot.value == '!':
- return NegationNodePerformer(QTreePerformer._deep_copy(subroot.unary_operand()))
- raise BadTreeError('Unknown operator "%s"' % subroot.value)
- else:
- assert subroot.is_term
- return TermNodePerformer(subroot) if subroot.idxblock else EmptyNodePerformer()
- def finished(self):
- return self.last_doc_id == idxblock.OMEGA_ID
- def next_doc(self):
- if self.finished():
- return None
- self.root.goto_id(self.last_doc_id + 1)
- self.last_doc_id = self.root.evaluate()
- if self.finished():
- return None
- return self.last_doc_id
- def docs(self):
- while True:
- val = self.next_doc()
- if val is None:
- break
- yield val
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement