Advertisement
Guest User

my extension to searchparsing.py

a guest
Apr 27th, 2012
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.85 KB | None | 0 0
  1. from pyparsing import Word, alphanums, Keyword, Group, Combine, Forward, Suppress, Optional, OneOrMore, oneOf
  2. from sets import Set
  3.  
  4. class SearchQueryParser:
  5.  
  6.     def __init__(self):
  7.         self._methods = {
  8.             'and': self.evaluateAnd,
  9.             'or': self.evaluateOr,
  10.             'not': self.evaluateNot,
  11.             'parenthesis': self.evaluateParenthesis,
  12.             'quotes': self.evaluateQuotes,
  13.             'word': self.evaluateWord,
  14.             'wordwildcard': self.evaluateWordWildcard,
  15.             'in': self.evaluateIn,
  16.         }
  17.         self._parser = self.parser()
  18.    
  19.     def parser(self):
  20.         """
  21.        This function returns a parser.
  22.        The grammar should be like most full text search engines (Google, Tsearch, Lucene).
  23.        
  24.        Grammar:
  25.        - a query consists of alphanumeric words, with an optional '*' wildcard
  26.          at the end of a word
  27.        - a sequence of words between quotes is a literal string
  28.        - words can be used together by using operators ('and' or 'or')
  29.        - words with operators can be grouped with parenthesis
  30.        - a word or group of words can be preceded by a 'not' operator
  31.        - the 'and' operator precedes an 'or' operator
  32.        - if an operator is missing, use an 'and' operator
  33.        """
  34.         operatorOr = Forward()
  35.        
  36.         operatorWord = Group(Combine(Word(alphanums) + Suppress('*'))).setResultsName('wordwildcard') | \
  37.                             Group(Word(alphanums)).setResultsName('word')
  38.        
  39.         operatorQuotesContent = Forward()
  40.         operatorQuotesContent << (
  41.             (operatorWord + operatorQuotesContent) | operatorWord
  42.         )
  43.        
  44.         operatorQuotes = Group(
  45.             Suppress('"') + operatorQuotesContent + Suppress('"')
  46.         ).setResultsName("quotes") | operatorWord
  47.  
  48.         operatorIn = Forward()
  49.         operatorIn <<(Group(
  50.             operatorOr + Suppress(":") + operatorQuotes
  51.         ).setResultsName("in") | operatorOr)
  52.        
  53.         operatorParenthesis = Group(
  54.             (Suppress("(") + operatorOr + Suppress(")"))
  55.         ).setResultsName("parenthesis") | operatorQuotes
  56.  
  57.        
  58.         operatorNot = Forward()
  59.         operatorNot << (Group(
  60.             Suppress(Keyword("not", caseless=True)) + operatorNot
  61.         ).setResultsName("not") | operatorParenthesis)
  62.  
  63.         operatorAnd = Forward()
  64.         operatorAnd << (Group(
  65.             operatorNot +
  66.             Suppress(Keyword("and", caseless=True))
  67.             #Suppress("&")
  68.             + operatorAnd
  69.         ).setResultsName("and") | Group(
  70.             operatorNot + OneOrMore(~oneOf("and or in") + operatorAnd)
  71.         ).setResultsName("and") | operatorNot)
  72.        
  73.         operatorOr << (Group(
  74.             operatorAnd + Suppress(Keyword("or", caseless=True)) + operatorOr
  75.         ).setResultsName("or") | operatorAnd)
  76.  
  77.        
  78.        
  79.  
  80.         return operatorOr.parseString
  81.  
  82.     def evaluateIn(self, argument):
  83.         print "evaluating ",argument[0],":",argument[1]
  84.         return self.evaluat(argument[1])
  85.        
  86.     def evaluateAnd(self, argument):
  87.         print "eval: ",argument[0]," AND",argument[1]
  88.         return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
  89.  
  90.     def evaluateOr(self, argument):
  91.         print "eval: ",argument[0]," OR",argument[1]
  92.         return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
  93.  
  94.     def evaluateNot(self, argument):
  95.         return self.GetNot(self.evaluate(argument[0]))
  96.  
  97.     def evaluateParenthesis(self, argument):
  98.         return self.evaluate(argument[0])
  99.  
  100.     def evaluateQuotes(self, argument):
  101.         """Evaluate quoted strings
  102.  
  103.        First is does an 'and' on the indidual search terms, then it asks the
  104.        function GetQuoted to only return the subset of ID's that contain the
  105.        literal string.
  106.        """
  107.         r = Set()
  108.         search_terms = []
  109.         for item in argument:
  110.             search_terms.append(item[0])
  111.             if len(r) == 0:
  112.                 r = self.evaluate(item)
  113.             else:
  114.                 r = r.intersection(self.evaluate(item))
  115.         return self.GetQuotes(' '.join(search_terms), r)
  116.  
  117.     def evaluateWord(self, argument):
  118.         return self.GetWord(argument[0])
  119.  
  120.     def evaluateWordWildcard(self, argument):
  121.         return self.GetWordWildcard(argument[0])
  122.        
  123.     def evaluate(self, argument):
  124.         return self._methods[argument.getName()](argument)
  125.  
  126.     def Parse(self, query):
  127.         #print self._parser(query)[0]
  128.         return self.evaluate(self._parser(query)[0])
  129.  
  130.     def GetWord(self, word):
  131.         return word
  132.  
  133.     def GetWordWildcard(self, word):
  134.         return word
  135.  
  136.     def GetQuotes(self, search_string, tmp_result):
  137.         return Set()
  138.  
  139.     def GetNot(self, not_set):
  140.         return Set().difference(not_set)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement