Advertisement
Guest User

Untitled

a guest
Mar 21st, 2018
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 7.28 KB | None | 0 0
  1.  
  2.  
  3. import json
  4. import sys
  5.  
  6. from collections import defaultdict
  7.  
  8.  
  9. class Entity(object):
  10.     """
  11.    Represents the search entity in the "test" selectors.
  12.    Houses matches_dom and get_entity from a given defaultdict factory method
  13.    """
  14.     def __init__(self, tag=None, classes=None, identifier=None):
  15.         self.tag = tag
  16.         self.classes = None if not classes else set(classes)  # assumes classes will be passed as a list or None
  17.         self.identifier = identifier
  18.  
  19.     @staticmethod
  20.     def matches_dom(entity, dom: dict):
  21.         """
  22.        Compares DOM against the given entity and return trues if the dom can match the search entity
  23.        :param entity: the search entity containing tag, classes and identifier
  24.        :param dom: the current DOM element we are comparing with
  25.        :return: if the current DOM element is a match for the entity
  26.        """
  27.         assert(dom.get('tag') is not None)
  28.  
  29.         is_tag_match, is_id_match, are_classes_match = True, True, True
  30.         if entity.tag is not None:
  31.             is_tag_match = entity.tag == dom['tag']
  32.         if entity.identifier is not None:
  33.             is_id_match = entity.identifier == dom.get('id')
  34.         if entity.classes is not None:
  35.             classes = dom.get('classes', [()])
  36.             are_classes_match = all(c in classes for c in entity.classes)
  37.  
  38.         return is_tag_match & is_id_match & are_classes_match
  39.  
  40.     @staticmethod
  41.     def get_entity(dom_dict: defaultdict):
  42.         """
  43.        Factory method that generates entity out of a given defaultdict (Can be dict too)
  44.        :param dom_dict: dictionary containing tags, identifier and classes
  45.        :return: entity object
  46.        """
  47.         tag, identifier, classes = None, None, None
  48.  
  49.         if dom_dict.get('tag'):
  50.             tag = dom_dict['tag'][0]
  51.         if dom_dict.get('identifier'):
  52.             identifier = dom_dict['identifier'][0]
  53.         if dom_dict.get('classes'):
  54.             classes = dom_dict['classes']
  55.  
  56.         return Entity(tag=tag, classes=classes, identifier=identifier)
  57.  
  58.  
  59. class ParserException(Exception):
  60.     pass
  61.  
  62.  
  63. class SelectorUnitParser(object):
  64.     """ Parses the given selector string based on the state machine to form the entity object """
  65.     state_machine = {
  66.         'tag': {
  67.             'transition': {'.': 'classes', '#': 'identifier'}
  68.         },
  69.         'identifier': {
  70.             'transition': {'.': 'classes'}
  71.         },
  72.         'classes': {
  73.             'transition': {'.': 'classes'}
  74.         }
  75.     }
  76.  
  77.     @classmethod
  78.     def parse(cls, selector_unit: str):
  79.         """
  80.        Return search entity object from the given selector unit string
  81.        :param selector_unit: a selector of the form `tag#id.class1.class2`
  82.        :return: search entity object representation of the selector unit
  83.        """
  84.         data = defaultdict(list)
  85.  
  86.         current_state = 'tag'
  87.         current_str = ''
  88.         for c in selector_unit:
  89.             if c in cls.state_machine[current_state]['transition']:
  90.                 if current_str != '':
  91.                     data[current_state].append(current_str)
  92.                     current_str = ''
  93.                 current_state = cls.state_machine[current_state]['transition'][c]
  94.             elif c.isalnum() or c == '-':
  95.                 current_str += c
  96.             else:
  97.                 raise ParserException("Invalid character (%s) in selector str: %s" % (c, selector_unit))
  98.  
  99.         if current_str != '':
  100.             data[current_state].append(current_str)
  101.  
  102.         entity = Entity.get_entity(data)
  103.         return entity
  104.  
  105.  
  106. class Stack(list):
  107.     """ Simple wrapper for list to support push and peek operations """
  108.     def push(self, obj):
  109.         self.append(obj)
  110.  
  111.     def peek(self):
  112.         return self[-1]
  113.  
  114.  
  115. def get_search_stack(selector: str):
  116.     """
  117.    Given a search selector string this function constructs a stack of entities that needs to be searched for
  118.    in the hierarchy
  119.    :param selector: search selector string
  120.    :return: stack of all the entities that needs to be searched for in the hierarchy
  121.    """
  122.     selector_units = selector.split()
  123.     search_stack = Stack()
  124.  
  125.     # push the selector elements into the stack in reversed manner so we get the top
  126.     # NOTE: the stack can be replaced by a queue too.
  127.     for i in reversed(range(len(selector_units))):
  128.         entity = SelectorUnitParser.parse(selector_unit=selector_units[i])
  129.         search_stack.push(entity)
  130.  
  131.     return search_stack
  132.  
  133.  
  134. def get_matches_in_hierarchy(search_stack, current_dom):
  135.     """
  136.    the core function of the program, does a DFS recursively to figure out the matches in the current tree path
  137.    :param search_stack: the entities that needs to be matched from
  138.    :param current_dom: the current dom element from the hierarchy that needs to be searched with
  139.    :return: the count of number of matches, that is recursively returned and added up
  140.    """
  141.  
  142.     search_entity = search_stack.peek()
  143.  
  144.     matches = 0
  145.     if Entity.matches_dom(search_entity, current_dom):
  146.         if len(search_stack) == 1:  # if stack is the final element don't pop it, try to find matches in children
  147.             matches = 1
  148.         else:  # if not the final element pop it and try to find next match in the stack
  149.             search_stack.pop()
  150.  
  151.     if current_dom.get('children'):
  152.         for c in current_dom['children']:
  153.             # clone search_stack to find potential matches from the current search stack
  154.             matches += get_matches_in_hierarchy(Stack(search_stack), c)
  155.  
  156.     return matches
  157.  
  158.  
  159. def process(user_input):
  160.     """
  161.    Iterates through each tests and tries to find the number of matches in the hierarchy
  162.    :param user_input: the input contents from the file
  163.    :return: the list of count of all potential matches
  164.    """
  165.     input_json = json.loads(user_input)
  166.  
  167.     assert(input_json.get('hierarchy') is not None)
  168.     assert(input_json.get('tests') is not None)
  169.  
  170.     hierarchy = input_json['hierarchy']
  171.     results = list()
  172.  
  173.     for selectors in input_json.get('tests'):
  174.         search_stack = get_search_stack(selectors)
  175.         results.append(get_matches_in_hierarchy(search_stack, hierarchy))
  176.  
  177.     return results
  178.  
  179.  
  180. def driver(main_file=None):
  181.     """
  182.    The main driver function, runs the selector parser
  183.    :param main_file: If a file name is specified read it from the file instead of stdin
  184.    :return: output without whitespaces
  185.    """
  186.     if main_file:
  187.         f = open(input_file, "r")
  188.     else:
  189.         f = sys.stdin
  190.  
  191.     output = process(f.read())
  192.     print(json.dumps(output, separators=(',', ':')))
  193.  
  194.  
  195. def test():
  196.     """ Some basic tests, can be replaced by a test framework. """
  197.     entity = SelectorUnitParser.parse('body#content')
  198.     assert(entity.tag == 'body')
  199.     assert(entity.identifier == 'content')
  200.  
  201.     entity = SelectorUnitParser.parse('body#content.foo.bar.zoo.elephant')
  202.     assert(entity.tag == 'body')
  203.     assert(entity.identifier == 'content')
  204.     assert(len(entity.classes) == 4)
  205.  
  206.     search_stack = get_search_stack('body#content .foo .bar. .zoo. .elephants')
  207.     assert(len(search_stack) == 5)
  208.     assert('elephants' in search_stack[0].classes)
  209.  
  210.  
if __name__ == "__main__":
    # The first command-line argument, when given, is the input file path;
    # with no argument, None is passed and driver() reads from stdin.
    input_file = sys.argv[1] if len(sys.argv) > 1 else None
    driver(input_file)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement