Advertisement
Guest User

Element Tag Dictionary Parser

a guest
May 6th, 2021
169
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.39 KB | None | 0 0
  1. def findelement(content, attr={}):
  2.     result = None
  3.     class FrozenResultDict:
  4.         def __init__(self, content):
  5.             attr_fields = []; attr_values = []
  6.             attrs = re.search(r"<input.*?>", content).group()
  7.             if not isinstance(attrs, str):
  8.                 attrs = str(attrs, encoding="utf-8")
  9.             parsed_attrs = attrs.replace("=", " ").replace('"', " ").replace("<input", " ").replace(">", " ")
  10.             split_attrs = split(parsed_attrs)
  11.             for attr in split_attrs:
  12.                 i = split_attrs.index(attr)
  13.                 if(i % 2 == 0):
  14.                     if attr == "class":
  15.                         attr = "Class"
  16.                         attr_fields.append(attr)
  17.                     elif "-" in attr:
  18.                         attr = attr.replace("-", "_")
  19.                         attr_fields.append(attr)
  20.                     else:
  21.                         attr_fields.append(attr)
  22.                 else:
  23.                     attr_values.append(attr)
  24.             fields = join(attr_fields)
  25.             ResultDict = namedtuple("ResultDict", fields)
  26.             self.ResultDict = ResultDict(*attr_values)
  27.                
  28.            
  29.         def _immutable(self):
  30.             raise TypeError('FrozenResultDict is immutable')
  31.        
  32.         def __setitem__(self, key):
  33.             return self._immutable()
  34.         def __delitem__(self, key):
  35.             return self._immutable()
  36.         def __getitem__(self, key: str):
  37.             if key == "class":
  38.                 key = key.capitalize()
  39.             if "-" in key:
  40.                 key = key.replace("-", "_")
  41.             item = getattr(self.ResultDict, key)
  42.             return item
  43.         def __hash__(self):
  44.             return id(self)
  45.         def __iter__(self):
  46.             return iter(self.ResultDict)
  47.         def __len__(self):
  48.             return len(self.ResultDict)
  49.         def __repr__(self):
  50.             return f"<FrozenResultDict: {repr(self.ResultDict)}>"
  51.            
  52.     if isinstance(content, bytes):
  53.         html = str(content, encoding="utf-8")
  54.     else:
  55.         html = content
  56.     if attr != {}:
  57.         regex_string = r'''<input'''
  58.         for key, value in zip(attr.keys(), list(attr.values())):
  59.             regex_string += f' {key}="{value}"'
  60.         regex_string += r".*?>"
  61.         try:
  62.             re_result = re.search(regex_string, html).group()
  63.             result = FrozenResultDict(re_result)
  64.         except AttributeError:
  65.             result = re.search(regex_string, html)
  66.         finally:
  67.             return result
  68.     else:
  69.         result = re.search(r"<input.*? />", html).group()
  70.         return result
  71.  
  72. def scrapeDataType(url: str):
  73.     """
  74.    Scrapes data-role-type from requested url (only to be used in edit_user pages)
  75.    """
  76.     token = b64decode(session["token"])
  77.     token = str(token, encoding="utf-8")
  78.     web_page = requests.request('GET', url, headers={'User-Agent': f"{request.user_agent}"}, params={"token":token}, allow_redirects=False)
  79.     elem_tag_result = findelement(web_page.content, {"id":"data-role-type-container", "type":"hidden"})
  80.     try:
  81.         elem_tag_result["data-role-type"]
  82.     except Exception as e:
  83.         raise OperationError("DataType Operation failed due to re.search returning NoneType", "scrapeDateType -> findelement") from e
  84.     else:
  85.         return elem_tag_result["data-role-type"]
  86.     return elem_tag_result
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement