Try95th

confParse

Mar 4th, 2023 (edited)
221
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.65 KB | None | 0 0
  1. ## for extracting specific details from html ##############
  2. ## an improvement on                                  #####
  3. #### getSoupData [ https://pastebin.com/DULqFBgp ]    #####
  4. #### fillDict_fromTag [ https://pastebin.com/hKXYetmj ] ###
  5. ###########################################################
  6.  
  7. #### customized tripadvisor scraper [2 examples] at https://pastebin.com/5AjaUne0
  8. ###### example 1: tSel as a function [confParse is quite superfluous here]
  9. ###### example 2: tSel as a [reference] data structure containing selectors
  10.  
  11. #### example 3: generalized example of tSel at https://pastebin.com/8rebux2i
  12.  
  13.  
  14. ############################# REQUIRED  FUNCTIONS #############################
  def call_w_params(cFunc, xList=[],xMain={}, xDef={},xUp={},fKey=None):
      """Call ``cFunc`` with arguments assembled from several sources.

      ``xList`` always supplies the leading positional arguments. How
      ``xMain`` is used depends on its type:

      * tuple -> unpacked as extra positional arguments; only ``xUp`` is
        passed as keywords (``xDef`` is not used in this case).
      * dict  -> merged as keyword arguments between ``xDef`` (lowest
        priority) and ``xUp`` (highest priority), unless ``fKey`` is truthy
        and ``xMain[fKey]`` is truthy, in which case the dict is treated
        like any other value (next bullet).
      * anything else -> passed as one extra positional argument, with
        ``xDef`` overridden by ``xUp`` as keywords.

      Returns whatever ``cFunc`` returns.
      """
      if isinstance(xMain, tuple):
          return cFunc(*xList, *xMain, **xUp)

      if isinstance(xMain, dict):
          # A dict flagged through fKey must reach cFunc as a single value.
          flagged = fKey and xMain.get(fKey)
          if not flagged:
              merged = {**xDef, **xMain, **xUp}
              return cFunc(*xList, **merged)

      keywords = {**xDef, **xUp}
      return cFunc(*xList, xMain, **keywords)
  20.  
  21.  
  def try_call(toCall, *cArgs, vDef=None):
      """Return ``toCall(*cArgs)``, or ``vDef`` if the call raises.

      Only ``Exception`` subclasses are suppressed. ``KeyboardInterrupt``,
      ``SystemExit`` and other ``BaseException``-only signals propagate so
      that a best-effort call can never swallow a request to stop the
      program.
      """
      try:
          return toCall(*cArgs)
      except Exception:
          return vDef
  25.  
  26.  
  def try_apply(obj, toCall, lArgs=[], nArgs={}, **dArgs):
      """Best-effort transformation of ``obj``; return ``obj`` on failure.

      * If ``toCall`` is a string it names an attribute of ``obj``; that
        attribute is called with ``lArgs`` as positional arguments.
      * Otherwise ``toCall`` itself is called with ``obj`` prepended to
        ``lArgs``.

      Keyword arguments are ``nArgs`` overridden by ``dArgs``. When the
      resolved target is not callable, or the call raises an ``Exception``,
      ``obj`` is returned unchanged. ``KeyboardInterrupt`` / ``SystemExit``
      are deliberately not suppressed.
      """
      if isinstance(toCall, str):
          # Method name: look it up on obj (a missing attribute becomes None).
          toCall = getattr(obj, toCall, None)
      else:
          lArgs = [obj, *lArgs]
      if not callable(toCall):
          return obj
      try:
          return toCall(*lArgs, **{**nArgs, **dArgs})
      except Exception:
          return obj
  33. ###############################################################################
  34.  
  35.  
  36. ################################ MAIN FUNCTION ################################
  def confParse(bsTag, tSel='', tAttr='', defVal=None, listSel='',
                mFunc=None, *mArgs, curDepth=0, maxDepth=950, **mnArgs):
      """Extract data from ``bsTag`` as directed by a selector configuration.

      ``bsTag`` must provide ``select``, ``select_one``, ``get`` and
      ``get_text`` (presumably a BeautifulSoup tag -- confirm with callers).

      Modes, checked in this order:

      1. Placeholder: if ``tAttr`` is one of ``'"'``, ``'"placeholder"'`` or
         ``'"_"'``, ``tSel`` is returned untouched as a literal value.
      2. Depth guard: once ``curDepth`` reaches ``maxDepth`` a diagnostic
         dict is printed and ``defVal`` is returned.
      3. List mode: ``listSel`` (a CSS selector string, or a callable that
         receives ``bsTag``) yields a list of elements; ``confParse`` is
         applied to each with the same settings. The resulting list is then
         passed through the ``__apply2resultSet__`` keyword (a callable or
         the name of a list method) when one is supplied.
      4. Structured mode: ``tSel`` is a dict/list/tuple/set of nested
         selector specs, each dispatched through ``call_w_params``. A dict
         with a container under ``'__selref__'`` is replaced by that
         container. The output mirrors the container type (a dict keeps its
         keys, minus ``'__selref__'``); ``tAttr`` is then applied to the
         collection via ``try_apply``.
      5. Leaf extraction: ``tSel`` is a CSS selector, a callable receiving
         ``bsTag``, or falsy (use ``bsTag`` itself). A falsy element gives
         ``defVal``. ``tAttr`` selects an attribute (``''`` -> text content,
         non-string -> the element itself); a ``class`` list is joined with
         spaces. ``mFunc`` (callable or method name), if given, post-processes
         the value with ``mArgs`` / ``mnArgs``.

      Returns the extracted value, a container of extracted values, or
      ``defVal``.
      """
      # Placeholder markers: hand the "selector" back as a literal value.
      if tAttr in ['"', '"placeholder"', '"_"']:
          return tSel

      # Recursion guard for self-referential configurations.
      if not curDepth < maxDepth:
          print({'depth': curDepth, 'limit': maxDepth,
                 'tSel': tSel, 'return': defVal})
          return defVal

      nextDepth = {'curDepth': curDepth + 1, 'maxDepth': maxDepth}
      setFunc = mnArgs.get('__apply2resultSet__')
      # '__apply2resultSet__' is a directive for confParse, not an argument
      # of mFunc. Forwarding it made mFunc raise TypeError, which try_apply
      # swallowed, so the post-processing step was silently skipped.
      leafArgs = {k: v for k, v in mnArgs.items()
                  if k != '__apply2resultSet__'}

      ### LIST MODE ###
      selected = None
      if listSel and isinstance(listSel, str):
          selected = bsTag.select(listSel)
      elif callable(listSel):
          # Historical call shape passes an extra [] positionally; keep it
          # first so two-argument callables behave exactly as before.
          selected = try_call(listSel, bsTag, [])
          if selected is None:
              # One-argument callables always failed above; retry plainly.
              selected = try_call(listSel, bsTag)
      if isinstance(selected, list):
          items = [
              confParse(t, tSel, tAttr, defVal, '', mFunc, *mArgs,
                        **leafArgs, **nextDepth)
              for t in selected
          ]
          return try_apply(items, setFunc)

      ### STRUCTURED MODE ###
      if isinstance(tSel, dict):
          selRef = tSel.get('__selref__')
          if isinstance(selRef, (dict, list, set, tuple)):
              tSel = selRef
      if isinstance(tSel, (dict, tuple, list, set)):
          # Children inherit defVal/mFunc and every extra keyword (including
          # '__apply2resultSet__', which nested list-mode specs may consume).
          childDefaults = {'defVal': defVal, 'mFunc': mFunc, **mnArgs}
          keys = None
          if isinstance(tSel, dict):
              keys = [k for k in tSel if k != '__selref__']
              tSel = [v for k, v in tSel.items() if k != '__selref__']
          container = type(tSel)
          results = [
              call_w_params(confParse, [bsTag], sParams,
                            childDefaults, nextDepth, '__selref__')
              for sParams in tSel
          ]
          if keys is None:
              collected = container(results)
          else:
              collected = dict(zip(keys, results))
          # In this mode tAttr acts on the whole collection.
          return try_apply(collected, tAttr)

      ### ACTUAL EXTRACTION ###
      if callable(tSel):
          el = try_call(tSel, bsTag)
      elif tSel:
          el = bsTag.select_one(str(tSel).strip())
      else:
          el = bsTag
      if not el:
          return defVal

      if not isinstance(tAttr, str):
          rVal = el
      elif tAttr:
          rVal = el.get(tAttr, defVal)
      else:
          rVal = el.get_text(' ', strip=True)
      if tAttr == 'class' and isinstance(rVal, list):
          rVal = ' '.join(rVal)
      #########################

      return try_apply(rVal, mFunc, mArgs, leafArgs) if mFunc else rVal
  76. ###############################################################################
Advertisement
Add Comment
Please, Sign In to add comment