## extractTagData + fillDict_fromTag ##

## for extracting specific details from html ##
## A simpler version of htreeToDict [ https://pastebin.com/BpjZSQPi ] ##
## Sample usage at https://stackoverflow.com/a/74631249/6146136 ##

def extractTagData(tagSoup, selector, targetAttr='', defVal=None, link1=False):
    """Pull one piece of data (text or an attribute) out of a tag.

    Args:
        tagSoup: Element to search in; it is used directly when
            ``selector`` is falsy.
        selector: CSS selector handed to ``tagSoup.select_one``.
        targetAttr: Attribute to read with ``.get``; the default ``''``
            means "use the element's stripped text" instead.
        defVal: Fallback returned when nothing matches, or when the
            attribute is absent. For text extraction it only replaces an
            empty text when ``defVal`` itself is a ``str``.
        link1: When true, return a ``(value, href)`` pair, where ``href``
            comes from the first ``a[href]`` inside the matched element
            (``None`` if there is none).

    Returns:
        The extracted value, or a ``(value, href)`` tuple if ``link1``.
    """
    node = tagSoup if not selector else tagSoup.select_one(selector)

    # Nothing matched: hand back the default in the shape the caller asked for.
    if node is None:
        if link1:
            return (defVal, None)
        return defVal

    if targetAttr != '':
        value = node.get(targetAttr, defVal)
    else:
        text = node.get_text(' ').strip()
        # An empty text is swapped for the default only when the default is
        # a plain str; any other default leaves the empty string in place.
        if text or type(defVal) != str:
            value = text
        else:
            value = defVal

    if not link1:
        return value

    anchor = node.select_one('a[href]')
    href = anchor.get('href') if anchor else None
    return (value, href)


def fillDict_fromTag(mSoup, selectorsDict, initDict=None, rootUrl=''):
    """Fill a dictionary with values extracted from ``mSoup``.

    Each ``key: reference`` entry of ``selectorsDict`` is handled according
    to the shape of ``reference``:

    * ``str`` -- a CSS selector; the matched element's text is stored
      under ``key``.
    * 2-tuple ``(selector, attribute)`` -- the named attribute of the
      matched element is stored under ``key``.
    * ``dict`` with ``'k'`` and ``'v'`` -- ``key`` is itself a CSS selector
      for row-like elements; for every row containing both a ``'k'`` match
      and a ``'v'`` match, the ``'k'`` text becomes the output key and the
      ``'v'`` text its value. If the ``'v'`` element contains an
      ``a[href]``, its href is also stored under ``'<k text> [link]'``.
    * ``dict`` with ``'sel'`` and ``'sep'`` -- every element matching
      ``'<key> <sel>'`` whose text contains ``sep`` is split once on ``sep``
      into an output key and value.

    Any other reference is reported with ``print`` and skipped.

    Args:
        mSoup: Element exposing ``select``/``select_one``.
        selectorsDict: Mapping of output keys (or row selectors) to
            references as described above.
        initDict: Optional dictionary to update in place. A fresh dict is
            created when omitted, so results never leak between calls.
        rootUrl: Prefix added to extracted links that start with ``'/'``.

    Returns:
        ``initDict`` (or the newly created dict) with the extracted entries.
    """
    # A ``{}`` default would be created once and shared by every call.
    # Test against None (not truthiness) so that a caller-supplied empty
    # dict is still filled in place.
    if initDict is None:
        initDict = {}

    for k, sel in selectorsDict.items():
        if isinstance(sel, str):
            initDict[k] = extractTagData(mSoup, sel)
        elif isinstance(sel, tuple) and len(sel) == 2:
            initDict[k] = extractTagData(mSoup, sel[0], sel[1])
        elif isinstance(sel, dict) and 'k' in sel and 'v' in sel:
            sk, sv = sel['k'], sel['v']
            # Only rows that contain both a key element and a value element.
            for row in mSoup.select(f'{k}:has({sk}):has({sv})'):
                kVal = extractTagData(row, sk)
                sVal, sLink = extractTagData(row, sv, link1=True)
                initDict[kVal] = sVal
                if sLink:
                    # Root-relative links are prefixed with rootUrl.
                    if rootUrl and sLink.startswith('/'):
                        sLink = rootUrl + sLink
                    initDict[f'{kVal} [link]'] = sLink
        elif isinstance(sel, dict) and 'sel' in sel and 'sep' in sel:
            sep = sel['sep']
            for item in mSoup.select(f'{k} {sel["sel"]}'):
                itemText = item.get_text(' ').strip()
                if sep in itemText:
                    # Split on the first separator only, so the value may
                    # itself contain the separator.
                    kk, vv = itemText.split(sep, 1)
                    initDict[kk] = vv
        # elif... ## add more options
        else:
            print(f'Unfamiliar reference format: {k} --> {sel}')

    return initDict