Not a member of Pastebin yet?
                        Sign Up,
                        it unlocks many cool features!                    
                - ## example of usage: https://stackoverflow.com/a/75206102/6146136
 - ## for nested headers, see https://pastebin.com/vsKc9Qq5
 - def get_wikiSection(header, wSoup, sec1Header='? Abstract ?'):
 - sSel = 'h2:has(span.mw-headline[id])'
 - sSel = f'*:has(~{sSel}):not({sSel}~*)'
 - if not header: hId = hSel = None # [first section has no header]
 - elif isinstance(header, str):
 - hId, hSel = header, f'h2:has(span.mw-headline[id="{header}"])'
 - header = wSoup.select_one(hSel)
 - if not header: return {'errorMsg': f'Not found: {hSel}'}
 - else: hId = header.select_one('span.mw-headline[id]')['id']
 - ## header SHOULD BE: None/hId/a tag containing span.mw-headline[id] ##
 - if hId:
 - hSel = f'h2:has(span.mw-headline[id="{hId}"])'
 - sSel = f'{hSel}~*:not({hSel}~h2~*):not(h2)'
 - header = header.get_text(' ').strip()
 - else: header = sec1Header
 - sect = wSoup.select(sSel)
 - sText = '\n'.join([s.get_text(' ').strip() for s in sect])
 - sHtml = '\n'.join([''.join(s.prettify().splitlines()) for s in sect])
 - if not sect: sText = sHtml = None
 - return {'headerId': hId, 'sectionHeader': header,
 - 'sectionText': sText, 'sectionHtml': sHtml}
 
Advertisement
 
                    Add Comment                
                
                        Please, Sign In to add comment