Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ##### find/select & get to extract value/text from a bs4 Tag #####
- ### returns a default value [ or `None` ] if nothing is found ###
- ## [ because chaining .find.. with .get.. might raise errors ] ##
- ## [ examples at bottom ] ##
- ########################### VERSION 1 : USING .find ###########################
def find_get(tag, targetAttr='', defaultVal=None, **findArgs):
    """Locate an element with ``.find`` and extract an attribute or its text.

    Args:
        tag: Object to search in (a bs4 Tag/soup in the usage examples).
            May be ``None`` when no ``findArgs`` are given.
        targetAttr: Name of the attribute to read. When empty, the
            element's text is returned instead.
        defaultVal: Returned when no element is found, or when the
            requested attribute is missing from the element.
        **findArgs: Forwarded as-is to ``tag.find``. When omitted, ``tag``
            itself is used as the element.

    Returns:
        The attribute value, the whitespace-stripped text obtained from
        ``get_text(' ')``, or ``defaultVal``.
    """
    # Only search when search criteria were supplied; otherwise work on
    # the object that was passed in.
    element = tag
    if findArgs:
        element = tag.find(**findArgs)

    if element is None:
        return defaultVal

    if not targetAttr:
        # No attribute requested -> hand back the element's text.
        return element.get_text(' ').strip()

    return element.get(targetAttr, defaultVal)
- ###############################################################################
- ######################## VERSION 2 : USING .select_one ########################
def select_get(tagSoup, selector='', targetAttr='', defaultVal=None):
    """Locate an element with ``.select_one`` and extract an attribute or text.

    Args:
        tagSoup: Object to search in (a bs4 Tag/soup in the usage
            examples). A falsy value yields ``defaultVal`` when no
            selector is given.
        selector: CSS selector passed to ``tagSoup.select_one``. When
            empty, ``tagSoup`` itself is used as the element.
        targetAttr: Name of the attribute to read; it is converted to a
            string and stripped of surrounding whitespace. When empty
            (or ``None``), the element's text is returned instead.
        defaultVal: Returned when no element is found, or when the
            requested attribute is missing from the element.

    Returns:
        The attribute value, the whitespace-stripped text obtained from
        ``get_text(' ')``, or ``defaultVal``.
    """
    # ``str(None)`` is the truthy string 'None'; without this guard a
    # ``targetAttr=None`` call would look up an attribute literally named
    # "None" instead of falling back to the element's text.
    attr_name = '' if targetAttr is None else str(targetAttr).strip()

    element = tagSoup.select_one(selector) if selector else tagSoup
    # Truthiness (not ``is None``) is kept on purpose so that any falsy
    # input keeps returning the default rather than raising.
    if not element:
        return defaultVal

    if attr_name:
        return element.get(attr_name, defaultVal)
    return element.get_text(' ').strip()
- ### variations of select_get used by #########################
- #### fillDict_fromTag <-- https://pastebin.com/hKXYetmj ####
- #### htreeToDict <------- https://pastebin.com/BpjZSQPi ####
- #### selectForList <----- https://pastebin.com/ZnZ7xM6u ####
- ##############################################################
- ###############################################################################
- ################################### Examples of usage ###################################
- ## soup.get_text(' ').strip() # <--> # find_get(soup) # <--> # select_get(soup)
- ## soup.find('a', {'id':'close_tab'}).get('href') # <-->
- ## soup.select_one('a#close_tab').get('href') # same as:
- # find_get(soup, 'href', name='a', attrs={'id':'close_tab'}) # <-->
- # select_get(soup, 'a#close_tab', 'href')
- ## soup.find('input', {'class':'input1'}).get('value') # <-->
- ## soup.select_one('input.input1').get('value') # same as:
- # find_get(soup, 'value', name='input', attrs={'class':'input1'}) # <-->
- # select_get(soup, 'input.input1', 'value')
- ## soup.find('p', class_='a-class b-class').get_text(' ').strip() # <-->
- ## soup.select_one('p[class="a-class b-class"]').get_text(' ').strip() # same as:
- # find_get(soup, name='p', class_='a-class b-class') # <-->
- # select_get(soup, 'p[class="a-class b-class"]')
- ### NOTE: .select('p.a-class.b-class') will match
- ##### <p class="a-class b-class">...</p> # but also
- ##### <p class="a-class b-class c-class">...</p>
- #########################################################################################
Advertisement
Add Comment
Please, Sign In to add comment