Try95th

find/select+get - extract value/text from a bs4 Tag

Jan 17th, 2023 (edited)
191
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.51 KB | None | 0 0
  1. ##### find/select & get to extract value/text from a bs4 Tag #####
  2. ### returns a default value [ or `None` ]  if nothing is found ###
  3. ## [ because chaining .find.. with .get..  might raise errors ] ##
  4. ## [ examples at bottom ] ##
  5.  
  6. ########################### VERSION 1 : USING .find ###########################
  7. def find_get(tag, targetAttr='', defaultVal=None, **findArgs):
  8.     tag = tag.find(**findArgs) if findArgs else tag
  9.     if tag is None: return defaultVal
  10.     if targetAttr: return tag.get(targetAttr, defaultVal)
  11.     return tag.get_text(' ').strip()
  12. ###############################################################################
  13.  
  14. ######################## VERSION 2 : USING .select_one ########################
  15. def select_get(tagSoup, selector='', targetAttr='', defaultVal=None):
  16.     ta, dv = str(targetAttr).strip(), defaultVal
  17.     el = tagSoup.select_one(selector) if selector else tagSoup
  18.     return (el.get(ta,dv) if ta else el.get_text(' ').strip()) if el else dv
  19.  
  20. ### variations of select_get used by #########################
  21. #### fillDict_fromTag <-- https://pastebin.com/hKXYetmj   ####
  22. #### htreeToDict <------- https://pastebin.com/BpjZSQPi   ####
  23. #### selectForList <----- https://pastebin.com/ZnZ7xM6u   ####
  24. ##############################################################
  25. ###############################################################################
  26.  
  27.  
  28. ################################### Examples of usage ###################################
  29. ## soup.get_text(' ').strip() # <--> # find_get(soup) # <--> # select_get(soup)
  30.  
  31. ## soup.find('a', {'id':'close_tab'}).get('href') # <-->
  32. ## soup.select_one('a#close_tab').get('href') # same as:
  33. # find_get(soup, 'href', name='a', attrs={'id':'close_tab'}) # <-->
  34. # select_get(soup, 'a#close_tab', 'href')
  35.  
  36. ## soup.find('input', {'class':'input1'}).get('value') # <-->
  37. ## soup.select_one('input.input1').get('value') # same as:
  38. # find_get(soup, 'value', name='input', attrs={'class':'input1'}) # <-->
  39. # select_get(soup, 'input.input1', 'value')
  40.  
  41. ## soup.find('p', class_='a-class b-class').get_text(' ').strip() # <-->
  42. ## soup.select_one('p[class="a-class b-class"]').get_text(' ').strip() # same as:
  43. # find_get(soup, name='p', class_='a-class b-class') # <-->
  44. # select_get(soup, 'p[class="a-class b-class"]')
  45. ### NOTE: .select('p.a-class.b-class') will match
  46. ##### <p class="a-class b-class">...</p> # but also
  47. ##### <p class="a-class b-class c-class">...</p>
  48. #########################################################################################
Advertisement
Add Comment
Please, Sign In to add comment