Try95th

jsonload_from_script

Mar 19th, 2023 (edited)
167
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.16 KB | None | 0 0
  1. ## For extracting a [JSON] variable by name from JavaScript string or bs4 script-tag ##
  2. ## simpler [and more limited] version of findObj_inJS @ https://pastebin.com/UVcLniSG #
  3. ## example at https://pastebin.com/5AjaUne0 [other examples at bottom] ################
  4.  
  5. # from bs4 import BeautifulSoup
  6. import json
  7.  
  8. ## [ inputs explained after definition ]
  9. def jsonload_from_script(inpX, varName, selector='script', prepFn=lambda x:x):
  10.     try: sList = [inpX] if inpX.name=='script' else inpX.select(selector)
  11.     except: sList = inpX if isinstance(inpX, list) else [inpX]
  12.     for s in sList:
  13.         s = getattr(s, 'string', s)
  14.         if not (isinstance(s,str) and s.strip()): continue
  15.         sections = prepFn(s.strip()).split(';')
  16.  
  17.         for i, section in enumerate(sections):
  18.             name, val = ['', *section.split('=', 1)][-2:]
  19.             if '=' in section and name.strip()==varName:
  20.                 sections,t1 = [val]+sections[i+1:],True
  21.                 try: return json.loads(val)
  22.                 except: break
  23.         else: continue
  24.  
  25.         while len(sections) > 1:
  26.             try: return json.loads(';'.join(sections))
  27.             except: sections = sections[:-1]  
  28.     ### end ###
  29.  
  30. ### INPUTS
  31. #-> inpX: must be a bs4 document/tag/ResultSet or a string or a list of strings
  32. #### [ target variable must be JSON and separated from other variables by ; ]
  33. #-> varName: name of the target variable
  34. #### [ only the first variable found with the specified name will be returned ]
  35. #-> selector: a CSS selector for searching the bs4 document/tag for target script
  36. #### [ if inpX is a script-tag/ResultSet/string/list then selector doesn't matter ]
  37. #-> prepFn: should be a univariate function that takes a string and returns a string
  38. #### [ for modifying the script string before searching for and parsing variable ]
  39.  
  40. ### EXAMPLES:
  41. ##### jsonload_from_script(BeautifulSoup('<script>y=8</script>'), 'y') #--> 8
  42. ##### jsonload_from_script(['y=8','x=7'], 'x') #--> 7
  43. ##### jsonload_from_script('y=8;x=7', 'x') #--> 7
  44. ##### jsonload_from_script('y=8;x={"a":1};w="lorem";', 'x') #--> {'a':1}
  45. ##### jsonload_from_script('y=8;x={"a":1};w="lorem";', 'w') #--> 'lorem'
Advertisement
Add Comment
Please, Sign In to add comment