Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ## For extracting a [JSON] variable by name from JavaScript string or bs4 script-tag ##
- ## simpler [and more limited] version of findObj_inJS @ https://pastebin.com/UVcLniSG #
- ## example at https://pastebin.com/5AjaUne0 [other examples at bottom] ################
- # from bs4 import BeautifulSoup
- import json
- ## [ inputs explained after definition ]
def jsonload_from_script(inpX, varName, selector='script', prepFn=lambda x:x):
    """Return the JSON value assigned to ``varName`` inside JavaScript source.

    Each script text is split on ``;`` and scanned for the first statement of
    the form ``<name> = <value>`` whose left-hand side is ``varName``.  The
    left-hand side may also be a ``var``/``let``/``const`` declaration of
    ``varName``.  The value is parsed with ``json.loads``.

    Args:
        inpX: An object exposing ``.name``/``.select`` (e.g. a bs4
            document or tag), a string of JavaScript, or a list of
            strings/tags.  Items are read through their ``.string``
            attribute when they have one.
        varName: Name of the variable to extract.
        selector: CSS selector passed to ``inpX.select`` when ``inpX`` is not
            itself a ``script`` tag.  Ignored for strings and lists.
        prepFn: One-argument callable applied to each stripped script string
            before it is split and searched.

    Returns:
        The decoded JSON value of the first parseable match, or ``None`` when
        no script yields one.
    """
    # Failures json.loads can produce for a str argument: malformed JSON
    # (JSONDecodeError is a ValueError) or nesting too deep to decode.
    parse_errors = (ValueError, RecursionError)

    # Duck-typing probe: anything that fails here simply is not a node we can
    # query, so fall back to treating the input as raw text (or a list of it).
    # `Exception` rather than a bare except so Ctrl-C / SystemExit propagate.
    try:
        sList = [inpX] if inpX.name == 'script' else inpX.select(selector)
    except Exception:
        sList = inpX if isinstance(inpX, list) else [inpX]

    for s in sList:
        s = getattr(s, 'string', s)
        if not (isinstance(s, str) and s.strip()):
            continue

        sections = prepFn(s.strip()).split(';')
        for i, section in enumerate(sections):
            lhs, eq, val = section.partition('=')
            if not eq:
                continue
            target = lhs.strip()
            # Accept `var x`, `let x` and `const x` as an assignment to `x`.
            words = target.split()
            if len(words) == 2 and words[0] in ('var', 'let', 'const'):
                declared = words[1]
            else:
                declared = target
            if varName not in (target, declared):
                continue

            try:
                return json.loads(val)
            except parse_errors:
                # The value probably contained a ';' and was cut short by the
                # split; keep the tail so it can be re-joined below.
                candidates = [val] + sections[i + 1:]
                break
        else:
            # No assignment to varName in this script; try the next one.
            continue

        # Re-join the value with the following pieces, dropping one trailing
        # piece per attempt until something parses.  A single piece is just
        # `val`, which already failed above.
        while len(candidates) > 1:
            try:
                return json.loads(';'.join(candidates))
            except parse_errors:
                candidates.pop()
- ### end ###
- ### INPUTS
- #-> inpX: must be a bs4 document/tag/ResultSet or a string or a list of strings
- #### [ target variable must be JSON and separated from other variables by ; ]
- #-> varName: name of the target variable
- #### [ only the first variable found with the specified name will be returned ]
- #-> selector: a CSS selector for searching the bs4 document/tag for target script
- #### [ if inpX is a script-tag/ResultSet/string/list then selector doesn't matter ]
- #-> prepFn: should be a univariate function that takes a string and returns a string
- #### [ for modifying the script string before searching for and parsing variable ]
- ### EXAMPLES:
- ##### jsonload_from_script(BeautifulSoup('<script>y=8</script>'), 'y') #--> 8
- ##### jsonload_from_script(['y=8','x=7'], 'x') #--> 7
- ##### jsonload_from_script('y=8;x=7', 'x') #--> 7
- ##### jsonload_from_script('y=8;x={"a":1};w="lorem";', 'x') #--> {'a':1}
- ##### jsonload_from_script('y=8;x={"a":1};w="lorem";', 'w') #--> 'lorem'
Advertisement
Add Comment
Please, Sign In to add comment