Not a member of Pastebin yet?
                        Sign Up,
                        it unlocks many cool features!                    
                - ## For extracting a [JSON] variable by name from JavaScript string or bs4 script-tag ##
 - ## simpler [and more limited] version of findObj_inJS @ https://pastebin.com/UVcLniSG #
 - ## example at https://pastebin.com/5AjaUne0 [other examples at bottom] ################
 - # from bs4 import BeautifulSoup
 - import json
 - ## [ inputs explained after definition ]
 - def jsonload_from_script(inpX, varName, selector='script', prepFn=lambda x:x):
 - try: sList = [inpX] if inpX.name=='script' else inpX.select(selector)
 - except: sList = inpX if isinstance(inpX, list) else [inpX]
 - for s in sList:
 - s = getattr(s, 'string', s)
 - if not (isinstance(s,str) and s.strip()): continue
 - sections = prepFn(s.strip()).split(';')
 - for i, section in enumerate(sections):
 - name, val = ['', *section.split('=', 1)][-2:]
 - if '=' in section and name.strip()==varName:
 - sections,t1 = [val]+sections[i+1:],True
 - try: return json.loads(val)
 - except: break
 - else: continue
 - while len(sections) > 1:
 - try: return json.loads(';'.join(sections))
 - except: sections = sections[:-1]
 - ### end ###
 - ### INPUTS
 - #-> inpX: must be a bs4 document/tag/ResultSet or a string or a list of strings
 - #### [ target variable must be JSON and separated from other variables by ; ]
 - #-> varName: name of the target variable
 - #### [ only the first variable found with the specified name will be returned ]
 - #-> selector: a CSS selector for searching the bs4 document/tag for target script
 - #### [ if inpX is a script-tag/ResultSet/string/list then selector doesn't matter ]
 - #-> prepFn: should be a univariate function that takes a string and returns a string
 - #### [ for modifying the script string before searching for and parsing variable ]
 - ### EXAMPLES:
 - ##### jsonload_from_script(BeautifulSoup('<script>y=8</script>'), 'y') #--> 8
 - ##### jsonload_from_script(['y=8','x=7'], 'x') #--> 7
 - ##### jsonload_from_script('y=8;x=7', 'x') #--> 7
 - ##### jsonload_from_script('y=8;x={"a":1};w="lorem";', 'x') #--> {'a':1}
 - ##### jsonload_from_script('y=8;x={"a":1};w="lorem";', 'w') #--> 'lorem'
 
Advertisement
 
                    Add Comment                
                
                        Please, Sign In to add comment