furas

Python - BS - scraping from JavaScript

Apr 12th, 2020 (edited)
384
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # author: Bartlomiej "furas" Burek (https://blog.furas.pl)
  # 2020.05.12
"""Scrape a JavaScript object literal out of an inline ``<script>`` tag.

The sample page assigns a JSON-compatible object to ``var stocks``.
BeautifulSoup is used only to get the text of the ``<script>`` element;
the object itself is located with a regular expression and decoded with
the ``json`` module, so the result does not depend on how the JavaScript
happens to be laid out (line position, indentation, trailing ``;``).
"""

import json
import re

html = '''<form class="col-xs-12"> <script type="text/javascript">
 
   var stocks = {"938975":true,"938977":true,"938979":true,"938981":true,"938983":true,"938985":true,"938987":false,"938989":true,"938991":true,"938993":true,"938995":true,"938997":false,"938999":true,"939001":true,"939003":true,"939005":true,"939007":true};
   
   </script>'''


def extract_stocks(script_text: str, var_name: str = 'stocks') -> dict:
    """Return the object assigned to ``var <var_name>`` in *script_text*.

    Args:
        script_text: JavaScript source (any surrounding text is ignored).
        var_name: Name of the JavaScript variable to read.

    Returns:
        The decoded object, with keys in their original order.

    Raises:
        ValueError: If there is no ``var <var_name> =`` assignment, or the
            assigned value is not valid JSON (``json.JSONDecodeError`` is
            a subclass of ``ValueError``).
    """
    assignment = re.search(
        r'\bvar\s+' + re.escape(var_name) + r'\s*=\s*',
        script_text,
    )
    if assignment is None:
        raise ValueError(f'no `var {var_name} = ...` assignment found')

    # raw_decode() parses exactly one JSON value starting at the given
    # offset and ignores whatever follows it (the `;`, more JavaScript),
    # so nested objects or a `}` inside a string cannot confuse it.
    value, _end = json.JSONDecoder().raw_decode(script_text, assignment.end())
    return value


def main() -> None:
    """Parse the sample HTML and print every ``key -> value`` pair."""
    # Imported here so that extract_stocks() stays usable (and testable)
    # without the third-party HTML parser installed.
    from bs4 import BeautifulSoup as BS

    soup = BS(html, 'lxml')

    script = soup.find('script')
    if script is None:
        raise ValueError('no <script> tag found in HTML')

    data = extract_stocks(script.text)
    #print(data)

    for key, value in data.items():
        print(key, '->', value)


if __name__ == '__main__':
    main()

# Result

"""
938975 -> True
938977 -> True
938979 -> True
938981 -> True
938983 -> True
938985 -> True
938987 -> False
938989 -> True
938991 -> True
938993 -> True
938995 -> True
938997 -> False
938999 -> True
939001 -> True
939003 -> True
939005 -> True
939007 -> True
"""
RAW Paste Data