Advertisement
Guest User

PTT #1Z0PBY9H (Python)

a guest
Aug 22nd, 2022
416
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.29 KB | None | 0 0
  1. # 文章網址 https://www.ptt.cc/bbs/Python/M.1661047522.A.251.html
  2.  
  3. import json
  4. import re
  5. import urllib.request as url_req
  6.  
  7. url = "https://tw.screener.finance.yahoo.net/future/"
  8. query_args = "q?_=1661047090480&callback=jQuery111307735144102166103_1661047090479&mkt=01&perd=1m&sym=WTX%26&type=tick"
  9.  
  10. headers = {
  11.     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
  12.     "Sec-CH-UA-Platform": "Windows",
  13. }
  14. req = url_req.Request(url + query_args, headers=headers)
  15.  
  16.  
  17. def parsing_invalid_number(match):
  18.     s = match.group()
  19.     if not s.lstrip("0").isnumeric():
  20.         return s
  21.     s = s.lstrip("0")
  22.     if "." in s:
  23.         return "{:f}".format(float(s))
  24.     return "{:d}".format(int(s))
  25.  
  26.  
  27. def load_json(rawdata: str):
  28.     match = re.match(r"(jQuery.+\()({.+})(\);)", rawdata)
  29.     if not match:
  30.         raise ValueError("Cannot find match in: {:s}".format(rawdata))
  31.     json_text = match.groups()[1]
  32.     json_text = re.sub(r"\d+\.?\d*", parsing_invalid_number, json_text)
  33.     json_text = json_text.replace("sections", '"sections"')
  34.     return json.loads(json_text)
  35.  
  36.  
  37. with url_req.urlopen(req) as response:
  38.     data = response.read().decode("utf-8")
  39.  
  40. data_json = load_json(data)
  41.  
  42. print(data_json)
  43.  
Tags: JSON python regex
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement