Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Article URL: https://www.ptt.cc/bbs/Python/M.1661047522.A.251.html
- import json
- import re
- import urllib.request as url_req
# Endpoint and query string for the request. The query string is kept as one
# runtime value but written one parameter per line; "%26" is a URL-encoded "&".
url = "https://tw.screener.finance.yahoo.net/future/"
query_args = (
    "q?_=1661047090480"
    "&callback=jQuery111307735144102166103_1661047090479"
    "&mkt=01"
    "&perd=1m"
    "&sym=WTX%26"
    "&type=tick"
)

# Request headers sent along with the query.
headers = {}
headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36"
headers["Sec-CH-UA-Platform"] = "Windows"

# Prepared request object; nothing is sent until it is passed to urlopen().
req = url_req.Request(url="".join((url, query_args)), headers=headers)
def parsing_invalid_number(match):
    """Rewrite a matched number so its integer part has no redundant leading zeros.

    Meant to be used as a ``re.sub`` replacement callback. JSON forbids
    numbers such as ``0123`` or ``00.5``; this turns them into ``123`` and
    ``0.5``. The fractional digits (and a trailing dot, if any) are copied
    verbatim so no precision or formatting is lost.

    Args:
        match: ``re.Match`` whose full matched text is the candidate number.

    Returns:
        The matched text with a normalized integer part, or the matched text
        unchanged when it is not a plain ``digits[.digits]`` token.
    """
    token = match.group()
    integer_part, dot, fraction = token.partition(".")

    # Leave anything that is not a plain decimal number exactly as it was.
    if not integer_part.isdecimal() or (fraction and not fraction.isdecimal()):
        return token

    # int() drops the leading zeros and still yields "0" for an all-zero run
    # such as "000", which a bare lstrip("0") would reduce to an empty string.
    return "{:d}".format(int(integer_part)) + dot + fraction
def load_json(rawdata: str) -> dict:
    """Extract and parse the object wrapped in a ``jQuery...({...});`` response.

    The wrapped payload is not strict JSON, so it is repaired before parsing:
    numeric tokens are passed through ``parsing_invalid_number`` and a bare
    ``sections`` key is quoted. Quoted string literals are skipped during the
    repair so their contents (for example ``"01"``) are never altered.

    Args:
        rawdata: Full response text, starting with the ``jQuery...(`` wrapper.

    Returns:
        The decoded payload object.

    Raises:
        ValueError: If ``rawdata`` does not have the expected wrapper.
        json.JSONDecodeError: If the repaired payload is still not valid JSON
            (a subclass of ``ValueError``).
    """
    # DOTALL lets the payload span several lines; the non-greedy prefix stops
    # at the first "({" so a "({" inside the payload cannot be mistaken for
    # the start of the wrapped object.
    match = re.match(r"(jQuery.+?\()(\{.+\})(\);)", rawdata, re.DOTALL)
    if not match:
        raise ValueError("Cannot find match in: {:s}".format(rawdata))
    json_text = match.group(2)

    def repair_token(token_match):
        """Fix one token: keep strings, quote the bare key, normalize numbers."""
        token = token_match.group()
        if token.startswith('"'):
            return token
        if token == "sections":
            return '"sections"'
        return parsing_invalid_number(token_match)

    # Single pass over the text. String literals are listed first so that
    # digits or the word "sections" inside them are consumed as part of the
    # string and left untouched; an already quoted "sections" key is likewise
    # matched as a string and not quoted twice.
    json_text = re.sub(
        r'"(?:\\.|[^"\\])*"|\bsections\b|\d+\.?\d*',
        repair_token,
        json_text,
    )
    return json.loads(json_text)
# Send the prepared request and read the whole body as UTF-8 text; the
# connection is closed as soon as the body has been read.
with url_req.urlopen(req) as response:
    data = str(response.read(), "utf-8")

# Unwrap and parse the response, then show the decoded result.
data_json = load_json(data)
print(data_json)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement