Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import json
- import re
- import time
- # 外部ライブラリ
- # pip install pandas requests beautifulsoup4
- import pandas as pd
- import requests
- from bs4 import BeautifulSoup
# Download the teratail users page (ranking-total tab) and parse it; the
# resulting ``soup`` is what the scraping loop further down iterates over.
res = requests.get("https://teratail.com/users?tab=ranking-total", timeout=10)
# Stop early on an HTTP error instead of parsing an error page.
res.raise_for_status()
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed and emits a warning, so results can differ between
# machines.
soup = BeautifulSoup(res.content, "html.parser")
def get_extra_info(user_id):
    """Fetch a user's best-answer count and vote total from teratail.

    Posts ``user_id`` to the ``getListDataAjax`` endpoint and reads the
    ``best_answer_total`` and ``sum_vote`` fields of the JSON reply.

    Args:
        user_id: Identifier sent as the ``id`` form field.

    Returns:
        A ``(best_answer_total, sum_vote)`` tuple taken from the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within 10 seconds.
        KeyError: If the JSON reply lacks one of the expected fields.
    """
    payload = {"id": user_id, "listModel": "Reply"}
    res = requests.post(
        "https://teratail.com/Users/getListDataAjax", data=payload, timeout=10
    )
    # Fail on HTTP errors here rather than with an opaque JSON decode error
    # or KeyError further down.
    res.raise_for_status()
    j = json.loads(res.content)
    # Pause after each request so consecutive calls are spaced out.
    time.sleep(0.3)
    return j["best_answer_total"], j["sum_vote"]
def print_table(df):
    """Print selected ranking columns of ``df`` as a Markdown table.

    Every space in the rendered table is replaced with an underscore before
    the text is printed.

    Args:
        df: Table-like object supporting column selection by a list of names
            and ``to_markdown(index=False)``.
    """
    columns = ["順位", "名前", "スコア", "平均スコア", "ベストアンサー率"]
    markdown = df[columns].to_markdown(index=False)
    # Splitting on a single space and re-joining with "_" swaps each space
    # for an underscore one-for-one.
    print("_".join(markdown.split(" ")))
def _to_int(text):
    """Parse a count such as ``"1,234"`` into an int, dropping thousands commas."""
    return int(text.replace(",", ""))


# Collect one record per ``boxStat`` element on the ranking page, enrich it
# with the per-user data from get_extra_info(), then tabulate and save.
rows = []
for user in soup.find_all(class_="boxStat"):
    # The rank text may contain non-digit characters; keep the first number.
    order = int(re.findall(r"\d+", user.select_one(".txtOrder").text)[0])
    name = user.a.text
    score = _to_int(user.select_one(".txtUserPoint__num--score").text)
    # Counts get the same comma handling as the score, so a value rendered
    # with a thousands separator does not make int() fail.
    n_questions = _to_int(user.select_one(".txtUserPoint__num--Question").text)
    n_answers = _to_int(user.select_one(".txtUserPoint__num--Answer").text)
    print(f"fetching data... {order}-{name}")
    # The user id is taken from the second-to-last path segment of the URL of
    # the image whose alt text equals the user's name.
    user_id = user.find("img", alt=name)["src"].split("/")[-2]
    n_best_answer, sum_vote = get_extra_info(user_id)
    tags = [x.text for x in user.select(".txtUserStatusTags__tagName")]
    rows.append(
        {
            "順位": order,
            "名前": name,
            "スコア": score,
            "回答数": n_answers,
            "質問数": n_questions,
            "ベストアンサー": n_best_answer,
            "「+/-」の合計": sum_vote,
            "タグ": tags,
        }
    )

df = pd.DataFrame(rows)
# Derived columns are stored as preformatted strings: score per answer with
# one decimal place, and best answers per answer as a whole-number percentage.
df["平均スコア"] = (df["スコア"] / df["回答数"]).map("{:,.1f}".format)
df["ベストアンサー率"] = (df["ベストアンサー"] / df["回答数"]).map("{:,.0%}".format)
df.to_csv("users.csv")
Add Comment
Please, Sign In to add comment