Guest User

Untitled

a guest
Aug 27th, 2020
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.01 KB | None | 0 0
  1. import json
  2. import re
  3. import time
  4. # Third-party libraries (DataFrame.to_markdown additionally requires tabulate)
  5. # pip install pandas requests beautifulsoup4 tabulate
  6. import pandas as pd
  7. import requests
  8. from bs4 import BeautifulSoup
  9.  
  10. res = requests.get("https://teratail.com/users?tab=ranking-total")
  11. soup = BeautifulSoup(res.content)
  12.  
  13.  
  14. def get_extra_info(user_id):
  15.     # 「ベストアンサー」「「+/-」の合計」を取得
  16.     payload = {"id": user_id, "listModel": "Reply"}
  17.     res = requests.post("https://teratail.com/Users/getListDataAjax", data=payload)
  18.     j = json.loads(res.content)
  19.     time.sleep(0.3)
  20.  
  21.     return j["best_answer_total"], j["sum_vote"]
  22.  
  23.  
  24. def print_table(df):
  25.     # マークダウンで出力
  26.     df = df[["順位", "名前", "スコア", "平均スコア", "ベストアンサー率"]]
  27.     print(df.to_markdown(index=False).replace(" ", "_"))
  28.  
  29.  
  30. df = []
  31. for user in soup.find_all(class_="boxStat"):
  32.     order = int(re.findall(r"\d+", user.select_one(".txtOrder").text)[0])
  33.     name = user.a.text
  34.     score = int(user.select_one(".txtUserPoint__num--score").text.replace(",", ""))
  35.     n_questions = int(user.select_one(".txtUserPoint__num--Question").text)
  36.     n_answers = int(user.select_one(".txtUserPoint__num--Answer").text)
  37.  
  38.     print(f"fetching data... {order}-{name}")
  39.     user_id = user.find("img", alt=name)["src"].split("/")[-2]
  40.     n_best_answer, sum_vote = get_extra_info(user_id)
  41.  
  42.     tags = [x.text for x in user.select(".txtUserStatusTags__tagName")]
  43.  
  44.     df.append(
  45.         {
  46.             "順位": order,
  47.             "名前": name,
  48.             "スコア": score,
  49.             "回答数": n_answers,
  50.             "質問数": n_questions,
  51.             "ベストアンサー": n_best_answer,
  52.             "「+/-」の合計": sum_vote,
  53.             "タグ": tags,
  54.         }
  55.     )
  56.  
  57. df = pd.DataFrame(df)
  58. df["平均スコア"] = (df["スコア"] / df["回答数"]).map("{:,.1f}".format)
  59. df["ベストアンサー率"] = (df["ベストアンサー"] / df["回答数"]).map("{:,.0%}".format)
  60.  
  61. df.to_csv("users.csv")
Add Comment
Please, Sign In to add comment