Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- Fetch most common close reasons on Linguistics SE.
- Author: lemontree @StackExchange
- Python 3.8
- """
- # SE API doc: https://api.stackexchange.com/docs
- import requests, json
- ###########
- # helpers
- ###########
- import time, sys
- # print countdown while sleeping
def wait(t):
    """Sleep for ``t`` seconds while showing a live mm:ss countdown.

    The countdown is rewritten in place once per second: each update starts
    with a carriage return and ends without a newline. When ``t`` is zero or
    negative nothing is printed and the function returns immediately.

    Args:
        t: Number of whole seconds to sleep (an ``int``).
    """
    for seconds_left in reversed(range(1, t + 1)):
        minutes, seconds = divmod(seconds_left, 60)
        # "\r" returns the cursor to column 0 so the previous text is overwritten
        sys.stdout.write("\r" + "sleeping for {:02d}:{:02d} ...".format(minutes, seconds))
        sys.stdout.flush()
        time.sleep(1)
- # extract substring from string
def substr(string, start, end):
    """Return the part of ``string`` between ``start`` and ``end``.

    Everything up to and including the first occurrence of ``start`` is
    dropped; then everything from the first occurrence of ``end`` (searched
    in what is left) onwards is dropped. A delimiter that is empty/``None``
    or does not occur leaves that side of the string untouched.

    Args:
        string: Text to crop.
        start: Left delimiter, or a falsy value to keep the beginning.
        end: Right delimiter, or a falsy value to keep the ending.

    Returns:
        The cropped string.
    """
    if start:
        _, found, tail = string.partition(start)
        if found:
            string = tail
    if end:
        # partition() yields the whole string as its head when `end` is absent
        string = string.partition(end)[0]
    return string
- # map close explanations to concise close reasons
# Keys are close explanations as they appear in a question's close description
# (HTML included); values are the short labels used in the final report.
# Several explanations may share one label. Long keys are split across
# adjacent string literals, which Python concatenates.
reason_map = {
    "This question has been asked before and already has an answer. If those answers do not fully address your question"
    ", please <a href=\"/questions/ask\">ask a new question</a>.": "duplicate",
    "This question was marked as an exact duplicate of an existing question.": "duplicate",
    "<b>Language-specific grammar and usage questions</b> are off-topic unless primarily concerned with linguistics rat"
    "her than usage. There are many language-specific sites where such questions are welcomed; see: http://stackex"
    "change.com/sites": "off-topic: language-specific",
    "Questions seeking help with <b>identification of or translation of specific samples of text</b> are off-topic.":
        "off-topic: language identification and translation",
    "Questions requesting to make <b>syntax trees</b> are not within the scope defined in the <a href=\"https://linguis"
    "tics.stackexchange.com/help/on-topic\">help center</a>. For any doubt, please <a href=\"https://linguistics.me"
    "ta.stackexchange.com/questions/ask\">ask on Meta</a>.": "off-topic: syntax trees",
    # NOTE(review): this key is empty in the available copy of the script; its
    # original text may have been lost -- confirm against the author's version.
    "": "off-topic: other site",
    "<ul class=\"close-as-off-topic-status-list\"><li>This question does not appear to be about linguistics within the "
    "scope defined in the <a href=\"https://linguistics.stackexchange.com/help/on-topic\">help center</a>.</li></ul"
    ">": "off-topic: other",
    # NOTE(review): "itβs" below looks like a mis-encoded apostrophe; the key
    # is kept as found -- verify it against the text the API actually returns.
    "Please clarify your specific problem or add additional details to highlight exactly what you need. As it's c"
    "urrently written, itβs hard to tell exactly what you're asking. See the <a href=\"https://linguistics.stac"
    "kexchange.com/help/how-to-ask\">How to Ask</a> page for help clarifying this question.":
        "needs details or clarity",
    "Please edit the question to limit it to a specific problem with enough detail to identify an adequate answer. Avo"
    "id asking multiple distinct questions at once. See the <a href=\"/help/how-to-ask\">How to Ask</a> page for he"
    "lp clarifying this question.": "needs more focus",
    "As it currently stands, this question is not a good fit for our Q&A format. We expect answers to be supported"
    " by facts, references, or expertise, but this question will likely solicit debate, arguments, polling, or ex"
    "tended discussion. If you feel that this question can be improved and possibly reopened, <a href=\"/help/reo"
    "pen-questions\">visit the help center</a> for guidance.": "opinion-based",
    "Many good questions generate some degree of opinion based on expert experience, but answers to this question wil"
    "l tend to be almost entirely based on opinions, rather than facts, references, or specific expertise.":
        "opinion-based",
}
- ###########
- # set-up
- ###########
# tally of close reasons: label -> number of closed questions counted
results = {}

# The filter decides which fields the API puts into its JSON response.
# It only has to be created once, see
# https://api.stackexchange.com/docs/create-filter
filter = "!-*jbN)uAeZAf"
# qfilter = requests.get("https://api.stackexchange.com/2.2/filters/create?unsafe=false&filter=" + filter)

# query parameters sent with every page request
parameters = dict(
    order="desc",
    sort="activity",
    pagesize=50,
    site="linguistics",
    filter=filter,
)

# process questions until p. 65 (~= 3 years old) at 50 questions per page
last_page = 65
- ###########
- # query
- ###########
# Walk through the question list page by page (questions have to be fetched
# page-wise) and count, per close reason, how many closed questions were seen.
for p in range(1, last_page + 1):
    # avoid throttle: wait 1 min every 10 pages
    if p % 10 == 1 and p != 1:
        wait(60)
    parameters["page"] = p
    print("fetching questions on page " + str(p) + " ...")
    # request questions on current page; the timeout keeps a stalled
    # connection from blocking the script indefinitely
    response = requests.get(
        "https://api.stackexchange.com/2.2/questions", params=parameters, timeout=30
    ).json()
    # A response without "items" cannot be processed; report whatever error
    # details the server sent instead of failing with a bare KeyError.
    if "items" not in response:
        raise RuntimeError(
            "request for page {} failed: {} ({})".format(
                p,
                response.get("error_message", "no items in response"),
                response.get("error_name", "unknown error"),
            )
        )
    for q in response["items"]:  # iterate through the questions on page p
        description = q.get("closed_details", {}).get("description")
        if not description:
            continue  # question is not closed (or carries no explanation)
        # Crop the quoted close explanation out of the HTML description.
        # NOTE(review): the delimiter literal was unreadable in the available
        # copy of the script (a bare triple quote, which is not a valid
        # two-delimiter call); it is reconstructed here as the HTML entity
        # for a double quote -- confirm against real API output.
        closed = substr(description, "&quot;", "&quot;")
        # map the explanation to its short label; unknown texts are kept verbatim
        closed = reason_map.get(closed, closed)
        results[closed] = results.get(closed, 0) + 1
    # stop early when the API reports that no further pages exist
    if not response.get("has_more", True):
        break
    # honour an explicit back-off request from the API before the next call
    backoff = response.get("backoff")
    if backoff:
        wait(int(backoff))
print("done\n")
- ###########
- # processing
- ###########
# Known close reasons that never occurred still appear in the report, with a
# count of zero, after the reasons that did occur.
no_occ = dict.fromkeys(
    (label for label in reason_map.values() if label not in results), 0
)
results = {**results, **no_occ}

# Print the ranking: most frequent close reason first; reasons with equal
# counts keep the order in which they were inserted into `results`.
ranking = sorted(results.items(), key=lambda entry: entry[1], reverse=True)
for position, (reason, count) in enumerate(ranking, start=1):
    print(f"{position}. ({count}): {reason}")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement