Advertisement
Guest User

Untitled

a guest
Mar 30th, 2020
831
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.81 KB | None | 0 0
  1. """
  2. Fetch most common close reasons on Linguistics SE.
  3.  
  4. Author: lemontree @StackExchange
  5. Python 3.8
  6. """
  7.  
  8. # SE API doc: https://api.stackexchange.com/docs
  9.  
  10. import requests, json
  11.  
  12. ###########
  13. # helpers
  14. ###########
  15. import time, sys
  16.  
  17. # print countdown while sleeping
  18. def wait(t):
  19.     for remaining in range(t, 0, -1):
  20.         mins, secs = divmod(remaining, 60)
  21.         sys.stdout.write("\r")
  22.         sys.stdout.write("sleeping for {:02d}:{:02d} ...".format(mins, secs))
  23.         sys.stdout.flush()
  24.         time.sleep(1)
  25.  
  26. # extract substring from string
  27. def substr(string, start, end):
  28.     if start:
  29.         string = string[string.index(start) + len(start):] if start in string else string
  30.     if end:
  31.         string = string[:string.index(end)] if end in string else string
  32.     return string
  33.  
  34. # map close explanations to concise close reasons
  35. reason_map = {
  36.     "This question has been asked before and already has an answer. If those answers do not fully address your question"
  37.         ", please <a href=\"/questions/ask\">ask a new question</a>.": "duplicate",
  38.     "This question was marked as an exact duplicate of an existing question.": "duplicate",
  39.     "<b>Language-specific grammar and usage questions</b> are off-topic unless primarily concerned with linguistics rat"
  40.         "her than usage. There are many language-specific sites where such questions are welcomed; see: http://stackex"
  41.         "change.com/sites": "off-topic: language-specific",
  42.     "Questions seeking help with <b>identification of or translation of specific samples of text</b> are off-topic.":
  43.         "off-topic: langauge identification and translation",
  44.     "Questions requesting to make <b>syntax trees</b> are not within the scope defined in the <a href=\"https://linguis"
  45.         "tics.stackexchange.com/help/on-topic\">help center</a>. For any doubt, please <a href=\"https://linguistics.me"
  46.         "ta.stackexchange.com/questions/ask\">ask on Meta</a>.": "off-topic: syntax trees",
  47.     "": "off-topic: other site",
  48.     "<ul class=\"close-as-off-topic-status-list\"><li>This question does not appear to be about linguistics within the "
  49.         "scope defined in the <a href=\"https://linguistics.stackexchange.com/help/on-topic\">help center</a>.</li></ul"
  50.         ">": "off-topic: other",
  51.     "Please clarify your specific problem or add additional details to highlight exactly what you need.   As it&#39;s c"
  52.         "urrently written, it’s hard to tell exactly what you&#39;re asking. See the <a href=\"https://linguistics.stac"
  53.         "kexchange.com/help/how-to-ask\">How to Ask</a> page for help clarifying this question.":
  54.         "needs details or clarity",
  55.     "Please edit the question to limit it to a specific problem with enough detail to identify an adequate answer.  Avo"
  56.         "id asking multiple distinct questions at once. See the <a href=\"/help/how-to-ask\">How to Ask</a> page for he"
  57.         "lp clarifying this question.": "needs more focus",
  58.     "As it currently stands, this question is not a good fit for our Q&amp;A format. We expect answers to be supported"
  59.         " by facts, references,   or expertise, but this question will likely solicit debate, arguments, polling, or ex"
  60.         "tended discussion. If you feel that this question   can be improved and possibly reopened, <a href=\"/help/reo"
  61.         "pen-questions\">visit the help center</a> for guidance.": "opinion-based",
  62.     "Many good questions generate some degree of opinion based on expert experience, but answers to this question   wil"
  63.         "l tend to be almost entirely based on opinions, rather than facts, references, or specific expertise.":
  64.         "opinion-based"
  65. }
  66.  
  67. ###########
  68. # set-up
  69. ###########
  70. results = dict()
  71.  
  72. # specify which fields to include in the JSON response by creating a filter once
  73. # https://api.stackexchange.com/docs/create-filter
  74. filter = "!-*jbN)uAeZAf"
  75. # qfilter = requests.get("https://api.stackexchange.com/2.2/filters/create?unsafe=false&filter=" + filter)
  76.  
  77. # define query parameters
  78. parameters = {
  79.     "order": "desc",
  80.     "sort": "activity",
  81.     "pagesize": 50,
  82.     "site": "linguistics",
  83.     "filter": filter
  84. }
  85. last_page = 65  # process questions until p. 65 (~= 3 years old) at 50 questions per page
  86.  
  87. ###########
  88. # query
  89. ###########
  90. for p in range(1, last_page+1):  # iterate through all pages (questions have to be fetched page-wise)
  91.     # avoid throttle: wait 1 min every 10 pages
  92.     if p % 10 == 1 and p != 1:
  93.         wait(60)
  94.     parameters["page"] = p
  95.     print("fetching questions on page " + str(p) + " ...")
  96.     # request questions on current page
  97.     questions = requests.get("https://api.stackexchange.com/2.2/questions", params=parameters).json()
  98.     # print(json.dumps(questions, indent=2))
  99.     questions = questions["items"]
  100.  
  101.     for q in questions:  # iterate through the questions on page p
  102.         # closed_reason = q["closed_reason"] if "closed_reason" in q else None
  103.         closed = q["closed_details"]["description"] if "closed_details" in q and "description" in q["closed_details"] \
  104.             else None
  105.         if closed:
  106.             # crop close vote explanation from HTML string, map to reason and add to results
  107.             closed = substr(closed, "&quot;", "&quot;")
  108.             closed = reason_map[closed] if closed in reason_map else closed
  109.             results[closed] = results.get(closed, 0) + 1
  110.  
  111. print("done\n")
  112.  
  113. ###########
  114. # processing
  115. ###########
  116. # add non-occurring close reasons
  117. no_occ = {reason: 0 for reason in reason_map.values() if reason not in results}
  118. results = {**results, **no_occ}
  119. # print close reasons sorted by number of questions closed for that reason
  120. for rank, res in enumerate(sorted(results.items(), key=lambda itm: itm[1], reverse=True)):
  121.     reason = res[0]
  122.     count = res[1]
  123.     print(str(rank+1) + ". " + "(" + str(count) + "): " + reason)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement