Advertisement
Guest User

rfa stats code

a guest
Apr 10th, 2022
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 17.38 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3.  
  4. import MySQLdb
  5. import sys
  6. import os
  7. import traceback
  8. import cgi
  9. import urllib
  10. import re
  11. import datetime
  12. import time
  13. import htmllib
  14.  
  15. starttime = time.time()
  16. voteregex = re.compile("\n#(?!(?:<s>|:)).*?\(UTC\)", re.IGNORECASE) #may need to add "{{unsigned" and "<span class="autosigned"> as optional terminators to reduce errors due to idiots not signing their votes
  17. userregex = re.compile("\[\[User.*?:(.*?)(?:\||(?:\]\]))", re.IGNORECASE)
  18. timeregex = re.compile("(\d{2}:\d{2}, .*?) \(UTC\)")
  19. timeparseregex = re.compile("\d{2}:\d{2}, (\d{1,2}) ([A-Za-z]*) (\d{4})")
  20. timeunparseregex = re.compile("([A-Za-z]*) (\d{1,2}), (\d{4})")
  21. timestampparseregex = re.compile("(\d{4})-(\d{2})-(\d{2})")
  22. monthmap = {"01":"January", "02":"February", "03":"March", "04":"April", "05":"May", "06":"June", "07":"July", "08":"August", "09":"September", "10":"October", "11":"November", "12":"December"}
  23. username = ""
  24. maxsearch = 50
  25. maxlimit = 250
  26. startdate = ""
  27. altusername = ""
  28. showall = False
  29. matchstats = [0,0]  #matches, non-matches
  30. votecounts = [0,0,0,0] #supports, opposes, neutrals, comments/unparseables
  31. tablelist = []
  32.  
  33.  
  34. def main():
  35.     global username
  36.     global maxsearch
  37.     global maxlimit
  38.     global startdate
  39.     global altusername
  40.     global showall
  41.     print """<!doctype html>
  42. <HTML>
  43. <HEAD>
  44. <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
  45. <LINK href="http://toolserver.org/~snottywong/greyscale.css" rel="stylesheet" type="text/css">
  46. <LINK href="http://toolserver.org/~snottywong/menubar3.css" rel="stylesheet" type="text/css">
  47. <TITLE>RfA Vote Counter</TITLE>
  48. </HEAD>
  49. <BODY id="no">
  50. <script type="text/javascript" src="/~snottywong/menubar.js"></script>
  51. <br>
  52. <div style="width:875px;">
  53. <a href="http://toolserver.org/~snottywong/rfastats.html"><small>&larr;New search</small></a>
  54. """
  55.     try:
  56.         db = MySQLdb.connect(db='enwiki_p', host="enwiki-p.rrdb.toolserver.org", read_default_file=os.path.expanduser("~/.my.cnf"))
  57.         cursor = db.cursor()
  58.         form = cgi.FieldStorage()
  59.         if "name" not in form:
  60.             errorout("No name entered.")
  61.         else:
  62.             if "max" in form:
  63.                 try:
  64.                     maxsearch = min(maxlimit, int(form['max'].value))
  65.                 except:
  66.                     maxsearch = 50
  67.             if "startdate" in form:
  68.                 try:
  69.                     startdate = form['startdate'].value
  70.                     startdate = startdate.ljust(14, "0")
  71.                     if len(startdate) != 14 or int(startdate) < 20000000000000 or int(startdate) > 20150000000000:
  72.                         startdate = None
  73.                 except:
  74.                     pass
  75.             if "altname" in form:
  76.                 altusername = form['altname'].value
  77.                
  78.             if os.environ["HTTP_X_FORWARDED_FOR"].startswith("89.151.116.5"):
  79.                 errorout("Your IP address has been flagged for potential abuse.  Please post a message to User talk:Scottywong on the English Wikipedia, or alternatively send an email to snottywong.wiki@gmail.com to prove that you are a human, and to explain why you've been consistently making so many queries on this tool.")
  80.  
  81.             try:
  82.                 username = form['name'].value.replace("_", " ").replace("+", " ")
  83.                 username = username[0].capitalize() + username[1:]
  84.                 f = open("/home/snottywong/rfastatslog.txt", "a")
  85.                 f.write("<log><ip>" + os.environ["HTTP_X_FORWARDED_FOR"] + "</ip><username>" + username + "</username><max>" + str(maxsearch) + "</max><timestamp>" + datetime.datetime.today().strftime("%m/%d/%y %H:%M:%S") + "</timestamp>" + ("<startdate>" + startdate + "</startdate>" if startdate else "") + ("<altname>" + altusername + "</altname>" if altusername else "") + "</log>\n")
  86.                 f.close()
  87.                 cursor.execute(u'SELECT user_id FROM user WHERE user_name=%s;', (username))
  88.                 userid = cursor.fetchall()[0][0]
  89.             except:
  90.                 #errorout("Username not found." + traceback.print_exc(file=sys.stdout))
  91.                 errorout("Username not found.")
  92.             if "showall" in form:
  93.                 if form['showall'].value.lower() == "true":
  94.                     showall = True
  95.                
  96.         cursor = db.cursor()
  97.         if startdate:
  98.             cursor.execute("""
  99. SELECT DISTINCT page_title
  100. FROM revision JOIN page ON rev_page=page_id
  101. WHERE rev_user=%s
  102. AND page_namespace=4
  103. AND page_title LIKE "Requests_for_adminship/%%"
  104. AND rev_timestamp<=%s
  105. ORDER BY rev_timestamp DESC;""",
  106.             (userid, startdate)
  107.                            )
  108.         else:
  109.             cursor.execute("""
  110. SELECT DISTINCT page_title
  111. FROM revision JOIN page ON rev_page=page_id
  112. WHERE rev_user=%s
  113. AND page_namespace=4
  114. AND page_title LIKE "Requests_for_adminship/%%"
  115. ORDER BY rev_timestamp DESC;""",
  116.             (userid)
  117.                            )
  118.            
  119.         results = cursor.fetchall()
  120.         db.close()
  121.  
  122.         print "<div style=\"width:875px;\"><h1>RfA voting statistics for User:" + username + "</h1>\n"
  123.         if len(results) == 0:
  124.             errorout("No RfA's found.  Try a different date range.  Also, note that if the user's username does not appear in the wikitext of their signature, you may need to specify an alternate name.")
  125.         else:
  126.             print "These statistics were compiled by an automated process, and may contain errors or omissions due to the wide variety of styles with which people cast votes at RfA.\n"
  127.             print "<br><h2>Vote totals</h2>\n"
  128.             datestr = ""
  129.             if startdate:
  130.                 datestr = " from " + startdate[4:6] + "/" + startdate[6:8] + "/" + startdate[:4] + " and earlier"
  131.             print "Total number of unique RfA pages edited by " + username + datestr + ": " + str(len(results)) + "<br>\n"
  132.             print "Analyzed the last " + str(min(maxsearch, len(results))) + " votes by this user.<br>\n"
  133.             analyze(results[:min(maxsearch, len(results))])
  134.             printtable()
  135.         elapsed = time.time() - starttime
  136.         print "</div>\n<br><br><small>Elapsed time: " + str(round(elapsed, 2)) + " seconds.<br>\n"
  137.         print datetime.datetime.today().strftime("%m/%d/%y %H:%M:%S") + "</small><br>"
  138.     except SystemExit:
  139.         pass
  140.     except:
  141.         errorout("Unhandled exception.<br><br>" + traceback.print_exc(file=sys.stdout))
  142.  
  143. def analyze(pages):
  144.     global tablelist
  145.     global votecounts
  146.     if len(pages) <= 50:
  147.         alldata = APIgetlotsofpages(pages)
  148.     else:
  149.         alldata = {}
  150.         for i in range(0, len(pages), 50):
  151.             newdata = APIgetlotsofpages(pages[i:min(i+50, len(pages))])
  152.             alldata = dict(alldata.items() + newdata.items())
  153.     for entry in pages:
  154.         try:
  155.             page = entry[0]
  156.             data = alldata["Wikipedia:" + page.replace("_", " ")]
  157.             result = findresults(data[:data.find("==Nomination==")])
  158.             if data.find("\n=====Support=====") >= 0:
  159.                 supportvotes = voteregex.findall(data[data.find("\n=====Support====="):data.find("\n=====Oppose=====")])
  160.                 opposevotes = voteregex.findall(data[data.find("\n=====Oppose====="):data.find("\n=====Neutral=====")])
  161.                 neutralvotes = voteregex.findall(data[data.find("\n=====Neutral====="):])
  162.             else:
  163.                 supportvotes = voteregex.findall(data[data.find("\n'''Support'''"):data.find("\n'''Oppose'''")])    #Older style of formatting
  164.                 opposevotes = voteregex.findall(data[data.find("\n'''Oppose'''"):data.find("\n'''Neutral'''")])
  165.                 neutralvotes = voteregex.findall(data[data.find("\n'''Neutral'''"):])
  166.                
  167.             foundvote = False
  168.            
  169.             for vote in supportvotes:
  170.                 if vote.rfind("[[User", 0, vote.rfind("[[User")) == -1:
  171.                     votermatch = userregex.match(vote[vote.rfind("[[User"):])
  172.                 else:
  173.                     votermatch = userregex.match(vote[vote.rfind("[[User", 0, vote.rfind("[[User")):])  #Most sigs have [[User:Foo|Foo]] [[User talk:Foo|(talk)]]
  174.                 if votermatch == None:
  175.                     continue
  176.                 else:
  177.                     voter = votermatch.group(1).strip().replace("_", " ")
  178.                     if voter.lower() == username.lower() or voter.lower() == altusername.lower():     #found our user's vote
  179.                         timematch = timeregex.search(vote)
  180.                         if timematch == None:
  181.                             votetime = ""
  182.                         else:
  183.                             votetime = parsetime(timematch.group(1))
  184.                         tablelist.append((page, "Support", votetime, result, match("Support", result)))
  185.                         votecounts[0] += 1
  186.                         foundvote = True
  187.                         break
  188.  
  189.             if not foundvote:
  190.                 for vote in opposevotes:
  191.                     if vote.rfind("[[User", 0, vote.rfind("[[User")) == -1:
  192.                         votermatch = userregex.match(vote[vote.rfind("[[User"):])
  193.                     else:
  194.                         votermatch = userregex.match(vote[vote.rfind("[[User", 0, vote.rfind("[[User")):])  #Most sigs have [[User:Foo|Foo]] [[User talk:Foo|(talk)]]
  195.                     if votermatch == None:
  196.                         continue
  197.                     else:
  198.                         voter = votermatch.group(1).strip()
  199.                         if voter.lower() == username.lower() or voter.lower() == altusername.lower():     #found our user's vote
  200.                             timematch = timeregex.search(vote)
  201.                             if timematch == None:
  202.                                 votetime = ""
  203.                             else:
  204.                                 votetime = parsetime(timematch.group(1))
  205.                             tablelist.append((page, "Oppose", votetime, result, match("Oppose", result)))
  206.                             votecounts[1] += 1
  207.                             foundvote = True
  208.                             break
  209.  
  210.             if not foundvote:
  211.                 for vote in neutralvotes:
  212.                     if vote.rfind("[[User", 0, vote.rfind("[[User")) == -1:
  213.                         votermatch = userregex.match(vote[vote.rfind("[[User"):])
  214.                     else:
  215.                         votermatch = userregex.match(vote[vote.rfind("[[User", 0, vote.rfind("[[User")):])  #Most sigs have [[User:Foo|Foo]] [[User talk:Foo|(talk)]]
  216.  
  217.                     if votermatch == None:
  218.                         continue
  219.                     else:
  220.                         voter = votermatch.group(1).strip()
  221.                         if voter.lower() == username.lower() or voter.lower() == altusername.lower():     #found our user's vote
  222.                             timematch = timeregex.search(vote)
  223.                             if timematch == None:
  224.                                 votetime = ""
  225.                             else:
  226.                                 votetime = parsetime(timematch.group(1))
  227.                             tablelist.append((page, "Neutral", votetime, result, match("Neutral", result)))
  228.                             votecounts[2] += 1
  229.                             foundvote = True
  230.                             break
  231.                        
  232.             if not foundvote:       #The user edited this page but didn't vote, or vote wasn't parseable
  233.                 votecounts[3] += 1
  234.                 if showall:
  235.                     tablelist.append((page, "Comments", None, result, None))
  236.  
  237.         except:
  238.             #errorout("Fatal error while parsing votes.<br>" + traceback.print_exc(file=sys.stdout))
  239.             continue
  240.  
  241. def findresults(thepage):
  242.     if "The following discussion is preserved as an archive of a [[wikipedia:requests for adminship|request for adminship]] that '''did not succeed'''" in thepage:
  243.         return "Unsuccessful"
  244.     elif "The following discussion is preserved as an archive of a '''successful''' [[wikipedia:requests for adminship|request for adminship]]" in thepage:
  245.         return "Successful"
  246.     else:
  247.         return "Not closed yet"
  248.        
  249.  
  250. def parsetime(t):
  251.     tm = timeparseregex.search(t)
  252.     if tm == None:
  253.         return ""
  254.     else:
  255.         return tm.group(2) + " " + tm.group(1) + ", " + tm.group(3)
  256.  
  257. def match(v, r):
  258.     if r == "Not closed yet":
  259.         return None
  260.     if v == "Neutral":
  261.         return None
  262.     if v == "Support" and r == "Successful":
  263.         matchstats[0] += 1
  264.         return True
  265.     if v == "Oppose" and r == "Unsuccessful":
  266.         matchstats[0] += 1
  267.         return True
  268.     matchstats[1] += 1
  269.     return False
  270.  
  271. def APIgetlotsofpages(rawpagelist):
  272.     try:
  273.         p = ''
  274.         for page in rawpagelist:
  275.             p += urllib.quote("Wikipedia:" + page[0].replace("_", " ") + "|")
  276.         u = urllib.urlopen("http://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&format=xml&titles=" + p[:-3])
  277.         xml = u.read()
  278.         u.close()
  279.         pagelist = re.findall(r'<page.*?>.*?</page>', xml, re.DOTALL)
  280.         pagedict = {}
  281.         for i in pagelist:
  282.             try:
  283.                 pagename = re.search(r'<page.*?title=\"(.*?)\">', i).group(1)
  284.                 text = re.search(r'<rev xml:space="preserve">(.*?)</rev>', i, re.DOTALL).group(1)
  285.                 pagedict[unescape(pagename)] = text
  286.             except:
  287.                 #print sys.exc_info()[0]
  288.                 #print "<br>"
  289.                 #print traceback.print_exc(file=sys.stdout)
  290.                 #print "<br>"
  291.                 continue
  292.         return pagedict
  293.     except:
  294.         #print sys.exc_info()[0]
  295.         #print "<br>"
  296.         #print traceback.print_exc(file=sys.stdout)
  297.         #print "<br>"
  298.         errorout("Error getting RfA pages from API.  Please try again later.")
  299.  
  300. def link(p):
  301.     text = cgi.escape(p.replace("_", " ")[23:])
  302.     if len(text) > 64:
  303.         text = text[:61] + "..."
  304.     return '<a href="http://en.wikipedia.org/wiki/Wikipedia:' + urllib.quote(p) + '">' + text + '</a>'
  305.  
  306. def unescape(s):
  307.     p = htmllib.HTMLParser(None)
  308.     p.save_bgn()
  309.     p.feed(s)
  310.     return p.save_end()
  311.  
  312. def datefmt(datestr):
  313.     tg = timeunparseregex.search(datestr)
  314.     if tg == None:
  315.         return
  316.     month = [k for k,v in monthmap.items() if v==tg.group(1)][0]
  317.     day = tg.group(2)
  318.     year = tg.group(3)
  319.     if len(day) == 1:
  320.         day = "0" + day
  321.     return year + month + day
  322.  
  323. def errorout(errorstr): #prints error string and exits
  324.     print "<br><br>ERROR: " + errorstr + "<br><br>Please try again.<br><br>"
  325.     print "</div></BODY>\n</HTML>"
  326.     sys.exit(0)
  327.  
  328. def printtable():
  329.     global votecounts
  330.     print "<ul>\n"
  331.     totalvotes = sum(votecounts)
  332.     print "<li>Support votes: " + str(votecounts[0]) + " (" + ("0" if totalvotes==0 else str(round(100.0 * votecounts[0] / totalvotes, 1))) + "%)</li>"
  333.     print "<li>Oppose votes: " + str(votecounts[1]) + " (" + ("0" if totalvotes==0 else str(round(100.0 * votecounts[1] / totalvotes, 1))) + "%)</li>"
  334.     print "<li>Neutral votes: " + str(votecounts[2]) + " (" + ("0" if totalvotes==0 else str(round(100.0 * votecounts[2] / totalvotes, 1))) + "%)</li>"
  335.     print "<li>Comments or unparseable votes: " + str(votecounts[3]) + " (" + ("0" if totalvotes==0 else str(round(100.0 * votecounts[3] / totalvotes, 1))) + "%)</li>"
  336.     if sum(matchstats):
  337.         print "<li>This user's vote matched the end result of the RfA " + str(matchstats[0]) + " times, or " + str(round(100.0 * matchstats[0] / sum(matchstats), 1)) + "% of the time.</li>"
  338.     print "</ul><br>"
  339.  
  340.     print "<h2>Individual RfA's</h2><br>"
  341.  
  342.     if len(tablelist) > 0 and tablelist[-1][2] and sum(votecounts) == maxsearch:
  343.         print '<a href="http://toolserver.org/~snottywong/cgi-bin/rfastats.cgi?name=' + username.replace(" ", "_") + '&max=' + str(maxsearch) + '&startdate=' + datefmt(tablelist[-1][2]) + '&altname=' + altusername + ('&showall=true' if showall else '') + '"><small>Next ' + str(maxsearch) + " votes &rarr;</small></a><br>"
  344.     print """</div>
  345. <table>
  346. <thead>
  347. <tr>
  348. <th scope="col">RfA</th>
  349. <th scope="col">Date</th>
  350. <th scope="col">Vote</th>
  351. <th scope="col">Result</th>
  352. </tr>
  353. </thead>
  354. <tbody>
  355. """
  356.  
  357.     for i in tablelist:
  358.         print "<tr>\n"
  359.         print "<td>" + link(i[0]) + "</td>"
  360.         print "<td>" + (i[2] if i[2] else "N/A") + "</td>"
  361.         print "<td>" + i[1] + "</td>"
  362.         if i[4] == True:
  363.             print '<td class="y">' + i[3] + '</td>'
  364.         elif i[4] == False:
  365.             print '<td class="n">' + i[3] + '</td>'
  366.         elif i[4] == None:
  367.             print '<td class="m">' + i[3] + '</td>'
  368.         print "</tr>"
  369.     print "</tbody>\n</table>\n"
  370.     if len(tablelist) > 0 and tablelist[-1][2] and sum(votecounts) == maxsearch:
  371.         print '<a href="http://toolserver.org/~snottywong/cgi-bin/rfastats.cgi?name=' + username.replace(" ", "_") + '&max=' + str(maxsearch) + '&startdate=' + datefmt(tablelist[-1][2]) + '&altname=' + altusername + ('&showall=true' if showall else '') + '"><small>Next ' + str(maxsearch) + " votes &rarr;</small></a><br>"
  372.  
  373.  
  374. main()
  375. print '<small>Bugs, suggestions, questions?  Contact the author at <a href="http://en.wikipedia.org/wiki/User_talk:Snottywong">User talk:Snottywong</a></small><br>'
  376. print '<a href="http://toolserver.org/~snottywong/rfastats.html"><small>&larr;New search</small></a>'
  377. print "</div></BODY>\n</HTML>"
  378.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement