Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- import MySQLdb
- import sys
- import os
- import traceback
- import cgi
- import urllib
- import re
- import datetime
- import time
- import htmllib
# Wall-clock time at script start; main() reports the elapsed time from it.
starttime = time.time()

# --- Wikitext patterns -------------------------------------------------------
# Raw strings are used so that regex escapes such as \( and \d reach the `re`
# module untouched instead of relying on Python passing unknown string escapes
# through.

# One numbered vote line: a newline and "#" (not followed by "<s>" or ":"),
# then everything up to the first "(UTC)".
# May need to add "{{unsigned" and '<span class="autosigned">' as optional
# terminators to reduce errors caused by unsigned votes.
voteregex = re.compile(r"\n#(?!(?:<s>|:)).*?\(UTC\)", re.IGNORECASE)
# Captures the name out of a "[[User...:Name|" or "[[User...:Name]]" link.
userregex = re.compile(r"\[\[User.*?:(.*?)(?:\||(?:\]\]))", re.IGNORECASE)
# Captures the signature timestamp text that precedes " (UTC)".
timeregex = re.compile(r"(\d{2}:\d{2}, .*?) \(UTC\)")
# "HH:MM, D Month YYYY" -> groups (day, month name, year).
timeparseregex = re.compile(r"\d{2}:\d{2}, (\d{1,2}) ([A-Za-z]*) (\d{4})")
# "Month D, YYYY" -> groups (month name, day, year).
timeunparseregex = re.compile(r"([A-Za-z]*) (\d{1,2}), (\d{4})")
# "YYYY-MM-DD" -> groups (year, month, day); not referenced elsewhere in the
# visible source.
timestampparseregex = re.compile(r"(\d{4})-(\d{2})-(\d{2})")

# Two-digit month number -> English month name.
monthmap = {
    "01": "January",
    "02": "February",
    "03": "March",
    "04": "April",
    "05": "May",
    "06": "June",
    "07": "July",
    "08": "August",
    "09": "September",
    "10": "October",
    "11": "November",
    "12": "December",
}

# --- Request state (filled in by main() from the CGI form) -------------------
username = ""
maxsearch = 50      # number of RfA pages to analyse for this request
maxlimit = 250      # upper bound applied to a user-supplied "max"
startdate = ""
altusername = ""
showall = False

# --- Running totals (updated while pages are analysed) -----------------------
matchstats = [0, 0]         # matches, non-matches
votecounts = [0, 0, 0, 0]   # supports, opposes, neutrals, comments/unparseables
tablelist = []
def _parse_startdate(raw):
    """Return `raw` right-padded with zeros to 14 digits, or None if implausible.

    The value is accepted only when it consists of exactly 14 digits and lies
    between 20000000000000 and the start of next year (inclusive).
    """
    try:
        stamp = raw.ljust(14, "0")
    except AttributeError:
        return None
    if len(stamp) != 14 or not stamp.isdigit():
        return None
    # The upper bound follows the calendar instead of a fixed year so that
    # recent dates keep being accepted.
    upper = (datetime.datetime.today().year + 1) * 10 ** 10
    if int(stamp) < 20000000000000 or int(stamp) > upper:
        return None
    return stamp


def _log_request(client_ip):
    """Append one record describing the current query to the usage log file."""
    record = (
        "<log><ip>" + client_ip + "</ip><username>" + username
        + "</username><max>" + str(maxsearch) + "</max><timestamp>"
        + datetime.datetime.today().strftime("%m/%d/%y %H:%M:%S") + "</timestamp>"
        + ("<startdate>" + startdate + "</startdate>" if startdate else "")
        + ("<altname>" + altusername + "</altname>" if altusername else "")
        + "</log>\n"
    )
    logfile = open("/home/snottywong/rfastatslog.txt", "a")
    try:
        logfile.write(record)
    finally:
        logfile.close()


def main():
    """Handle one CGI request: read the form, query the database, print the report.

    Reads "name", "max", "startdate", "altname" and "showall" from the CGI
    form into the module-level request globals, looks up the RfA pages the
    user has edited, hands them to analyze() and prints the result with
    printtable().  User-facing failures go through errorout(), which ends the
    request by raising SystemExit.
    """
    global username
    global maxsearch
    global startdate
    global altusername
    global showall
    print("""<!doctype html>
<HTML>
<HEAD>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<LINK href="http://toolserver.org/~snottywong/greyscale.css" rel="stylesheet" type="text/css">
<LINK href="http://toolserver.org/~snottywong/menubar3.css" rel="stylesheet" type="text/css">
<TITLE>RfA Vote Counter</TITLE>
</HEAD>
<BODY id="no">
<script type="text/javascript" src="/~snottywong/menubar.js"></script>
<br>
<div style="width:875px;">
<a href="http://toolserver.org/~snottywong/rfastats.html"><small>←New search</small></a>
""")
    db = None
    try:
        db = MySQLdb.connect(db='enwiki_p', host="enwiki-p.rrdb.toolserver.org", read_default_file=os.path.expanduser("~/.my.cnf"))
        cursor = db.cursor()
        form = cgi.FieldStorage()
        if "name" not in form:
            errorout("No name entered.")

        if "max" in form:
            try:
                # Clamp to 1..maxlimit; zero or negative values would
                # otherwise turn into odd slices further down.
                maxsearch = max(1, min(maxlimit, int(form.getfirst("max"))))
            except (TypeError, ValueError):
                maxsearch = 50
        if "startdate" in form:
            startdate = _parse_startdate(form.getfirst("startdate"))
        altusername = form.getfirst("altname", "")
        showall = form.getfirst("showall", "").lower() == "true"

        # The header may be absent (e.g. when the script is not run behind a
        # proxy), so do not index os.environ directly.
        client_ip = os.environ.get("HTTP_X_FORWARDED_FOR", "")
        if client_ip.startswith("89.151.116.5"):
            errorout("Your IP address has been flagged for potential abuse. Please post a message to User talk:Scottywong on the English Wikipedia, or alternatively send an email to snottywong.wiki@gmail.com to prove that you are a human, and to explain why you've been consistently making so many queries on this tool.")

        name = form.getfirst("name", "").replace("_", " ").replace("+", " ")
        if not name:
            errorout("Username not found.")
        username = name[0].capitalize() + name[1:]

        try:
            _log_request(client_ip)
        except EnvironmentError:
            # Logging is best effort: an unwritable log file must not be
            # reported to the visitor as an unknown username.
            pass

        try:
            cursor.execute(u'SELECT user_id FROM user WHERE user_name=%s;', (username,))
            userid = cursor.fetchall()[0][0]
        except Exception:
            #errorout("Username not found." + traceback.format_exc())
            errorout("Username not found.")

        query = """
            SELECT DISTINCT page_title
            FROM revision JOIN page ON rev_page=page_id
            WHERE rev_user=%s
            AND page_namespace=4
            AND page_title LIKE "Requests_for_adminship/%%"
            """
        params = [userid]
        if startdate:
            query += "AND rev_timestamp<=%s\n"
            params.append(startdate)
        query += "ORDER BY rev_timestamp DESC;"
        cursor.execute(query, tuple(params))
        results = cursor.fetchall()
        # Release the connection before the (slow) page analysis starts.
        db.close()
        db = None

        safe_name = cgi.escape(username)
        print("<div style=\"width:875px;\"><h1>RfA voting statistics for User:" + safe_name + "</h1>\n")
        if len(results) == 0:
            errorout("No RfA's found. Try a different date range. Also, note that if the user's username does not appear in the wikitext of their signature, you may need to specify an alternate name.")
        print("These statistics were compiled by an automated process, and may contain errors or omissions due to the wide variety of styles with which people cast votes at RfA.\n")
        print("<br><h2>Vote totals</h2>\n")
        datestr = ""
        if startdate:
            # startdate is YYYYMMDD...; show it as MM/DD/YYYY.
            datestr = " from " + startdate[4:6] + "/" + startdate[6:8] + "/" + startdate[:4] + " and earlier"
        analysed = min(maxsearch, len(results))
        print("Total number of unique RfA pages edited by " + safe_name + datestr + ": " + str(len(results)) + "<br>\n")
        print("Analyzed the last " + str(analysed) + " votes by this user.<br>\n")
        analyze(results[:analysed])
        printtable()
        elapsed = time.time() - starttime
        print("</div>\n<br><br><small>Elapsed time: " + str(round(elapsed, 2)) + " seconds.<br>\n")
        print(datetime.datetime.today().strftime("%m/%d/%y %H:%M:%S") + "</small><br>")
    except SystemExit:
        # errorout() has already printed the message and closed the page.
        pass
    except Exception:
        # format_exc() returns the traceback as text; print_exc() returns
        # None and cannot be concatenated into the message.
        errorout("Unhandled exception.<br><br><pre>" + cgi.escape(traceback.format_exc()) + "</pre>")
    finally:
        if db is not None:
            db.close()
def _split_vote_sections(data):
    """Return (support, oppose, neutral) lists of vote lines found in `data`."""
    if data.find("\n=====Support=====") >= 0:
        heads = ("\n=====Support=====", "\n=====Oppose=====", "\n=====Neutral=====")
    else:
        # Older style of formatting
        heads = ("\n'''Support'''", "\n'''Oppose'''", "\n'''Neutral'''")
    support_at, oppose_at, neutral_at = [data.find(head) for head in heads]
    return (
        voteregex.findall(data[support_at:oppose_at]),
        voteregex.findall(data[oppose_at:neutral_at]),
        voteregex.findall(data[neutral_at:]),
    )


def _find_vote_by(votes, names):
    """Return the first vote line signed by one of the lower-cased `names`, else None."""
    for vote in votes:
        last = vote.rfind("[[User")
        # Most sigs have [[User:Foo|Foo]] [[User talk:Foo|(talk)]], so the
        # second-to-last "[[User" link is preferred when there is one.
        previous = vote.rfind("[[User", 0, last)
        start = last if previous == -1 else previous
        votermatch = userregex.match(vote[start:])
        if votermatch is None:
            continue
        voter = votermatch.group(1).strip().replace("_", " ").lower()
        if voter in names:
            return vote
    return None


def analyze(pages):
    """Classify the user's vote on each RfA page and record it.

    `pages` is a sequence of rows whose first element is a page title.  For
    every page one entry is added to `votecounts` (support, oppose, neutral,
    or comments/unparseable) and, for found votes (or for every page when
    `showall` is set), a row is appended to `tablelist`.  Pages that cannot
    be processed are skipped.
    """
    # Page text is requested in batches of at most 50 titles.
    alldata = {}
    for i in range(0, len(pages), 50):
        alldata.update(APIgetlotsofpages(pages[i:i + 50]))

    # Names are compared case-insensitively with underscores treated as
    # spaces; an empty alternate name is ignored so that it cannot match a
    # link with an empty user name.
    names = [n.replace("_", " ").lower() for n in (username, altusername) if n]
    labels = ("Support", "Oppose", "Neutral")

    for entry in pages:
        try:
            page = entry[0]
            data = alldata["Wikipedia:" + page.replace("_", " ")]
            result = findresults(data[:data.find("==Nomination==")])
            sections = _split_vote_sections(data)
            for slot, (label, votes) in enumerate(zip(labels, sections)):
                vote = _find_vote_by(votes, names)
                if vote is None:
                    continue
                # found our user's vote
                timematch = timeregex.search(vote)
                votetime = "" if timematch is None else parsetime(timematch.group(1))
                tablelist.append((page, label, votetime, result, match(label, result)))
                votecounts[slot] += 1
                break
            else:
                # The user edited this page but didn't vote, or vote wasn't parseable
                votecounts[3] += 1
                if showall:
                    tablelist.append((page, "Comments", None, result, None))
        except Exception:
            # Best effort: a page that is missing from the fetched data or
            # fails to parse is left out instead of aborting the report.
            continue
def findresults(thepage):
    """Return the RfA outcome indicated by the archive banner in `thepage`.

    Gives "Unsuccessful" or "Successful" when the corresponding banner text
    occurs in `thepage` (checked in that order), otherwise "Not closed yet".
    """
    banners = (
        (
            "The following discussion is preserved as an archive of a [[wikipedia:requests for adminship|request for adminship]] that '''did not succeed'''",
            "Unsuccessful",
        ),
        (
            "The following discussion is preserved as an archive of a '''successful''' [[wikipedia:requests for adminship|request for adminship]]",
            "Successful",
        ),
    )
    for banner, outcome in banners:
        if banner in thepage:
            return outcome
    return "Not closed yet"
def parsetime(t):
    """Rewrite an "HH:MM, D Month YYYY" timestamp as "Month D, YYYY".

    Returns "" when `t` does not contain a timestamp of that form.
    """
    found = timeparseregex.search(t)
    if found is None:
        return ""
    day, month, year = found.group(1), found.group(2), found.group(3)
    return month + " " + day + ", " + year
def match(v, r):
    """Report whether vote `v` agrees with result `r`, updating `matchstats`.

    Returns None (and counts nothing) when the RfA is "Not closed yet" or the
    vote is "Neutral".  Otherwise returns True for Support/Successful or
    Oppose/Unsuccessful and False for anything else, incrementing the
    matching slot of `matchstats` (index 0 = matches, 1 = non-matches).
    """
    if r == "Not closed yet" or v == "Neutral":
        return None
    agreeing = (("Support", "Successful"), ("Oppose", "Unsuccessful"))
    if (v, r) in agreeing:
        matchstats[0] += 1
        return True
    matchstats[1] += 1
    return False
def APIgetlotsofpages(rawpagelist):
    """Fetch revision content for several "Wikipedia:" pages in one API request.

    `rawpagelist` is a sequence of rows whose first element is a page title
    (underscores are turned into spaces).  Returns a dict mapping the page
    title, passed through unescape(), to the revision text exactly as it
    appears in the XML response.  Pages whose title or text cannot be
    extracted are left out.  Any failure of the request as a whole is
    reported through errorout().
    """
    if not rawpagelist:
        # Nothing to ask for; avoid a request with an empty title list.
        return {}
    try:
        # Join first and quote once, so no trailing separator has to be cut
        # off the quoted string afterwards.
        titles = "|".join(["Wikipedia:" + page[0].replace("_", " ") for page in rawpagelist])
        u = urllib.urlopen("http://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&format=xml&titles=" + urllib.quote(titles))
        try:
            xml = u.read()
        finally:
            u.close()
        pagedict = {}
        for chunk in re.findall(r'<page.*?>.*?</page>', xml, re.DOTALL):
            title = re.search(r'<page.*?title=\"(.*?)\">', chunk)
            text = re.search(r'<rev xml:space="preserve">(.*?)</rev>', chunk, re.DOTALL)
            if title is None or text is None:
                continue
            try:
                pagedict[unescape(title.group(1))] = text.group(1)
            except Exception:
                # Skip a page whose title cannot be decoded.
                continue
        return pagedict
    except Exception:
        errorout("Error getting RfA pages from API. Please try again later.")
- def link(p):
- text = cgi.escape(p.replace("_", " ")[23:])
- if len(text) > 64:
- text = text[:61] + "..."
- return '<a href="http://en.wikipedia.org/wiki/Wikipedia:' + urllib.quote(p) + '">' + text + '</a>'
- def unescape(s):
- p = htmllib.HTMLParser(None)
- p.save_bgn()
- p.feed(s)
- return p.save_end()
def datefmt(datestr):
    """Convert "Month D, YYYY" into "YYYYMMDD".

    Returns None when `datestr` does not contain a date of that form or when
    the month name is not one of the values in `monthmap`.
    """
    tg = timeunparseregex.search(datestr)
    if tg is None:
        return None
    month_name, day, year = tg.groups()
    # Reverse lookup: monthmap is keyed by month number.
    for number, name in monthmap.items():
        if name == month_name:
            return year + number + day.zfill(2)
    # Unknown month name (e.g. an abbreviation): report "no date" instead of
    # raising IndexError.
    return None
def errorout(errorstr):
    """Print `errorstr` as an error, close the page markup and exit.

    `errorstr` is written as-is (it may contain HTML).  The function never
    returns: it ends the script via sys.exit(0).
    """
    closing_lines = (
        "<br><br>ERROR: " + errorstr + "<br><br>Please try again.<br><br>",
        "</div></BODY>\n</HTML>",
    )
    for line in closing_lines:
        print(line)
    sys.exit(0)
def printtable():
    """Print the vote totals and the per-RfA table from the collected results.

    Reads `votecounts`, `matchstats` and `tablelist`.  When the number of
    counted pages equals `maxsearch` and the last table row has a date, a
    "Next N votes" link that continues from that date is printed above and
    below the table.
    """
    totalvotes = sum(votecounts)

    def share(count):
        # Percentage of all counted pages, as text with one decimal place.
        if totalvotes == 0:
            return "0"
        return str(round(100.0 * count / totalvotes, 1))

    print("<ul>\n")
    captions = ("Support votes", "Oppose votes", "Neutral votes", "Comments or unparseable votes")
    for caption, count in zip(captions, votecounts):
        print("<li>" + caption + ": " + str(count) + " (" + share(count) + "%)</li>")
    decided = sum(matchstats)
    if decided:
        print("<li>This user's vote matched the end result of the RfA " + str(matchstats[0]) + " times, or " + str(round(100.0 * matchstats[0] / decided, 1)) + "% of the time.</li>")
    print("</ul><br>")
    print("<h2>Individual RfA's</h2><br>")

    # Build the "Next" link once; it is shown both above and below the table.
    nextlink = ""
    if len(tablelist) > 0 and tablelist[-1][2] and totalvotes == maxsearch:
        nextdate = datefmt(tablelist[-1][2])
        if nextdate:
            # Both names end up in a URL inside an HTML attribute, and the
            # alternate name comes straight from the request, so they are
            # URL-quoted and the finished URL is HTML-escaped.
            href = (
                "http://toolserver.org/~snottywong/cgi-bin/rfastats.cgi?name="
                + urllib.quote(username.replace(" ", "_"))
                + "&max=" + str(maxsearch)
                + "&startdate=" + nextdate
                + "&altname=" + urllib.quote(altusername)
                + ("&showall=true" if showall else "")
            )
            nextlink = '<a href="' + cgi.escape(href, True) + '"><small>Next ' + str(maxsearch) + " votes →</small></a><br>"
    if nextlink:
        print(nextlink)

    print("""</div>
<table>
<thead>
<tr>
<th scope="col">RfA</th>
<th scope="col">Date</th>
<th scope="col">Vote</th>
<th scope="col">Result</th>
</tr>
</thead>
<tbody>
""")
    # Cell class by whether the vote matched the result (None = not applicable).
    cellclass = {True: "y", False: "n", None: "m"}
    for page, vote, votedate, result, agreed in tablelist:
        print("<tr>\n")
        print("<td>" + link(page) + "</td>")
        print("<td>" + (votedate if votedate else "N/A") + "</td>")
        print("<td>" + vote + "</td>")
        print('<td class="' + cellclass[agreed] + '">' + result + '</td>')
        print("</tr>")
    print("</tbody>\n</table>\n")
    if nextlink:
        print(nextlink)
# Run the request handler, then finish the page with the contact note, the
# "New search" link and the closing tags.
main()
for footer_line in (
    '<small>Bugs, suggestions, questions? Contact the author at <a href="http://en.wikipedia.org/wiki/User_talk:Snottywong">User talk:Snottywong</a></small><br>',
    '<a href="http://toolserver.org/~snottywong/rfastats.html"><small>←New search</small></a>',
    "</div></BODY>\n</HTML>",
):
    print(footer_line)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement