Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Calculates player scores for the Double Post thread at the Forum Games section of the xkcd fora.
# Usage: python doublepostpoints.py [flags]
# Use flag -l to include a log of all post authors, timestamps, and how many points each post scored.
# Use flag -t to only count posts up to the supplied number
# Use flag -i to specify a topic id (to use a different thread)
import datetime
import getopt
import re
import sys
import time as mtime
import urllib.error
import urllib.request

# One match per post: group 1 is the author name, group 2 the timestamp text.
# The separator between them is accepted both as the raw character U+00BB and
# as the HTML entity "&raquo;", because the page is now decoded as text rather
# than inspected through the repr() of a bytes object.
POST_PATTERN = re.compile(
    r'<p class="author"><a[^>]*><img[^>]*></a>by <strong><a[^>]*>([^>]*)</a></strong>'
    r' (?:\u00bb|&raquo;) ([^>]*)</p>'
)
# Groups: month name, day, year, hour (12-hour clock), minute, am/pm.
DATETIME_PATTERN = re.compile(
    r'... (...) ([0-9]{2}), ([0-9]{4}) ([0-9]{1,2}):([0-9]{2}) ([ap]m) UTC'
)
PAGE_NUMBER_PATTERN = re.compile(
    r'Page <strong>([0-9]+)</strong> of <strong>([0-9]+)</strong>'
)
MONTHS = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
          'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12}

DEFAULT_TOPIC_ID = '108854'
# Shorthand names accepted by -i in place of a numeric topic id.
TOPIC_ALIASES = {'count': '109251', 'million': '109251'}
POSTS_PER_PAGE = 40   # step used for the "start" query parameter
MAX_ATTEMPTS = 3      # download attempts per page before giving up
REQUEST_TIMEOUT = 15  # seconds


def parse_args(argv):
    """Parse command-line flags.

    argv -- the argument list without the program name.
    Returns a tuple (log, upto, topic_id).
    Exits with a message on an unknown flag or a non-integer -t value.
    """
    log, upto, topic_id = False, None, DEFAULT_TOPIC_ID
    try:
        flags, _ = getopt.getopt(argv, 'lt:i:')
    except getopt.GetoptError as err:
        sys.exit('Invalid arguments: ' + str(err))
    for flag, value in flags:
        if flag == '-t':
            try:
                upto = int(value)
            except ValueError:
                sys.exit('Flag -t expects an integer, got ' + repr(value))
        elif flag == '-l':
            log = True
        elif flag == '-i':
            topic_id = TOPIC_ALIASES.get(value, value)
    return log, upto, topic_id


def parse_post_time(text):
    """Convert a timestamp such as 'Sat Jun 14, 2014 11:05 pm UTC' to a datetime.

    The result is a naive datetime (no tzinfo attached).
    Raises ValueError if the text does not contain a recognisable timestamp.
    """
    match = DATETIME_PATTERN.search(text)
    if match is None:
        raise ValueError('unrecognised post timestamp ' + repr(text))
    month, day, year, hour, minute, meridiem = match.groups()
    if month not in MONTHS:
        raise ValueError('unrecognised month in post timestamp ' + repr(text))
    # 12-hour to 24-hour: "12 am" is hour 0 and "12 pm" is hour 12.
    hour24 = int(hour) % 12 + (12 if meridiem == 'pm' else 0)
    return datetime.datetime(int(year), MONTHS[month], int(day), hour24, int(minute))


def score_double_post(previous_time, current_time):
    """Return the points for a double post: elapsed minutes squared, truncated to int."""
    minutes = (current_time - previous_time).total_seconds() / 60
    return int(minutes ** 2)


def fetch_page(topic_id, page_index):
    """Download one page of the topic and return its HTML as text.

    page_index is zero-based. The request is tried up to MAX_ATTEMPTS times;
    if every attempt fails the program exits with an error message.
    """
    url = ('http://forums.xkcd.com/viewtopic.php?t=' + topic_id
           + '&start=' + str(POSTS_PER_PAGE * page_index))
    for _ in range(MAX_ATTEMPTS):
        try:
            # Reading inside the "with" closes the connection even on failure
            # and lets a timeout during the read count as a failed attempt.
            with urllib.request.urlopen(url, timeout=REQUEST_TIMEOUT) as response:
                return response.read().decode('utf-8', errors='replace')
        except OSError:
            # urllib.error.URLError and socket timeouts are both OSError subclasses.
            continue
    sys.exit('Unable to load page ' + str(page_index + 1)
             + ' after ' + str(MAX_ATTEMPTS) + ' attempts')


def extract_posts(page_html):
    """Return a list of (poster, timestamp_text) pairs in page order."""
    return POST_PATTERN.findall(page_html)


def is_last_page(page_html):
    """Return True when the pagination marker says this is the final page.

    A page without a "Page X of Y" marker is treated as the final page so the
    caller stops instead of requesting pages indefinitely.
    """
    match = PAGE_NUMBER_PATTERN.search(page_html)
    if match is None:
        return True
    current_page, last_page = match.group(1, 2)
    return current_page == last_page


def count_points(topic_id, upto=None, log=False, fetch=fetch_page, delay=1):
    """Walk the topic page by page and total each player's double-post points.

    topic_id -- forum topic id, as a string.
    upto     -- stop after this many posts; None (or 0) means no limit.
    log      -- print every post and every score as it is processed.
    fetch    -- callable (topic_id, page_index) -> HTML text.
    delay    -- seconds to wait between page requests.
    Returns a dict mapping player name to total points.
    """
    player_points = {}
    last_poster = None
    last_time = None
    posts_done = 0
    pages_done = 0
    while True:
        if pages_done and delay:
            mtime.sleep(delay)  # Pause to prevent spamming the server with requests.
        page_html = fetch(topic_id, pages_done)
        if log:
            print('Page ' + str(pages_done + 1) + ':')
        for poster, time_text in extract_posts(page_html):
            if log:
                print('\t' + poster + ': ' + time_text)
            try:
                post_time = parse_post_time(time_text)
            except ValueError as err:
                # Later scores would be unreliable, so stop here but still
                # report what has been counted so far.
                print('Warning: ' + str(err) + '; stopping early', file=sys.stderr)
                return player_points
            if poster == last_poster:
                points = score_double_post(last_time, post_time)
                player_points[poster] = player_points.get(poster, 0) + points
                if log:
                    print('\t\tScored: ' + str(points))
            posts_done += 1
            if upto and posts_done == upto:
                return player_points
            last_poster, last_time = poster, post_time
        pages_done += 1
        if is_last_page(page_html):
            return player_points


def main(argv=None):
    """Run the script: parse flags, tally the thread, print the score table."""
    log, upto, topic_id = parse_args(sys.argv[1:] if argv is None else argv)
    if log:
        print('==== Log ====')
    player_points = count_points(topic_id, upto=upto, log=log)
    if log:
        print('')
    print('==== Points ====')
    for name in sorted(player_points, key=player_points.get, reverse=True):
        print(name + ': ' + str(player_points[name]))


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement