Advertisement
stackexchange-gilles

suggested-edit-delay-stats

Dec 13th, 2013
142
0
Never
Not a member of Pastebin yet? Sign up; it unlocks many cool features!
Python 2.26 KB | None | 0 0
  1. #! /usr/bin/env python
  2. import json, sys, time, urllib
  3.  
  4. filter_code = '!*MKIiKJWje.bjTxT'
  5. topbar_rollout_dates = {
  6.     'meta.stackoverflow': 1384466400, # 2013-11-14 22:00 UTC
  7.     None: 1386183600, # 2013-12-04 19:00 UTC
  8.     'askubuntu': 1386633600, # 2013-12-10 00:00 UTC
  9. }
  10. seconds_per_week = 604800
  11. delay_between_sites = 1
  12.  
  13. def retrieve(site, from_date, to_date):
  14.     page = 1
  15.     url = 'http://api.stackexchange.com/2.1/suggested-edits?page=%d&pagesize=100&fromdate=%d&todate=%d&order=desc&sort=creation&site=%s&filter=%s' % (page, from_date, to_date, site, filter_code)
  16.     f = urllib.urlopen(url)
  17.     data = json.load(f)
  18.     f.close()
  19.     return data['items']
  20.  
  21. def average_delay(se_list):
  22.     total = 0
  23.     count = 0
  24.     for se in se_list:
  25.         creation_date = se['creation_date']
  26.         if se.has_key('approval_date'):
  27.             total += se['approval_date'] - creation_date
  28.             count += 1
  29.         elif se.has_key('rejection_date'):
  30.             total += se['rejection_date'] - creation_date
  31.             count += 1
  32.     if count == 0: return (0, float('nan'))
  33.     return (count, total / 60.0 / count)
  34.  
  35. if __name__ == '__main__':
  36.     print 'site                   before^2      before       after'
  37.     print '                      avg(mn) cnt  avg(mn) cnt  avg(mn) cnt'
  38.     for site in sys.argv[1:]:
  39.         topbar_rollout_date = \
  40.             topbar_rollout_dates[site if topbar_rollout_dates.has_key(site) else None]
  41.         after_list = retrieve(site, topbar_rollout_date, topbar_rollout_date + seconds_per_week)
  42.         (after_count, after_delay) = average_delay(after_list)
  43.         before_list = retrieve(site, topbar_rollout_date - seconds_per_week, topbar_rollout_date)
  44.         (before_count, before_delay) = average_delay(before_list)
  45.         before2_list = retrieve(site, topbar_rollout_date - 2 * seconds_per_week, topbar_rollout_date - seconds_per_week)
  46.         (before2_count, before2_delay) = average_delay(before2_list)
  47.         print '%20s  %6.0f %4d  %6.0f %4d  %6.0f %4d  %4d%%' % (
  48.             site,
  49.             before2_delay, before2_count,
  50.             before_delay, before_count,
  51.             after_delay, after_count,
  52.             int(200.0 * after_delay / (before2_delay + before_delay)))
  53.         time.sleep(delay_between_sites)
Advertisement
Add Comment
Please sign in to add a comment
Advertisement