Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from django.core.management.base import BaseCommand, CommandError
- from math import sqrt, fabs, pow
- from copy import deepcopy
- from collections import defaultdict
- from ratings.models import Rating, Participant, ParticipantRelation
class RelationEqualizer(object):
    """Normalise the pairwise participant relations of one rating.

    The working state is a square table ``_prel`` where ``_prel[a][b]`` is
    the relation value participant ``a`` recorded about participant ``b``
    (``None`` when no relation is known).  The code treats a positive value
    as "``b`` is weaker than ``a``", a negative value as "``b`` is stronger
    than ``a``" and zero as "equal".  ``relations()`` repeatedly rebuilds the
    table from weighted votes, and ``update_rates()`` turns the final table
    into a score per participant and saves it.
    """

    def __init__(self, rating):
        """Remember *rating* and start with empty working tables."""
        self._rating = rating
        self._prel = {}       # relation table: _prel[a][b] -> value or None
        self._pmap = {}       # participant pk -> participant object
        self._antifraud = {}  # participant pk -> vote weight (1.0 = full)
        self._uvn_iterations = 10  # user vector normalization iterations
        self._tn_iterations = 5    # table normalization iterations

    def _print(self):
        """Print the relation table, one row per participant.

        Rows and columns are ordered by descending ``score``.  Each cell is
        'U' (no relation), 'B' (positive), '.' (negative) or '=' (zero); the
        row ends with the score computed by ``_user_scores``.
        """
        ordered = sorted(self._pmap,
                         key=lambda pid: self._pmap[pid].score,
                         reverse=True)
        for row_id in ordered:
            cells = [u'%20s' % self._pmap[row_id].user.username]
            for col_id in ordered:
                rel = self._prel[row_id][col_id]
                if rel is None:
                    cells.append('U')
                elif rel > 0:
                    cells.append('B')
                elif rel < 0:
                    cells.append('.')
                else:
                    cells.append('=')
            cells.append('%5.1f' % self._user_scores(row_id))
            # Single-argument print(...) behaves the same as the Python 2
            # print statement and also parses on Python 3.
            print(u' '.join(cells))

    def _user_scores(self, user):
        """Return the score of *user* derived from its row of ``_prel``.

        Every known relation counts +1 when it is >= 0 and -1 otherwise;
        unknown (``None``) relations are skipped.  The sum is divided by the
        number of participants, shifted to be non-negative and passed through
        a square root.  Returns 0.0 when there are no participants.
        """
        count = len(self._pmap)
        if count == 0:
            return 0.0
        score = 0.0
        for rel in self._prel[user].values():
            if rel is None:
                continue
            score += 1 if rel >= 0 else -1
        return pow((score / count + 1.0) * 50, 0.5) * 10

    def _load_data(self):
        """Load participants and relations of the rating into the tables.

        Relations where either side is inactive are ignored.  Afterwards
        every participant has a complete row: missing pairs are ``None`` and
        the diagonal (self relation) is 0.0.
        """
        pmap = {}
        prel = {}
        relations = ParticipantRelation.objects.filter(
            subject__rating=self._rating)
        for r in relations:
            if not r.object.active or not r.subject.active:
                continue
            pmap.setdefault(r.subject.pk, r.subject)
            pmap.setdefault(r.object.pk, r.object)
            prel.setdefault(r.subject.pk, {})
            prel.setdefault(r.object.pk, {})
            prel[r.subject.pk][r.object.pk] = r.value
        for x in pmap:
            row = prel[x]
            for y in pmap:
                if y not in row:
                    row[y] = None
                if x == y:
                    row[y] = 0.0
        self._pmap = pmap
        self._prel = prel

    def _load_antifraud(self):
        """Recompute the per-participant vote weight in ``_antifraud``.

        For every pair with both directions known, the two values are
        compared: opposite signs or two zeros are consistent; equal non-zero
        signs count as one full conflict; exactly one zero counts as 2/3 of
        a conflict.  The weight is ``(1 - conflicts / pairs) ** 5`` and stays
        1.0 for a participant with no fully known pair.
        """
        antifraud = {}
        prel = self._prel
        for xid, rels in prel.items():
            count = 0.0
            frauds = 0.0
            for yid, forward in rels.items():
                if forward is None:
                    continue
                backward = prel[yid][xid]
                if backward is None:
                    continue
                count += 1
                product = forward * backward
                if product > 0:
                    frauds += 1.0
                elif product == 0 and not (forward == 0 and backward == 0):
                    frauds += 2.0 / 3.0
            if count > 0:
                antifraud[xid] = pow(1.0 - frauds / count, 5)
            else:
                antifraud[xid] = 1.0
        self._antifraud = antifraud

    def _stronger_weaker_equals(self, id, weight, s, e, w):
        """Add the votes concerning participant *id* to three accumulators.

        For every participant ``y`` two votes are considered: what *id*
        recorded about ``y`` (weighted by *id*'s antifraud weight) and what
        ``y`` recorded about *id* (weighted by ``y``'s antifraud weight, with
        the sign read the other way round).  Each vote adds ``weight`` times
        that antifraud weight to ``s[y]``, ``e[y]`` or ``w[y]``.

        id     -- participant pk the votes are relative to (name kept for
                  interface compatibility although it shadows the builtin)
        weight -- multiplier applied to every vote
        s/e/w  -- mappings updated in place; callers may pass the same
                  mapping for more than one of them
        """
        for yidx in self._prel:
            rel = self._prel[id][yidx]
            if rel is not None:
                vote = weight * self._antifraud[id]
                if rel > 0:
                    w[yidx] += vote
                elif rel < 0:
                    s[yidx] += vote
                elif rel == 0:
                    e[yidx] += vote
            rel = self._prel[yidx][id]
            if rel is not None:
                vote = weight * self._antifraud[yidx]
                if rel < 0:
                    w[yidx] += vote
                elif rel > 0:
                    s[yidx] += vote
                elif rel == 0:
                    e[yidx] += vote

    def _update_user_vector(self, user):
        """Return a new relation row for *user*.

        Starting from "user equals itself", the stronger/equal/weaker
        weights are propagated ``_uvn_iterations`` times through the current
        table and normalised per participant after every pass.  The result
        maps every participant to ``weaker - stronger``; values whose
        magnitude is at most 1/3 are snapped to 0.0.
        """
        stronger = defaultdict(float)  # players that are stronger than user
        equals = defaultdict(float)
        weaker = defaultdict(float)
        equals[user] = 1
        for _ in range(self._uvn_iterations):
            ignored = defaultdict(float)  # sink for votes that say nothing
            s = defaultdict(float)
            e = defaultdict(float)
            w = defaultdict(float)
            for yidx, yval in stronger.items():
                self._stronger_weaker_equals(yidx, yval, s, s, ignored)
            for yidx, yval in equals.items():
                self._stronger_weaker_equals(yidx, yval, s, e, w)
            for yidx, yval in weaker.items():
                self._stronger_weaker_equals(yidx, yval, ignored, w, w)
            for k in set(s) | set(e) | set(w):
                total = s[k] + w[k] + e[k]
                if total > 0:
                    s[k] /= total
                    e[k] /= total
                    w[k] /= total
            stronger, equals, weaker = s, e, w
        # Read the carried-over vectors rather than the loop locals so that
        # zero iterations yields an all-zero row instead of a NameError.
        result = {}
        for idx in self._prel:
            diff = weaker[idx] - stronger[idx]
            result[idx] = 0.0 if fabs(diff) <= 1.0 / 3 else diff
        return result

    def relations(self):
        """Load the data and return the normalised relation table.

        Runs ``_tn_iterations`` passes; each pass recomputes the antifraud
        weights from the current table and then rebuilds every row.
        """
        self._load_data()
        for _ in range(self._tn_iterations):
            self._load_antifraud()
            new_prel = {}
            for user in self._prel:
                new_prel[user] = self._update_user_vector(user)
            self._prel = new_prel
        return self._prel

    def update_rates(self):
        """Normalise the table, then store and print every score.

        Prints the table before and after normalisation, writes the new
        score to each participant and saves it.
        """
        self._load_data()
        self._print()
        prel = self.relations()
        print("")
        self._print()
        for user in prel:
            participant = self._pmap[user]
            uname = participant.user.username.encode('utf8', 'ignore')
            participant.score = self._user_scores(user)
            participant.save()
            print("User %s scores %f" % (uname, participant.score))
class Command(BaseCommand):
    """Management command that recomputes the scores of every rating."""

    help = 'ratings normalization'

    def handle(self, *args, **kwargs):
        """Run a RelationEqualizer over each Rating, reporting progress."""
        for rating in Rating.objects.all():
            self.stdout.write("Processing rating %s" % rating.title)
            equalizer = RelationEqualizer(rating)
            equalizer.update_rates()
            # Blank entry separating the output of consecutive ratings.
            self.stdout.write("")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement