Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import datetime
- import sys
- import random
- import numpy as np
def _rank_dists(ranks1, ranks2):
    """Return the per-key rank differences for keys present in both arrays.

    Both arguments are NumPy structured arrays with a 'keys' field and a
    'ranks' field.  Rows whose key does not occur in the other array are
    dropped, the remaining rows are aligned by key, and the result is
    ``ranks1['ranks'] - ranks2['ranks']`` ordered by ascending key.

    Keys are expected to be unique within each array; duplicated keys are
    not handled and may leave the two filtered arrays with different lengths.
    """
    # Keep only the rows whose key also occurs in the other ranking.
    common1 = ranks1[np.isin(ranks1['keys'], ranks2['keys'])]
    common2 = ranks2[np.isin(ranks2['keys'], ranks1['keys'])]
    # Align both arrays by key so that position i refers to the same key in
    # each of them.  Sorting by rank instead would subtract the ranks of
    # unrelated keys.
    common1 = np.sort(common1, order=['keys'])
    common2 = np.sort(common2, order=['keys'])
    # Element-wise difference of the ranks of matching keys.
    return common1['ranks'] - common2['ranks']
- #@profile
def spearman_correlation(ranks1, ranks2):
    """Return the Spearman correlation coefficient for two rankings.

    Each ranking is either a dict mapping an integer key to its rank or a
    sequence of ``(key, rank)`` tuples.  The coefficient ranges from -1.0
    (ranks are opposite) to 1.0 (ranks are identical) and is only calculated
    for keys present in both rankings (for meaningful results, remove keys
    present in only one list before ranking).

    Returns 0.0 when fewer than two keys are shared, because the coefficient
    is undefined in that case.
    """
    rank_dtype = [('keys', int), ('ranks', float)]
    # A dict cannot be converted to a structured array directly; turn it
    # into the equivalent list of (key, rank) tuples first.
    if isinstance(ranks1, dict):
        ranks1 = list(ranks1.items())
    if isinstance(ranks2, dict):
        ranks2 = list(ranks2.items())
    ranks1 = np.array(ranks1, dtype=rank_dtype)
    ranks2 = np.array(ranks2, dtype=rank_dtype)

    diffs = _rank_dists(ranks1, ranks2)
    n_diffs = diffs.size
    # With 0 or 1 shared keys the denominator n * (n^2 - 1) is zero.
    if n_diffs < 2:
        return 0.0
    # Sum of squared rank differences.
    sum_sq = np.sum(diffs * diffs)
    return 1 - (6 * sum_sq / (n_diffs * (n_diffs * n_diffs - 1)))
if __name__ == '__main__':
    # Timing driver: build two random rankings over the keys 1..n and report
    # how long spearman_correlation takes on them.
    if len(sys.argv) < 2:
        sys.exit("usage: %s N" % sys.argv[0])
    n = int(sys.argv[1])
    # list(...) so that random.shuffle also works where range() is lazy.
    possible_items_x = list(range(1, n + 1))
    possible_items_y = list(range(1, n + 1))
    random.shuffle(possible_items_y)
    random.shuffle(possible_items_x)
    ranks1 = []
    ranks2 = []
    # Walk both shuffled key lists from the end, giving each key a random
    # rank value in [1.0, 5.0).
    for item1, item2 in zip(reversed(possible_items_x),
                            reversed(possible_items_y)):
        ranks1.append((item1, random.random() * 4.0 + 1.0))
        ranks2.append((item2, random.random() * 4.0 + 1.0))
    start_time = datetime.datetime.now()
    print(spearman_correlation(ranks1, ranks2))
    end_time = datetime.datetime.now()
    secs = end_time - start_time
    # Single formatted argument so the output is the same whether print is
    # a statement or a function.
    print("Main took %s" % (secs,))
Add Comment
Please, Sign In to add comment