# View difference between Paste ID: YP3wgE3b and WXbFV2Jp

"""
1		"""
2		Do your friends have more friends than you?
3	-	It's not you. It's statistics.
3	+	It's not you. It's the Friendship Paradox.
4		Sample code that accompanies
5		http://badmomgoodmom.blogspot.com/2013/01/self-learner-project-friendship.html
6		Written 6 Jan 2013 by BMGM
7		"""
8		import pylab
9		import numpy
10
def pickNumFriends(nsubj, shape=3, scale=5, stretch=5):
    """
    Draw a target number of friends for each of nsubj subjects.

    Each count is int(0.5 + stretch * g), where g is one draw from
    numpy.random.gamma(shape, scale).  A count larger than nsubj - 1
    (more friends than there are other people) is redrawn from the same
    stretched distribution until it fits, so this can take a while when
    nsubj is small compared to stretch * shape * scale.

    This is in here for reference but use the newer
    pickNumFriendsRecur instead.

    nsubj: number of people/subjects in circle
    shape, scale: parameters handed to numpy.random.gamma
    stretch: multiplier applied to every gamma draw before rounding

    Returns a numpy array of the counts sorted in ascending order.
    Raises ValueError if nsubj <= 0.
    Prints the average count before and after the size check.
    """
    if nsubj <= 0:
        raise ValueError('Set size must be greater than 0')

    def draw():
        # One stretched gamma draw, rounded to the nearest whole friend.
        return int(0.5 + stretch * numpy.random.gamma(shape, scale))

    numf = [draw() for _ in range(nsubj)]
    # Single-argument print keeps the output text the same on Python 2 and 3.
    print("avg numf before check:  %s" % (float(sum(numf)) / float(nsubj)))

    # Redraw with the caller's stretch (not a fixed factor) so replacement
    # values come from the same distribution as the initial ones.
    for i in range(nsubj):
        while numf[i] > nsubj - 1:
            numf[i] = draw()

    print("Avg numf after size check:  %s" % (float(sum(numf)) / float(nsubj)))
    # there should be only a slight difference, if any
    # otherwise, increase the nsubj, reduce stretch, or change shape, scale

    # numpy.sort is the function pylab re-exports as pylab.sort.
    return numpy.sort(numf)
38
def pickNumFriendsRecur(nsubj, shape=2, scale=5, stretch=5):
    """
    Draw a target number of friends for each of nsubj subjects.

    Each count is int(0.5 + stretch * g), where g is one draw from
    numpy.random.gamma(shape, scale).  If the largest count exceeds
    nsubj - 1 (the number of other people), the whole set is thrown away,
    scale is reduced and a fresh set is drawn, until every count fits.

    The retry is a loop rather than a recursive call; the name is kept
    for existing callers.

    nsubj: number of people/subjects in circle
    shape, scale: parameters handed to numpy.random.gamma
    stretch: multiplier applied to every gamma draw before rounding

    Returns a numpy array of the counts sorted in ascending order.
    Raises ValueError if nsubj <= 0.
    Prints a progress line for every retry.
    """
    if nsubj <= 0:
        raise ValueError('Set size must be greater than 0')

    retried = False
    while True:
        numf = [int(0.5 + stretch * numpy.random.gamma(shape, scale))
                for _ in range(nsubj)]

        # A subject can be friends with at most the nsubj - 1 other people.
        if max(numf) <= nsubj - 1:
            break

        retried = True
        print("max(numf) > nsubj-1:  %s %s" % (max(numf), nsubj - 1))
        # Step down by 0.1 while that leaves a sensible value, then halve:
        # scale stays strictly positive, which numpy.random.gamma requires.
        scale = max(scale - 0.1, scale / 2.0)
        print("trying reducing scale to:  %s" % scale)

    if retried:
        print("avg numf after iteration:  %s" % (float(sum(numf)) / float(nsubj)))

    # numpy.sort is the function pylab re-exports as pylab.sort.
    return numpy.sort(numf)
65
def assignFriends(numf):
    """
    Given the list of the number of friends per subject,
    returns a list of lists of the friends for each subject.

    Subjects are handled in index order.  Subject i first picks at random
    among the later subjects (i+1 .. end); every friendship is mutual, so
    i is also appended to each chosen friend's list.  If the later
    subjects are too few to reach the target, all of them are taken and
    the remainder is drawn from earlier subjects who are not yet friends
    with i, as far as such people exist.  Final counts can therefore
    differ from the targets in numf.

    numf: target number of friends for each subject

    Prints the target and the assigned average and shows a histogram of
    both distributions with pylab.  An empty numf returns [] without
    printing or plotting.
    """
    import random

    nsubj = len(numf)
    if nsubj == 0:
        return []

    friendList = [[] for _ in range(nsubj)]

    for i in range(nsubj):
        # Number of friends left to assign.  Earlier subjects may already
        # have pushed this one to or past its target, so never go negative.
        npick = max(0, int(numf[i]) - len(friendList[i]))
        pool = list(range(i + 1, nsubj))

        if npick <= len(pool):
            newfriends = random.sample(pool, npick)
        else:
            # Not enough left in the pool. Assigning all remaining.
            newfriends = pool
            # Then randomly pick some more from earlier rejections,
            # skipping people who are already friends with i so that no
            # friendship is recorded twice.
            nleft = npick - len(newfriends)
            already = set(friendList[i])
            candidates = [j for j in range(i) if j not in already]
            newfriends += random.sample(candidates, min(nleft, len(candidates)))

        friendList[i] += newfriends

        for f in newfriends:  # newfriends friend back
            friendList[f].append(i)

    factual = [len(friends) for friends in friendList]

    print("Target average friends:  %s" % (sum(numf) / float(nsubj)))
    print("Adusted average assigned friends:  %s" % (sum(factual) / float(nsubj)))

    # Bins of width 10 from 0; extended past 500 when counts are larger so
    # that big circles are not cut off.
    top = max(500, int(max(max(factual), max(numf))) + 10)
    bins = list(range(0, top, 10))

    pylab.title("Distribution of # Friends\nAverage number of friends: "
                + str(sum(factual) / float(nsubj)))
    pylab.hist(factual, bins=bins, label="After")
    pylab.hist(numf, bins=bins, histtype='step',
               label="Before Adjustment", linewidth=3)
    pylab.show()

    return friendList
114
def howMany(flist):
    """
    How many friends do your friends have, on average?
    And how does that compare to the number you have?

    flist: list of friend lists; flist[i] holds the indices of person i's
    friends

    Returns a list with, for each person, the average number of friends
    that person's friends have.  A person with no friends gets 0.0.
    Prints the mean of those averages and plots them against each
    person's own friend count with pylab.  An empty flist returns []
    without printing or plotting.
    """
    if not flist:
        return []

    numf = [len(friends) for friends in flist]

    numfof = []
    for friends in flist:
        if friends:
            total = sum(len(flist[j]) for j in friends)
            numfof.append(float(total) / len(friends))
        else:
            # No friends to average over; avoid dividing by zero.
            numfof.append(0.0)

    print("Average friends of friends:  %s" % (float(sum(numfof)) / len(flist)))

    pylab.title("Friendship Paradox")
    pylab.xlabel('Person')
    pylab.ylabel('# Friends')
    pylab.plot(numf, "bo", label="# Friends")
    pylab.plot(numfof, "ro", label="# F of F")
    pylab.legend()
    pylab.show()

    return numfof
143
# Driver: draw target friend counts for a circle of 500 people, turn them
# into mutual friend lists, then compare each person's friend count with
# the average count among that person's friends.
numf = pickNumFriendsRecur(nsubj=500, shape=3, stretch=8)
flist = assignFriends(numf=numf)
fof = howMany(flist=flist)