SHOW:
|
|
- or go back to the newest paste.
1 | """ | |
2 | Do your friends have more friends than you? | |
3 | - | It's not you. It's statistics. |
3 | + | It's not you. It's the Friendship Paradox. |
4 | Sample code that accompanies | |
5 | http://badmomgoodmom.blogspot.com/2013/01/self-learner-project-friendship.html | |
6 | Written 6 Jan 2013 by BMGM | |
7 | """ | |
8 | import pylab | |
9 | import numpy | |
10 | ||
def pickNumFriends(nsubj, shape=3, scale=5, stretch=5):
    """
    Draw a target number of friends for every subject in the circle.

    This is in here for reference but use the newer
    pickNumFriendsRecur instead.

    nsubj:   number of people/subjects in circle (must be > 0)
    shape:   shape parameter of the gamma distribution (default 3)
    scale:   scale parameter of the gamma distribution (default 5)
    stretch: factor each gamma draw is multiplied by before it is
             rounded to the nearest integer (default 5)

    Nobody can have more than nsubj-1 friends, so any count above that
    is redrawn from the same stretched gamma distribution.  If a subject
    still has no valid count after 1000 redraws (only likely when nsubj
    is tiny compared to the distribution), the count is clamped to
    nsubj-1 so the function always terminates.

    returns a numpy array of the counts sorted in ascending order
    raises ValueError if nsubj <= 0
    """
    if nsubj <= 0:
        raise ValueError('Set size must be greater than 0')

    def draw():
        # One rounded draw from the stretched gamma distribution.
        return int(0.5 + stretch * numpy.random.gamma(shape, scale))

    numf = [draw() for _ in range(nsubj)]
    print("avg numf before check:  %s" % (float(sum(numf)) / float(nsubj)))

    most = nsubj - 1      # you cannot befriend yourself
    maxtries = 1000       # safety valve against a near-endless redraw loop
    for i in range(nsubj):
        tries = 0
        while numf[i] > most and tries < maxtries:
            # Redraw with the caller's stretch, not a hard-coded factor,
            # so the replacement comes from the same distribution.
            numf[i] = draw()
            tries += 1
        if numf[i] > most:
            numf[i] = most

    print("Avg numf after size check:  %s" % (float(sum(numf)) / float(nsubj)))
    # there should be only a slight difference, if any
    # otherwise, increase the nsubj, reduce stretch, or change shape, scale

    # numpy.sort is the function pylab.sort re-exports
    return numpy.sort(numf)
38 | ||
def pickNumFriendsRecur(nsubj, shape=2, scale=5, stretch=5):
    """
    Draw a target number of friends for every subject in the circle.

    nsubj:   number of people/subjects in circle (must be > 0)
    shape:   shape parameter of the gamma distribution (default 2)
    scale:   scale parameter of the gamma distribution (default 5)
    stretch: factor each gamma draw is multiplied by before it is
             rounded to the nearest integer (default 5)

    Nobody can have more than nsubj-1 friends.  If the largest count
    exceeds that, scale is reduced by 0.1 and the whole set is drawn
    again, repeating until every count fits.  (This used to recurse,
    hence the name; it is now a plain loop.)  Should scale run out
    (reach 0 or below) everybody is given 0 friends, since the gamma
    distribution is not defined for a negative scale.

    returns a numpy array of the counts sorted in ascending order
    raises ValueError if nsubj <= 0
    """
    if nsubj <= 0:
        raise ValueError('Set size must be greater than 0')

    most = nsubj - 1      # you cannot befriend yourself
    numf = [int(0.5 + stretch * numpy.random.gamma(shape, scale))
            for _ in range(nsubj)]

    while max(numf) > most:
        print("max(numf) > nsubj-1:  %s %s" % (max(numf), most))
        scale = scale - 0.1
        print("trying reducing scale to:  %s" % scale)
        if scale <= 0:
            # Repeated subtraction of 0.1 can land on or just below zero.
            numf = [0] * nsubj
        else:
            numf = [int(0.5 + stretch * numpy.random.gamma(shape, scale))
                    for _ in range(nsubj)]
        print("avg numf after iteration:  %s" % (float(sum(numf)) / float(nsubj)))

    # numpy.sort is the function pylab.sort re-exports
    return numpy.sort(numf)
65 | ||
def assignFriends(numf):
    """
    Given the list of the number of friends per subject,
    returns a list of lists of the friends for each subject.

    numf: target number of friends for each subject; subject i is the
          i-th entry.

    Friendships are mutual, so the number of friends a subject ends up
    with can differ from the target.  Both averages are printed and a
    histogram comparing the target ("Before Adjustment") with the
    assigned counts ("After") is shown; the histogram bins run from 0
    to 490 in steps of 10.

    returns friendList, where friendList[i] holds the indices of the
    friends of subject i
    """
    nsubj = len(numf)
    friendList = _pairUpFriends(numf)
    factual = [len(friends) for friends in friendList]

    print("Target average friends:  %s" % (sum(numf) / float(nsubj)))
    print("Adusted average assigned friends:  %s" % (sum(factual) / float(nsubj)))

    pylab.title("Distribution of # Friends\nAverage number of friends: "
                + str(sum(factual) / float(nsubj)))
    pylab.hist(factual, bins=range(0, 500, 10), label="After")
    pylab.hist(numf, bins=range(0, 500, 10), histtype='step',
               label="Before Adjustment", linewidth=3)
    pylab.show()

    return friendList


def _pairUpFriends(numf):
    """
    Build the mutual friend lists for assignFriends (no output, no plots).

    Subjects are handled in index order.  Each one first draws the
    friends it still needs from the subjects after it; if there are not
    enough of those it takes all of them and fills up from earlier
    subjects that are not already its friends.
    """
    import random

    nsubj = len(numf)
    friendList = [[] for _ in range(nsubj)]

    for i in range(nsubj):
        already = set(friendList[i])
        # Earlier subjects may already have given i more friends than its
        # target; never ask random.sample for a negative number.
        npick = max(0, int(numf[i]) - len(already))
        pool = list(range(i + 1, nsubj))

        if npick <= len(pool):
            newfriends = random.sample(pool, npick)
        else:
            # Not enough left in the pool: take everyone remaining, then
            # pick the rest from earlier subjects, skipping existing
            # friends so that no friendship is recorded twice.
            earlier = [j for j in range(i) if j not in already]
            nleft = min(npick - len(pool), len(earlier))
            newfriends = pool + random.sample(earlier, nleft)

        friendList[i].extend(newfriends)
        for f in newfriends:  # newfriends friend back
            friendList[f].append(i)

    return friendList
114 | ||
def howMany(flist):
    """
    How many friends do your friends have, on average?
    And how does that compare to the number you have?

    flist: list of friend lists; flist[i] holds the indices of the
           friends of subject i.

    Prints the mean over all subjects of the "average friends of my
    friends" value and plots, per subject, the own friend count (blue)
    against that average (red).  A subject without friends has no
    friends-of-friends average; it is recorded as 0.0 so the result
    stays aligned with flist instead of dividing by zero.

    returns a list with the average number of friends of the friends of
    each subject
    """
    numf = [len(friends) for friends in flist]

    numfof = []
    for friends in flist:
        if friends:
            total = sum(len(flist[j]) for j in friends)
            numfof.append(float(total) / len(friends))
        else:
            numfof.append(0.0)

    # An empty circle has no average; report 0.0 rather than dividing by zero.
    overall = float(sum(numfof)) / len(flist) if flist else 0.0
    print("Average friends of friends:  %s" % overall)

    pylab.title("Friendship Paradox")
    pylab.xlabel('Person')
    pylab.ylabel('# Friends')
    pylab.plot(numf, "bo", label="# Friends")
    pylab.plot(numfof, "ro", label="# F of F")
    pylab.legend()
    pylab.show()

    return numfof
143 | ||
if __name__ == "__main__":
    # Demo run: a circle of 500 people.  Guarded so that importing this
    # file for its functions does not start the simulation or open the
    # plot windows.
    numf = pickNumFriendsRecur(500, shape=3, stretch=8)
    flist = assignFriends(numf)
    fof = howMany(flist)