# Source: pastebin diff view between Paste ID YP3wgE3b and WXbFV2Jp.
"""
Do your friends have more friends than you?
It's not you.  It's the Friendship Paradox.
Sample code that accompanies
http://badmomgoodmom.blogspot.com/2013/01/self-learner-project-friendship.html
Written 6 Jan 2013 by BMGM
"""
8
import pylab
9
import numpy 
10
11
def pickNumFriends(nsubj, shape=3, scale=5, stretch=5):
    """
        Draw a target number of friends for each of nsubj subjects.

        nsubj: number of people/subjects in circle (must be > 0)
        shape, scale: parameters handed to numpy.random.gamma
        stretch: multiplier applied to each gamma draw before rounding
        (defaults: shape=3, scale=5, stretch=5)

        numf: number of friends for each subject, computed as
        int(0.5 + stretch * gamma(shape, scale)).
        A subject cannot have more than nsubj - 1 friends, so any value
        above that is redrawn, using the same shape, scale and stretch,
        until it fits.

        Prints the average of numf before and after the size check.
        There should be only a slight difference, if any; otherwise
        increase nsubj, reduce stretch, or change shape, scale.

        Returns numf as a numpy array sorted in ascending order.
        Raises ValueError if nsubj <= 0.

        This is in here for reference but use the newer
        pickNumFriendsRecur instead.
    """
    if nsubj <= 0:
        raise ValueError('Set size must be greater than 0')

    def draw():
        # One stretched gamma sample, rounded to the nearest integer.
        return int(0.5 + stretch * numpy.random.gamma(shape, scale))

    numf = [draw() for _ in range(nsubj)]
    # Single-argument print(...) is valid on Python 2 and 3; the doubled
    # space reproduces the output of the original `print "label: ", value`.
    print("avg numf before check:  %s" % (float(sum(numf)) / float(nsubj)))

    for i in range(nsubj):
        # Redraw with the caller's stretch (previously a hard-coded 10,
        # which ignored the stretch argument for redrawn values).
        while numf[i] > nsubj - 1:
            numf[i] = draw()

    print("Avg numf after size check:  %s" % (float(sum(numf)) / float(nsubj)))

    # numpy.sort is the function pylab.sort re-exports.
    return numpy.sort(numf)
38
39
def pickNumFriendsRecur(nsubj, shape=2, scale=5, stretch=5):
    """
        Draw a target number of friends for each of nsubj subjects.

        nsubj: number of people/subjects in circle (must be > 0)
        shape, scale: parameters handed to numpy.random.gamma
        stretch: multiplier applied to each gamma draw before rounding
        (defaults: shape=2, scale=5, stretch=5)

        numf: number of friends for each subject, computed as
        int(0.5 + stretch * gamma(shape, scale)).
        A subject cannot have more than nsubj - 1 friends, so
        if max(numf) > nsubj - 1, reduces scale by 0.1
        and tries again, recursively, until not true.
        Each retry prints the offending maximum, the new scale and,
        once the retry returns, the resulting average.

        Returns numf as a numpy array sorted in ascending order.
        Raises ValueError if nsubj <= 0.
        NOTE(review): if scale is driven below zero by repeated retries,
        numpy.random.gamma is expected to raise ValueError itself.
    """
    if nsubj <= 0:
        raise ValueError('Set size must be greater than 0')

    numf = [int(0.5 + stretch * numpy.random.gamma(shape, scale))
            for _ in range(nsubj)]

    largest = max(numf)
    # The limit is nsubj - 1 (everyone except the subject), matching
    # pickNumFriends; a value equal to nsubj cannot be satisfied.
    if largest > nsubj - 1:
        # Single-argument print(...) is valid on Python 2 and 3.
        print("max(numf) > nsubj-1:  %s %s" % (largest, nsubj))
        print("trying reducing scale to:  %s" % (scale - 0.1))
        numf = pickNumFriendsRecur(nsubj, shape, scale - 0.1, stretch)
        print("avg numf after iteration:  %s"
              % (float(sum(numf)) / float(nsubj)))

    # numpy.sort is the function pylab.sort re-exports.
    return numpy.sort(numf)
65
66
def assignFriends(numf):
    """
        Given the list of the number of friends per subject,
        returns a list of lists of the friends for each subject.

        numf: target number of friends for each subject, indexed by
        subject.

        Subjects are handled in index order.  Subject i first counts the
        friends it already has (earlier subjects that picked it), then
        randomly picks the remainder from the later subjects i+1 ... end.
        If the later subjects are too few, it takes all of them and tops
        up from earlier subjects that are not already its friends (as
        many as are available).  Every pick is reciprocal: the picked
        subject gets i appended to its own list.

        Because of the reciprocal picks, the final friend counts can
        differ from numf.  Both averages are printed and both
        distributions are shown in a histogram (pylab.show is called).

        Returns friendList, where friendList[i] holds the indices of
        subject i's friends.
    """
    import random

    nsubj = len(numf)
    friendList = [[] for _ in range(nsubj)]

    for i in range(nsubj):
        # Number of friends left to assign.  Clamped at zero: a subject
        # picked by many earlier subjects may already exceed its target,
        # and random.sample raises ValueError for a negative sample size.
        npick = max(0, int(numf[i]) - len(friendList[i]))
        # A real list: random.sample needs a sequence (sets are rejected
        # on Python 3.11+), and range() is not a list on Python 3.
        pool = list(range(i + 1, nsubj))

        if npick <= len(pool):
            newfriends = random.sample(pool, npick)
        else:
            # Not enough left in the pool.  Assign all remaining, then
            # randomly pick some more from earlier rejections, i.e.
            # earlier subjects that are not already friends of i, so no
            # friendship is recorded twice.
            already = set(friendList[i])
            rejections = [j for j in range(i) if j not in already]
            nleft = min(npick - len(pool), len(rejections))
            newfriends = pool + random.sample(rejections, nleft)

        friendList[i] += newfriends

        for f in newfriends:  # newfriends friend back
            friendList[f].append(i)

    # Number of friends each subject actually ended up with.
    factual = [len(friends) for friends in friendList]

    # Single-argument print(...) is valid on Python 2 and 3; the doubled
    # space reproduces the output of the original `print "label: ", value`.
    print("Target average friends:  %s" % (sum(numf) / float(nsubj)))
    print("Adusted average assigned friends:  %s"
          % (sum(factual) / float(nsubj)))

    pylab.title("Distribution of # Friends\nAverage number of friends: "
                + str(sum(factual) / float(nsubj)))
    # NOTE(review): bins are fixed at 0..490 in steps of 10; counts beyond
    # the last bin edge would presumably not appear in the plot.
    pylab.hist(factual, bins=range(0, 500, 10), label="After")
    pylab.hist(numf, bins=range(0, 500, 10), histtype='step',
               label="Before Adjustment", linewidth=3)
    pylab.show()

    return friendList
114
115
def howMany(flist):
    """
    How many friends do your friends have, on average?
    And how does that compare to the number you have?

    flist: list of friend lists; flist[i] holds the indices of subject
    i's friends.

    For every subject, computes the average number of friends that its
    friends have.  A subject with no friends gets an average of 0.0
    (instead of a division by zero).  Prints the mean of those averages
    over all subjects, then plots each subject's own friend count
    against its friends' average (pylab.show is called).

    Returns the list of per-subject friends-of-friends averages.
    """
    # Own friend count for every subject.
    numf = [len(friends) for friends in flist]

    numfof = []
    for friends in flist:
        if friends:
            total = sum(len(flist[j]) for j in friends)
            numfof.append(float(total) / len(friends))
        else:
            # No friends: the average is undefined, so record 0.0.
            numfof.append(0.0)

    # Guard the empty-input case as well.
    overall = float(sum(numfof)) / len(flist) if flist else 0.0
    # Single-argument print(...) is valid on Python 2 and 3; the doubled
    # space reproduces the output of the original `print "label: ", value`.
    print("Average friends of friends:  %s" % overall)

    pylab.title("Friendship Paradox")
    pylab.xlabel('Person')
    pylab.ylabel('# Friends')
    pylab.plot(numf, "bo", label="# Friends")
    pylab.plot(numfof, "ro", label="# F of F")
    pylab.legend()
    pylab.show()

    return numfof
143
144
# Run the simulation for a circle of 500 people: draw target friend
# counts, build the reciprocal friend lists, then compare each person's
# friend count with the average over their friends.
numf = pickNumFriendsRecur(500, shape=3, stretch=8)
flist = assignFriends(numf)
fof = howMany(flist)