Advertisement
Guest User

Friendship Paradox in Python

a guest
Jan 6th, 2013
42
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.99 KB | None | 0 0
  1. """
  2. Do your friends have more friends than you?
  3. It's not you. It's statistics.
  4. Sample code that accompanies
  5. http://badmomgoodmom.blogspot.com/2013/01/self-learner-project-friendship.html
  6. Written 6 Jan 2013 by BMGM
  7. """
  8. import pylab
  9. import numpy
  10.  
  11. def pickNumFriends(nsubj, shape=3, scale=5, stretch=5):
  12. """
  13. nsubj: number of people/subjects in circle
  14. numf: number of friends for each subject, drawn from a stretched
  15. gamma distribution of default values (shape=4, scale=5, stretch=10)
  16. If number of friends for a subject > nsub, selects another number
  17. returns a list sorted in ascending order
  18. This is in here for reference but use the newer
  19. pickNumFriendsRecur instead.
  20.  
  21. """
  22.  
  23. if nsubj <= 0:
  24. raise ValueError('Set size must be greater than 0')
  25.  
  26. numf = [int(0.5+stretch*numpy.random.gamma(shape,scale)) for _ in range(nsubj)]
  27. print "avg numf before check: ", float(sum(numf))/float(nsubj)
  28.  
  29. for i in range(nsubj):
  30. while numf[i] > nsubj-1 :
  31. numf[i] = int(0.5+10*numpy.random.gamma(shape,scale))
  32.  
  33. print "Avg numf after size check: ", float(sum(numf))/float(nsubj)
  34. # there should be only a slight difference, if any
  35. # otherwise, increase the nsubj, reduce stretch, or change shape, scale
  36.  
  37. return(pylab.sort(numf))
  38.  
  39. def pickNumFriendsRecur(nsubj, shape=2, scale=5, stretch=5):
  40. """
  41. nsubj: number of people/subjects in circle
  42. numf: number of friends for each subject, drawn from a stretched
  43. gamma distribution of default values (shape=4, scale=5, stretch=10)
  44. if max(numf) > nsubj, reduces scale factor
  45. and tries again, recursively, until not true
  46. returns a list sorted in ascending order
  47.  
  48. """
  49.  
  50. if nsubj <= 0:
  51. raise ValueError('Set size must be greater than 0')
  52.  
  53. numf = [int(0.5+stretch*numpy.random.gamma(shape,scale)) for _ in range(nsubj)]
  54.  
  55. if max(numf) > nsubj:
  56. print "max(numf) > nsubj: ", max(numf), nsubj
  57. print "trying reducing scale to: ", scale - 0.1
  58. numf = pickNumFriendsRecur(nsubj, shape, scale-0.1, stretch)
  59. print "avg numf after iteration: ", float(sum(numf))/float(nsubj)
  60.  
  61. #pylab.hist(numf,20)
  62. #pylab.show()
  63.  
  64. return(pylab.sort(numf))
  65.  
  66. def assignFriends(numf):
  67. """
  68. Given the list of the number of friends per subject,
  69. returns a list of lists of the friends for each subject
  70. """
  71. import random
  72. nsubj = len(numf)
  73. friendList = [[] for _ in range(nsubj)]
  74. toofew = 0
  75. factual = []
  76.  
  77. for i in range(nsubj):
  78. #print "i, numf, friendstart: ", i, numf[i], friendList[i]
  79. # number of friends left to assign
  80. npick = numf[i] - len(friendList[i])
  81. pool = range(i+1, nsubj)
  82.  
  83. #print "nleft to pick, len(pool), pool: ", nleft, len(pool), pool
  84. if npick <= len(pool):
  85. newfriends = random.sample(set(pool),npick)
  86. else: # Not enough left in the pool. Assigning all remaining"
  87. toofew += 1
  88. newfriends = pool
  89. # Then randomly pick some more from earlier rejections
  90. nleft = npick - len(newfriends)
  91. reluctant = random.sample(set(range(i)), nleft)
  92. newfriends += reluctant
  93.  
  94. #print friends
  95. friendList[i] += newfriends
  96.  
  97.  
  98. for f in newfriends: # newfriends friend back
  99. friendList[f].append(i)
  100.  
  101. for i in range(nsubj):
  102. factual.append(len(friendList[i]))
  103.  
  104. print "Target average friends: ", sum(numf)/float(nsubj)
  105. print "Adusted average assigned friends: ", sum(factual)/float(nsubj)
  106.  
  107. pylab.title("Distribution of # Friends\nAverage number of friends: "+str(sum(factual)/float(nsubj)))
  108. p1 = pylab.hist(factual,bins=range(0,500,10), label="After")
  109. p2 = pylab.hist(numf, bins=range(0,500,10), histtype = 'step', label="Before Adjustment", linewidth=3)
  110. #pylab.legend([p2,p1],["Before adjustment","After"])
  111. pylab.show()
  112.  
  113. return friendList
  114.  
  115. def howMany(flist):
  116. """
  117. How many friends do your friends have, on average?
  118. And how does that compare to the number you have?
  119. """
  120. numfof = []
  121. numf = []
  122.  
  123. for i in range(len(flist)):
  124. numf.append(len(flist[i]))
  125. ifof = 0
  126. for j in flist[i]:
  127. ifof += len(flist[j])
  128. avgfof = float(ifof)/len(flist[i])
  129. numfof.append(avgfof)
  130.  
  131. #print "Average friends: ", float(sum(numf))/len(flist)
  132. print "Average friends of friends: ", float(sum(numfof))/len(flist)
  133.  
  134. pylab.title("Friendship Paradox")
  135. pylab.xlabel('Person')
  136. pylab.ylabel('# Friends')
  137. p1 = pylab.plot(numf, "bo", label="# Friends")
  138. p2 = pylab.plot(numfof, "ro", label="# F of F")
  139. pylab.legend()
  140. pylab.show()
  141.  
  142. return numfof
  143.  
  144. numf = pickNumFriendsRecur(500, shape=3, stretch=8)
  145. flist = assignFriends(numf)
  146. fof = howMany(flist)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement