# Untitled

"""
Do your friends have more friends than you?
It's not you.  It's the Friendship Paradox.
Sample code that accompanies
http://badmomgoodmom.blogspot.com/2013/01/self-learner-project-friendship.html
Written 6 Jan 2013 by BMGM
"""
import pylab
import numpy

def pickNumFriends(nsubj, shape=3, scale=5, stretch=5):
    """
        Draw a target number of friends for each of nsubj subjects.

        nsubj: number of people/subjects in circle (must be > 0)
        shape, scale: parameters handed to numpy.random.gamma
        stretch: multiplier applied to each gamma draw before rounding

        numf: number of friends for each subject, computed as
        int(0.5 + stretch*gamma(shape, scale)).
        If the number of friends for a subject > nsubj-1 (there are only
        nsubj-1 other people), another number is drawn from the same
        stretched distribution until it fits.  This can take many
        retries when nsubj is small compared with the typical draw.

        Prints the average of numf before and after the size check.
        Returns a numpy array sorted in ascending order.
        Raises ValueError if nsubj <= 0.

        This is in here for reference but use the newer
        pickNumFriendsRecur instead.

    """

    if nsubj <= 0:
        raise ValueError('Set size must be greater than 0')

    def draw():
        # One rounded sample from the stretched gamma distribution.
        return int(0.5 + stretch*numpy.random.gamma(shape, scale))

    numf = [draw() for _ in range(nsubj)]
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("avg numf before check:  %s" % (float(sum(numf))/float(nsubj)))

    maxfriends = nsubj - 1
    for i in range(nsubj):
        # Redraw with the caller's stretch (not a fixed factor) so that
        # replacements follow the same distribution as the first draw.
        while numf[i] > maxfriends:
            numf[i] = draw()

    print("Avg numf after size check:  %s" % (float(sum(numf))/float(nsubj)))
    # there should be only a slight difference, if any
    # otherwise, increase the nsubj, reduce stretch, or change shape, scale

    # numpy.sort is the function pylab re-exports as pylab.sort.
    return numpy.sort(numf)

def pickNumFriendsRecur(nsubj, shape=2, scale=5, stretch=5):
    """
        Draw a target number of friends for each of nsubj subjects.

        nsubj: number of people/subjects in circle (must be > 0)
        shape, scale: parameters handed to numpy.random.gamma
        stretch: multiplier applied to each gamma draw before rounding

        numf: number of friends for each subject, computed as
        int(0.5 + stretch*gamma(shape, scale)).
        if max(numf) >= nsubj (nobody can have more than nsubj-1
        friends), reduces scale by 0.1 and tries again, recursively,
        until not true.  Progress is printed for every retry.
        NOTE: scale is not floored; if nsubj is tiny relative to the
        distribution the retries may drive it to zero or below, which
        numpy.random.gamma presumably rejects -- verify before relying
        on very small nsubj.

        Returns a numpy array sorted in ascending order.
        Raises ValueError if nsubj <= 0.

    """

    if nsubj <= 0:
        raise ValueError('Set size must be greater than 0')

    numf = [int(0.5 + stretch*numpy.random.gamma(shape, scale))
            for _ in range(nsubj)]

    biggest = max(numf)
    # A subject can befriend at most the nsubj-1 other subjects, so a
    # count equal to nsubj is already out of range.
    if biggest >= nsubj:
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("max(numf) >= nsubj:  %s %s" % (biggest, nsubj))
        print("trying reducing scale to:  %s" % (scale - 0.1))
        numf = pickNumFriendsRecur(nsubj, shape, scale - 0.1, stretch)
        print("avg numf after iteration:  %s"
              % (float(sum(numf))/float(nsubj)))

    # numpy.sort is the function pylab re-exports as pylab.sort.
    return numpy.sort(numf)

def assignFriends(numf, plot=True):
    """
        Given the list of the number of friends per subject,
        returns a list of lists of the friends for each subject.

        numf: target number of friends for each subject (index = subject)
        plot: when True (the default) show a histogram comparing the
        target counts with the counts actually assigned

        Subjects are handled in index order.  Subject i first picks at
        random among later subjects (i+1 ...); every pick is mutual, so
        the chosen subject gets i appended to its own list.  If the
        later subjects run out, i takes all of them and then fills up
        from earlier subjects that are not already its friends, as far
        as any are available.  A subject that has already been picked
        more often than its target keeps those friends and picks nobody
        new, so the assigned counts can differ from the targets.

        Prints the target and the assigned average number of friends.
        Returns [] for an empty numf.
    """
    import random

    nsubj = len(numf)
    if nsubj == 0:
        # Nothing to assign; also avoids dividing by zero in the averages.
        return []

    friendList = [[] for _ in range(nsubj)]

    for i in range(nsubj):
        # Friends still to find.  Earlier subjects may already have given
        # i more friends than its target, hence the clamp at zero.
        npick = max(0, int(numf[i]) - len(friendList[i]))
        pool = list(range(i + 1, nsubj))

        if npick <= len(pool):
            newfriends = random.sample(pool, npick)
        else:
            # Not enough left in the pool: take everyone remaining, then
            # draw the rest from earlier subjects who are not friends yet.
            already = set(friendList[i])
            candidates = [j for j in range(i) if j not in already]
            nleft = min(npick - len(pool), len(candidates))
            newfriends = pool + random.sample(candidates, nleft)

        friendList[i] += newfriends
        for f in newfriends:  # newfriends friend back
            friendList[f].append(i)

    factual = [len(friends) for friends in friendList]
    avgactual = sum(factual)/float(nsubj)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Target average friends:  %s" % (sum(numf)/float(nsubj)))
    print("Adusted average assigned friends:  %s" % avgactual)

    if plot:
        bins = list(range(0, 500, 10))
        pylab.title("Distribution of # Friends\nAverage number of friends: "
                    + str(avgactual))
        pylab.hist(factual, bins=bins, label="After")
        pylab.hist(numf, bins=bins, histtype='step',
                   label="Before Adjustment", linewidth=3)
        pylab.show()

    return friendList

def howMany(flist, plot=True):
    """
    How many friends do your friends have, on average?
    And how does that compare to the number you have?

    flist: list of friend lists; flist[i] holds the indices of the
    friends of subject i
    plot: when True (the default) plot each subject's own friend count
    against the average friend count of that subject's friends

    Prints the mean over all subjects of the per-subject average.
    Returns a list with, for each subject, the average number of friends
    its friends have.  A subject without friends gets 0.0, since the
    average is undefined there.  Returns [] for an empty flist.
    """
    if not flist:
        # Nothing to average; avoids dividing by zero below.
        return []

    numf = [len(friends) for friends in flist]

    numfof = []
    for friends in flist:
        if friends:
            total = sum(len(flist[j]) for j in friends)
            numfof.append(float(total)/len(friends))
        else:
            numfof.append(0.0)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Average friends of friends:  %s" % (float(sum(numfof))/len(flist)))

    if plot:
        pylab.title("Friendship Paradox")
        pylab.xlabel('Person')
        pylab.ylabel('# Friends')
        pylab.plot(numf, "bo", label="# Friends")
        pylab.plot(numfof, "ro", label="# F of F")
        pylab.legend()
        pylab.show()

    return numfof

# Demo run: 500 subjects with shape=3 and stretch=8; scale keeps the
# default of pickNumFriendsRecur.
numf = pickNumFriendsRecur(nsubj=500, stretch=8, shape=3)
# Turn the per-subject friend counts into friend lists.
flist = assignFriends(numf)
# Average number of friends that each subject's friends have.
fof = howMany(flist)