Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def pagerank2(E, return_vector=False, max_iters=1000, tolerance=1e-6):
    """Compute PageRank from an adjacency matrix by the power method.

    Inputs:
      E: adjacency matrix with links going from cols to rows.
         E is a matrix of 0s and 1s, where E[i,j] = 1 means
         that web page (vertex) j has a link to web page i.
         Works for a dense numpy array; a scipy sparse matrix with
         .shape/.sum/.dot should also work (never densified here).
      return_vector = False: If True, return the eigenvector as well as the ranking.
      max_iters = 1000: Maximum number of power iterations to do.
      tolerance = 1e-6: Stop when the eigenvector norm changes by less than this.

    Outputs:
      ranking: Permutation giving the ranking, most important first.
      vector (only if return_vector is True): Dominant eigenvector of the
        PageRank matrix (normalized to unit 2-norm, made non-negative).

    Method:
      1. Treat each dangling vertex (out-degree 0) as linking to all
         *other* vertices with equal weight 1/(n-1).
      2. Scale columns to sum to 1 (done implicitly via division by out-degree).
      3. Mix in a rank-one "random jump" term with probability m = 0.15.
      4. Run the power method; the dangling and jump contributions are
         applied as rank-one corrections so no dense matrix is formed.
      5. Sort the eigenvector to get the rankings.
    """
    nrows, n = E.shape
    assert nrows == n, 'E must be square'

    # Out-degree of each vertex = column sums. A scipy sparse sum returns a
    # 2-D matrix, so squeeze to a flat 1-D float array in either case.
    outdegree = np.squeeze(np.asarray(E.sum(axis=0))).astype(float).reshape(n)

    # Rank-one correction for dangling vertices: weight 1/(n-1) to every
    # other vertex. NOTE(review): requires n > 1 if any vertex is dangling,
    # same as the original formulation.
    outdegreef = np.zeros(n)
    dangling = (outdegree == 0)
    if dangling.any():
        outdegreef[dangling] = 1.0 / (n - 1)
        outdegree[dangling] = 1.0  # avoid division by zero below

    m = 0.15  # probability of a random jump
    e = np.ones(n)
    v = e / npla.norm(e)  # start from the uniform unit vector

    converged = False
    for iteration in range(max_iters):
        oldv = v
        # Real links: E scaled column-stochastic, applied without forming
        # a scaled copy of E.
        Ev = E.dot(v / outdegree)
        # Dangling links: uniform spread minus the (excluded) self-link.
        Fv = np.ones(n) * outdegreef.dot(v) - outdegreef * v
        # Random-jump term: rank-one, uniform.
        Sv = np.ones(n) * (np.sum(v) * m / n)
        v = (1 - m) * (Ev + Fv) + Sv
        eigval = npla.norm(v)
        v = v / eigval
        if npla.norm(v - oldv) < tolerance:
            converged = True
            break

    # Record convergence with a flag instead of re-testing norm(v - oldv),
    # which also avoids touching undefined names when max_iters == 0.
    if converged:
        print('Dominant eigenvalue is %f after %d iterations.\n' % (eigval, iteration+1))
    else:
        print('Did not converge to tolerance %e after %d iterations.\n' % (tolerance, max_iters))

    # The dominant eigenvector has one sign throughout; make it positive.
    vector = np.abs(v)

    # Sort descending: most important page first.
    ranking = np.argsort(vector)[::-1]

    if return_vector:
        return ranking, vector
    else:
        return ranking
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement