Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import scipy.sparse as ssp
- import numpy as np
- from sklearn.preprocessing import normalize
- from numba.typed import Dict
- from numba import jit, types, njit, prange
- @njit(parallel=True, nogil=True)
- def reverse_push_bipartite(A_indptr, A_indices, A_data, nL, nR, r_max, alpha):
- '''
- Compute significant PPRs based on 2-hop random walks with restart on the
- right side.
- A_indptr: CSC indptr
- A_indices: CSC indices
- A_data: CSC data
- nL: number of nodes on left side
- nR: number of nodes on right side
- r_max: residual, a small positive real number
- alpha: restart probability
- Return:
- List[Dict[int, float]]
- For nL <= v < nL + nR, if PPR(u, v) > r_max, then it would show up as
- PPR(u, v) = result[v - nL][u]
- with an error of o(r_max)
- '''
- result = [Dict.empty(key_type=types.int64, value_type=types.float64)
- for _ in range(nL, nL + nR)]
- for t in prange(nL, nL + nR):
- r = Dict.empty(key_type=types.int64, value_type=types.float64)
- p = Dict.empty(key_type=types.int64, value_type=types.float64)
- r[t] = 1.
- while True:
- for v, r_v in r.items():
- if r_v > r_max:
- break
- else:
- break
- A_v_indices = A_indices[A_indptr[v]:A_indptr[v+1]]
- A_v_data = A_data[A_indptr[v]:A_indptr[v+1]]
- a = alpha if v >= nL else 0
- for i, d in zip(A_v_indices, A_v_data):
- r[i] = r.get(i, 0) + (1 - a) * d * r_v
- if a > 0:
- p[v] = p.get(v, 0) + a * r_v
- del r[v]
- result[t - nL] = p
- print(t)
- return result
- if __name__ == '__main__':
- # 0, 1, 2, 3 are on the left
- # 4, 5, 6, 7 are on the right
- # We want to compute all PPR(u, v) > 0.01 with restart probability 0.2,
- # based on 2-hop random walks on the right side.
- A = ssp.coo_matrix(
- (np.ones(9),
- # source nodes destination nodes
- ([0, 0, 1, 2, 3, 4, 5, 6, 7], [5, 6, 6, 7, 6, 0, 1, 2, 3])))
- A = normalize(A, norm='l1', axis=1).tocsc()
- p = reverse_push_bipartite(
- A.indptr, A.indices, A.data, 4, 4, 0.01, 0.2)
- print('Converting to dict')
- p = [dict(d) for d in p]
- for i, d in enumerate(p):
- for u, v in d.items():
- print('PPR(%d, %d) = %f' % (u, 4 + i, v))
- # Result:
- # PPR(4, 4) = 0.200000
- # PPR(5, 5) = 0.200000
- # PPR(4, 5) = 0.080000
- # PPR(6, 6) = 0.551456
- # PPR(4, 6) = 0.390993
- # PPR(5, 6) = 0.439320
- # PPR(7, 6) = 0.439320
- # PPR(7, 7) = 0.551456
- # PPR(6, 7) = 0.439320
- # PPR(4, 7) = 0.310993
- # PPR(5, 7) = 0.351456
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement