Untitled

import scipy.sparse as ssp
import numpy as np
from sklearn.preprocessing import normalize
from numba.typed import Dict
from numba import jit, types, njit, prange


@njit(parallel=True, nogil=True)
def reverse_push_bipartite(A_indptr, A_indices, A_data, nL, nR, r_max, alpha):
    '''
    Compute significant PPRs based on 2-hop random walks with restart on the
    right side.

    A_indptr: CSC indptr
    A_indices: CSC indices
    A_data: CSC data
    nL: number of nodes on left side
    nR: number of nodes on right side
    r_max: residual, a small positive real number
    alpha: restart probability

    Return:
    List[Dict[int, float]]
        For nL <= v < nL + nR, if PPR(u, v) > r_max, then it would show up as

            PPR(u, v) = result[v - nL][u]

        with an error of o(r_max)
    '''
    result = [Dict.empty(key_type=types.int64, value_type=types.float64)
              for _ in range(nL, nL + nR)]

    for t in prange(nL, nL + nR):
        r = Dict.empty(key_type=types.int64, value_type=types.float64)
        p = Dict.empty(key_type=types.int64, value_type=types.float64)
        r[t] = 1.
        while True:
            for v, r_v in r.items():
                if r_v > r_max:
                    break
            else:
                break
            A_v_indices = A_indices[A_indptr[v]:A_indptr[v+1]]
            A_v_data = A_data[A_indptr[v]:A_indptr[v+1]]
            a = alpha if v >= nL else 0
            for i, d in zip(A_v_indices, A_v_data):
                r[i] = r.get(i, 0) + (1 - a) * d * r_v
            if a > 0:
                p[v] = p.get(v, 0) + a * r_v
            del r[v]

        result[t - nL] = p
        print(t)

    return result

if __name__ == '__main__':
    # 0, 1, 2, 3 are on the left
    # 4, 5, 6, 7 are on the right
    # We want to compute all PPR(u, v) > 0.01 with restart probability 0.2,
    # based on 2-hop random walks on the right side.
    A = ssp.coo_matrix(
            (np.ones(9),
              # source nodes                destination nodes
             ([0, 0, 1, 2, 3, 4, 5, 6, 7], [5, 6, 6, 7, 6, 0, 1, 2, 3])))
    A = normalize(A, norm='l1', axis=1).tocsc()
    p = reverse_push_bipartite(
            A.indptr, A.indices, A.data, 4, 4, 0.01, 0.2)
    print('Converting to dict')
    p = [dict(d) for d in p]
    for i, d in enumerate(p):
        for u, v in d.items():
            print('PPR(%d, %d) = %f' % (u, 4 + i, v))
# Result:
# PPR(4, 4) = 0.200000
# PPR(5, 5) = 0.200000
# PPR(4, 5) = 0.080000
# PPR(6, 6) = 0.551456
# PPR(4, 6) = 0.390993
# PPR(5, 6) = 0.439320
# PPR(7, 6) = 0.439320
# PPR(7, 7) = 0.551456
# PPR(6, 7) = 0.439320
# PPR(4, 7) = 0.310993
# PPR(5, 7) = 0.351456