Untitled

log(1+tf)*log(N/(1+df))   # N is the number of columns of the matrix
# tf is the value at a cell of the matrix
# df is the number of non-zero elements in a row

csr[i,j] = log(1+csr[i,j])*log(csr.shape[1]/(1+sum(csr[i]!=0)))

import numpy as np
import scipy.sparse
import scipy.io

# Load the term-count matrix and re-weight every stored entry in place as
#     log(1 + tf) * log(N / (1 + df))
# where tf is the entry's value, N is the number of columns of the matrix and
# df is the number of non-zero entries in the entry's row.
# Stored as float64 so the logarithms are not truncated if the file holds
# integer counts.
csr = scipy.sparse.csr_matrix(scipy.io.mmread('thedata'), dtype=np.float64)

# Drop explicitly stored zeros so that the number of stored entries per row
# equals the number of non-zero entries per row.
csr.eliminate_zeros()

# N in the formula: the number of columns of the matrix.
num_of_docs = csr.shape[1]

# In CSR format consecutive indptr values delimit each row's slice of
# csr.data, so their differences are the per-row non-zero counts (df).
df = np.diff(csr.indptr)
idf = np.log(num_of_docs / (1.0 + df))

# csr.data holds only the stored entries, row after row; repeating each row's
# idf value df times lines it up with that row's entries.  Operating on
# csr.data directly avoids per-row sparse indexing/assignment and the dense
# conversion, which is what made the row-by-row loop slow.
csr.data = np.log1p(csr.data) * np.repeat(idf, df)