Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- log(1+tf)*log(N/(1+df)) # N is the number of columns of the matrix
- # tf is the value at a cell of the matrix
- # df is the number of non-zero elements in a row
- csr[i,j] = log(1+csr[i,j])*log(csr.shape[1]/(1+sum(csr[i]!=0)))
import numpy as np
import scipy.sparse
import scipy.io


def tfidf_weight(matrix):
    """Return a CSR copy of *matrix* re-weighted as log(1+tf) * log(N/(1+df)).

    For every stored non-zero cell:
      * tf is the value of the cell,
      * df is the number of non-zero cells in the same row,
      * N  is the number of columns of the matrix.

    The input is not modified. The result is always float64 so the
    logarithms are not truncated when the input holds integer counts.

    The weighting is applied directly to the CSR ``data`` array in a single
    vectorised step instead of assigning into the sparse matrix row by row,
    which was the slow part of the original loop.
    """
    csr = scipy.sparse.csr_matrix(matrix, dtype=np.float64, copy=True)

    # Canonicalise so that "stored entry" means "non-zero cell": merge any
    # duplicate coordinates, then drop explicitly stored zeros. This mirrors
    # the original use of nonzero(), which ignores explicit zeros.
    csr.sum_duplicates()
    csr.eliminate_zeros()

    # N was referenced as the undefined name `num_of_docs`; per the formula
    # it is the number of columns.
    num_of_docs = float(csr.shape[1])

    # indptr[i+1] - indptr[i] is the number of stored entries in row i.
    df = np.diff(csr.indptr)
    row_weight = np.log(num_of_docs / (1.0 + df))

    # Expand the per-row weight to one value per stored entry; `data` is
    # laid out row after row, so repeating by df lines the two arrays up.
    csr.data = np.log1p(csr.data) * np.repeat(row_weight, df)
    return csr


if __name__ == "__main__":
    csr = tfidf_weight(scipy.io.mmread('thedata'))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement