Advertisement
Guest User

Untitled

a guest
Dec 28th, 2014
270
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.70 KB | None | 0 0
  1. log(1+tf)*log(N/(1+df)) # N is the number of columns of the matrix
  2. # tf is the value at a cell of the matrix
  3. # df is the number of non-zero elements in a row
  4.  
  5. csr[i,j] = log(1+csr[i,j])*log(csr.shape[1]/(1+sum(csr[i]!=0)))
  6.  
  7. import numpy as np
  8. import scipy.sparse
  9. import scipy.io
  10.  
  11. csr = scipy.sparse.csr_matrix(scipy.io.mmread('thedata'))
  12.  
  # Apply the weighting log(1 + tf) * log(N / (1 + df)) to every non-zero cell,
  # where tf is the cell value, N is the number of columns of the matrix and
  # df is the number of non-zero cells in that cell's row.
  #
  # The whole matrix is updated through its CSR arrays in one vectorized pass
  # instead of one sparse fancy-index assignment per row.

  # Merge duplicate entries and drop explicitly stored zeros so that the number
  # of stored values in each row is exactly that row's non-zero count.
  csr.sum_duplicates()
  csr.eliminate_zeros()

  # N in the formula; this name was used before but never assigned.
  num_of_docs = csr.shape[1]

  # indptr[i + 1] - indptr[i] is the number of stored values in row i.
  df = np.diff(csr.indptr)

  # One factor per row; the float literal forces true division on Python 2.
  row_factor = np.log(num_of_docs / (1.0 + df))

  # csr.data holds the stored values row after row, so repeating each row's
  # factor df[i] times lines it up with that row's values. Rows with df == 0
  # contribute nothing, which matches the former `if df > 0` guard.
  # Rebinding csr.data (rather than assigning items) keeps the float results
  # even when the loaded matrix has an integer dtype.
  # NOTE(review): assumes tf values are > -1 (e.g. counts) -- confirm.
  csr.data = np.log1p(csr.data) * np.repeat(row_factor, df)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement