Guest User

Untitled

a guest
Dec 14th, 2017
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.10 KB | None | 0 0
  1. for i in range(N): #the larger is N, the better it is
  2. df_sh = df.apply(numpy.random.permutation, axis=1)
  3. #where df this is my large dataframe, with 10K rows and 1K columns
  4.  
  5. corr = df_sh.corrwith(s, axis = 1)
  6. #where s is the provided series (shape of s =(1000,))
  7.  
  8. def corr2_coeff_2d_1d(A, B):
  9. # Rowwise mean of input arrays & subtract from input arrays themeselves
  10. A_mA = A - A.mean(1,keepdims=1)
  11. B_mB = B - B.mean()
  12.  
  13. # Sum of squares across rows
  14. ssA = np.einsum('ij,ij->i',A_mA,A_mA)
  15. ssB = B_mB.dot(B_mB)
  16.  
  17. # Finally get corr coeff
  18. return A_mA.dot(B_mB)/np.sqrt(ssA*ssB)
  19.  
  20. # Extract underlying arry data for faster NumPy processing in loop later on
  21. a = df.values
  22. s_ar = s.values
  23.  
  24. # Setup array for row-indexing with NumPy's advanced indexing later on
  25. r = np.arange(a.shape[0])[:,None]
  26.  
  27. for i in range(N):
  28. np.random.shuffle(a.T)
  29.  
  30. # Compute correlation
  31. corr = corr2_coeff_2d_1d(a, s_ar)
  32.  
  33. a = df.values
  34. s_ar = s.values
  35. r = np.arange(a.shape[0])[:,None]
  36.  
  37. B = s_ar
  38. B_mB = B - B.mean()
  39. ssB = B_mB.dot(B_mB)
  40.  
  41. A = a
  42. A_mean = A.mean(1,keepdims=1)
  43.  
  44. for i in range(N):
  45. np.random.shuffle(a.T)
  46.  
  47. # Compute correlation
  48. A = a
  49. A_mA = A - A_mean
  50. ssA = np.einsum('ij,ij->i',A_mA,A_mA)
  51. corr = A_mA.dot(B_mB)/np.sqrt(ssA*ssB)
  52.  
  53. In [170]: df = pd.DataFrame(np.random.rand(10000,1000))
  54.  
  55. In [171]: s = pd.Series(df.iloc[0])
  56.  
  57. In [172]: %%timeit
  58. ...: df_sh = df.apply(np.random.permutation, axis=1)
  59. ...: corr = df_sh.corrwith(s, axis = 1)
  60. 1 loop, best of 3: 2.09 s per loop
  61.  
  62. In [173]: a = df.values
  63. ...: s_ar = s.values
  64. ...: r = np.arange(a.shape[0])[:,None]
  65. ...:
  66. ...: B = s_ar
  67. ...: B_mB = B - B.mean()
  68. ...: ssB = B_mB.dot(B_mB)
  69. ...:
  70. ...: A = a
  71. ...: A_mean = A.mean(1,keepdims=1)
  72.  
  73. In [174]: %%timeit
  74. ...: np.random.shuffle(a.T)
  75. ...:
  76. ...: # Compute correlation
  77. ...: A = a
  78. ...: A_mA = A - A_mean
  79. ...: ssA = np.einsum('ij,ij->i',A_mA,A_mA)
  80. ...: corr = A_mA.dot(B_mB)/np.sqrt(ssA*ssB)
  81. 1 loop, best of 3: 181 ms per loop
  82.  
  83. In [175]: 2090.0/181
  84. Out[175]: 11.546961325966851
Add Comment
Please, Sign In to add comment