Advertisement
Guest User

Untitled

a guest
May 3rd, 2016
48
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.33 KB | None | 0 0
  1. def LDAnalysis_manual(X, y):
  2. n_features = X.shape[1]
  3. n_classes = len(np.unique(y))
  4.  
  5. print("Mean vectors...")
  6. mean_vectors = []
  7. for cl in range(n_classes):
  8. mean_vectors.append(np.mean(X[y == cl], axis=0))
  9. # print("Mean vector class {}: {}".format(cl, mean_vectors[cl - 1]))
  10.  
  11. print("In-class scatter matrix...")
  12. S_W = np.zeros((n_features, n_features))
  13. for cl, mv in zip(range(1, n_classes), mean_vectors):
  14. class_sc_mat = np.zeros((n_features, n_features)) # each class' scatter matrix
  15. for row in X[y == cl]:
  16. row, mv = row.reshape(n_features, 1), mv.reshape(n_features, 1) # column vectors
  17. class_sc_mat += (row - mv).dot((row - mv).T)
  18. S_W += class_sc_mat # sum class scatter matrices
  19.  
  20. overall_mean = np.mean(X, axis=0)
  21. print("Between-class scatter matrix...")
  22. S_B = np.zeros((n_features, n_features))
  23.  
  24. for i, mean_vec in enumerate(mean_vectors):
  25. n = X[y == i + 1].shape[0]
  26. mean_vec = mean_vec.reshape(n_features, 1) # make column vector
  27. overall_mean = overall_mean.reshape(n_features, 1)
  28. S_B += n * (mean_vec - overall_mean).dot((mean_vec - overall_mean).T)
  29.  
  30. eig_vals, eig_vecs = np.linalg.eig(np.linalg.inv(S_W).dot(S_B))
  31.  
  32. print("Eigenvector test")
  33. for i in range(len(eig_vals)):
  34. print("r{:3}".format(i), end=" ")
  35. sys.stdout.flush()
  36.  
  37. eigv = eig_vecs[:, i].reshape(n_features, 1)
  38. np.testing.assert_array_almost_equal(np.linalg.inv(S_W).dot(S_B).dot(eigv).real,
  39. (eig_vals[i] * eigv).real,
  40. decimal=6, err_msg='', verbose=True)
  41. __log.debug("nAll values ok.")
  42.  
  43. eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:, i]) for i in
  44. range(len(eig_vals))] # make list of value & vector tuples
  45. eig_pairs = sorted(eig_pairs, key=lambda k: k[0], reverse=True) # Sort tuple-list from high to low
  46.  
  47. __log.info("nEigenvalues (decending):")
  48. for i in eig_pairs:
  49. __log.info(i[0])
  50.  
  51. tot = sum(eig_vals)
  52. var_exp = [(i / tot) for i in sorted(eig_vals, reverse=True)]
  53. cum_var_exp = np.cumsum(var_exp)
  54. cum_var_exp = cum_var_exp.real
  55. plot(len(var_exp), var_exp, cum_var_exp)
  56.  
  57. idx_98 = next(idx for idx, val in enumerate(cum_var_exp) if val > .98)
  58. return idx_98 + 1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement