CJamie

pcalda

Mar 23rd, 2022
47
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.53 KB | None | 0 0
  1. import numpy as np
  2. class PCA:
  3. def __init__(self, n_components):
  4. self.n_components = n_components
  5. self.components = None
  6. self.mean = None
  7.  
  8. def fit(self, X):
  9. self.mean = np.mean(X, axis=0)
  10. X = X - self.mean
  11.  
  12. cov = np.cov(X.T)
  13.  
  14. eigenvalues, eigenvectors = np.linalg.eig(cov)
  15.  
  16. # -> eigenvector v = [:,i] column vector, transpose for easier calculations
  17. # sort eigenvectors
  18. eigenvectors = eigenvectors.T
  19. idxs = np.argsort(eigenvalues)[::-1]
  20. eigenvalues = eigenvalues[idxs]
  21. eigenvectors = eigenvectors[idxs]
  22.  
  23. # store first n eigenvectors
  24. self.components = eigenvectors[0 : self.n_components]
  25. def transform(self, X):
  26. X = X - self.mean
  27. return np.dot(X, self.components.T)
  28.  
  29. import matplotlib.pyplot as plt
  30. from sklearn import datasets
  31.  
  32. data = datasets.load_iris()
  33. X = data.data
  34. y = data.target
  35.  
  36. pca = PCA(2)
  37. pca.fit(X)
  38. X_projected = pca.transform(X)
  39.  
  40. print("Shape of X:", X.shape)
  41. print("Shape of transformed X:", X_projected.shape)
  42.  
  43. x1 = X_projected[:, 0]
  44. x2 = X_projected[:, 1]
  45.  
  46. plt.scatter(x1, x2, c=y, edgecolor="none", alpha=0.8, cmap=plt.cm.get_cmap("viridis", 3))
  47. plt.xlabel("Principal Component 1")
  48. plt.ylabel("Principal Component 2")
  49. plt.colorbar()
  50. plt.show()
  51.  
  52. """# LDA """
  53.  
  54. import numpy as np
  55. class LDA:
  56. def __init__(self, n_components):
  57. self.n_components = n_components
  58. self.linear_discriminants = None
  59.  
  60. def fit(self, X, y):
  61. n_features = X.shape[1]
  62. class_labels = np.unique(y)
  63.  
  64. # Within class scatter matrix:
  65. # SW = sum((X_c - mean_X_c)^2 )
  66.  
  67. # Between class scatter:
  68. # SB = sum( n_c * (mean_X_c - mean_overall)^2 )
  69.  
  70. mean_overall = np.mean(X, axis=0)
  71. SW = np.zeros((n_features, n_features))
  72. SB = np.zeros((n_features, n_features))
  73. for c in class_labels:
  74. X_c = X[y == c]
  75. mean_c = np.mean(X_c, axis=0)
  76. # (4, n_c) * (n_c, 4) = (4,4) -> transpose
  77. SW += (X_c - mean_c).T.dot((X_c - mean_c))
  78.  
  79. # (4, 1) * (1, 4) = (4,4) -> reshape
  80. n_c = X_c.shape[0]
  81. mean_diff = (mean_c - mean_overall).reshape(n_features, 1)
  82. SB += n_c * (mean_diff).dot(mean_diff.T)
  83.  
  84. # Determine SW^-1 * SB
  85. A = np.linalg.inv(SW).dot(SB)
  86. # Get eigenvalues and eigenvectors of SW^-1 * SB
  87. eigenvalues, eigenvectors = np.linalg.eig(A)
  88. # -> eigenvector v = [:,i] column vector, transpose for easier calculations
  89. # sort eigenvalues high to low
  90. eigenvectors = eigenvectors.T
  91. idxs = np.argsort(abs(eigenvalues))[::-1]
  92. eigenvalues = eigenvalues[idxs]
  93. eigenvectors = eigenvectors[idxs]
  94. # store first n eigenvectors
  95. self.linear_discriminants = eigenvectors[0 : self.n_components]
  96.  
  97. def transform(self, X):
  98. # project data
  99. return np.dot(X, self.linear_discriminants.T)
  100.  
  101. import matplotlib.pyplot as plt
  102. from sklearn import datasets
  103.  
  104. data = datasets.load_iris()
  105. X, y = data.data, data.target
  106.  
  107. lda = LDA(2)
  108. lda.fit(X, y)
  109. X_projected = lda.transform(X)
  110.  
  111. print("Shape of X:", X.shape)
  112. print("Shape of transformed X:", X_projected.shape)
  113.  
  114. x1, x2 = X_projected[:, 0], X_projected[:, 1]
  115.  
  116. plt.scatter(
  117. x1, x2, c=y, edgecolor="none", alpha=0.8, cmap=plt.cm.get_cmap("viridis", 3)
  118. )
  119.  
  120. plt.xlabel("Linear Discriminant 1")
  121. plt.ylabel("Linear Discriminant 2")
  122. plt.colorbar()
  123. plt.show()
  124.  
  125.  
Advertisement
Add Comment
Please, Sign In to add comment