Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""
Use the iris dataset in order to reduce its dimensionality down to three
dimensions by using the PCA algorithm.

Print the principal components and the explained variance ratio and plot the
3D dataset. Also visualize the data reduced to three dimensions using the
t-SNE method.
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Load the iris dataset: X is the feature matrix, y the integer class labels.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Reduce the dimensionality of the data to 3 dimensions.
# fit_transform both learns the components from X and returns X expressed in
# the coordinates of those components.
pca = PCA(n_components=3)
X_reduced = pca.fit_transform(X)

# Print the principal components (one row per component) and the fraction of
# the total variance that each component explains.
print("Principal components:", pca.components_)
print("Explained variance ratio:", pca.explained_variance_ratio_)
# Plot the 3D dataset
# The three PCA coordinates are plotted directly. The original iris features
# (four per sample) live in a different coordinate system, so they cannot be
# drawn on, or compared against, the principal-component axes.
fig = plt.figure(figsize=(6, 3.8), constrained_layout=True)
ax = fig.add_subplot(111, projection='3d')

# One scatter call per species so each class gets its own colour and legend
# entry; the boolean mask selects the rows belonging to that class.
for class_index, class_name in enumerate(iris.target_names):
    members = y == class_index
    ax.scatter(
        X_reduced[members, 0],
        X_reduced[members, 1],
        X_reduced[members, 2],
        label=class_name,
    )

# Axes are principal components, not original features.
ax.set_xlabel("PC1", fontsize=18)
ax.set_ylabel("PC2", fontsize=18)
ax.set_zlabel("PC3", fontsize=18)
ax.legend()
plt.show()
# Build a t-SNE model and embed the iris data in three dimensions.
# random_state is fixed so the embedding is reproducible between runs.
tsne = TSNE(n_components=3, random_state=42)
flowers_tsne = tsne.fit_transform(X)

# One colour per class label (indexed by the integer label in y).
colors = ["#476A2A", "#7851B8", "#BD3430"]

figure = plt.figure(figsize=(10, 5), constrained_layout=True)
bx = figure.add_subplot(111, projection='3d')

# Text artists do not trigger autoscaling, so the axis limits are set
# explicitly to the extent of the embedding.
bx.set_xlim(flowers_tsne[:, 0].min(), flowers_tsne[:, 0].max())
bx.set_ylim(flowers_tsne[:, 1].min(), flowers_tsne[:, 1].max())
bx.set_zlim(flowers_tsne[:, 2].min(), flowers_tsne[:, 2].max())

# Draw each sample as its class label (as text) instead of a scatter marker.
for (tx, ty, tz), label in zip(flowers_tsne, y):
    bx.text(
        tx, ty, tz, str(label),
        color=colors[label],
        fontdict={'weight': 'bold', 'size': 9},
    )
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement