# pb1Bia

"""
Reduce the iris dataset to three dimensions using the PCA algorithm.
Print the principal components and the explained variance ratio, and plot the 3D dataset.
Also visualize the data reduced to three dimensions with the t-SNE method.
"""

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Fetch the iris dataset and split it into the feature matrix and the labels
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Fit a three-component PCA on the features and keep the projected samples
pca = PCA(n_components=3)
X_reduced = pca.fit_transform(X)

# Report the fitted principal components and the explained variance ratio
for heading, fitted_values in (
    ("Principal components:", pca.components_),
    ("Explained variance ratio:", pca.explained_variance_ratio_),
):
    print(heading, fitted_values)

# Plot the 3D dataset
# Every coordinate drawn here is a PCA score from X_reduced, so the three
# axes are the three principal components. The original feature matrix X is
# deliberately not drawn on these axes: it has four columns expressed in the
# original feature space, which is a different coordinate system, so
# comparing or connecting X with X_reduced point by point is not meaningful.
fig = plt.figure(figsize=(6, 3.8), constrained_layout=True)
ax = fig.add_subplot(111, projection='3d')

# One plot call per class so each class gets its own colour and legend entry
for class_index, class_name in enumerate(iris.target_names):
    members = X_reduced[y == class_index]
    ax.plot(members[:, 0], members[:, 1], members[:, 2], "o",
            alpha=0.8, label=str(class_name))

ax.set_xlabel("PC 1", fontsize=18)
ax.set_ylabel("PC 2", fontsize=18)
ax.set_zlabel("PC 3", fontsize=18)
# Axis limits are left to matplotlib's autoscaling so they follow the data
ax.legend()

plt.show()

# Embed the iris features into three dimensions with t-SNE; random_state is
# fixed so that repeated runs use the same seed
tsne = TSNE(n_components=3, random_state=42)
flowers_tsne = tsne.fit_transform(X)

# Colour palette indexed by class label (only the leading entries are needed
# when there are fewer classes than colours)
colors = ["#476A2A", "#7851B8", "#BD3430", "#4A2D4E", "#875525",
          "#A83683", "#4E655E", "#853541", "#3A3120", "#535D8E"]
figure = plt.figure(figsize=(10, 5), constrained_layout=True)
bx = figure.add_subplot(111, projection='3d')

# Fit each axis to the extent of the embedding explicitly, since the samples
# are drawn as text labels rather than as plotted data points
bx.set_xlim(flowers_tsne[:, 0].min(), flowers_tsne[:, 0].max())
bx.set_ylim(flowers_tsne[:, 1].min(), flowers_tsne[:, 1].max())
bx.set_zlim(flowers_tsne[:, 2].min(), flowers_tsne[:, 2].max())

# Draw every sample as its integer class label, coloured by class, instead of
# using scatter markers
for (tx, ty, tz), label in zip(flowers_tsne, y):
    bx.text(tx, ty, tz, str(label),
            color=colors[label],
            fontdict={'weight': 'bold', 'size': 9})
plt.show()