Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using DataFrames;
- using CSV;
- using LinearAlgebra;
- using Statistics;
- using StatsPlots;
# Load the iris data set.
# The data file has no header row, so the column names are supplied explicitly;
# when `header` is a vector of names, CSV.jl reads the first line as data.
filename = "/home/nchashch/BORG/SEM6/ITASOU/dataset/iris.data";
header = [:sepal_length, :sepal_width, :petal_length, :petal_width, :species];
# `CSV.File` + `DataFrame` is used instead of `CSV.read(filename, ...)` without
# a sink and the removed `names!`, neither of which current releases accept.
iris = DataFrame(CSV.File(filename; header=header))
# Remove rows that contain missing values; `disallowmissing=false` leaves the
# column element types as they were read.
iris = dropmissing(iris, disallowmissing=false);
"""
    pca(X)

Express the rows of `X` in the eigenvector basis of its correlation matrix.

`X` holds one observation per row and one feature per column. Every column is
standardized (mean subtracted, divided by the sample standard deviation), the
correlation matrix of the standardized data is eigen-decomposed once, and the
standardized data is multiplied by the matrix of eigenvectors.

# Returns
A matrix with the same size as `X`. Columns are ordered by *increasing*
eigenvalue, so the LAST column is the first (largest-variance) principal
component, the second-to-last column is the second one, and so on.

# Notes
A column with zero standard deviation leads to a division by zero during
standardization; this function does not guard against that.
"""
function pca(X)
    # Standardize: zero mean and unit sample standard deviation per column.
    centered = X .- mean(X, dims=1)
    X_s = centered ./ std(X, dims=1)
    # Number of observations (rows).
    n = size(X_s, 1)
    # Correlation matrix of the standardized data. Wrapping it in `Symmetric`
    # selects the symmetric eigen-solver explicitly, which returns real
    # eigenvalues sorted in increasing order together with their eigenvectors.
    gram = Symmetric(transpose(X_s) * X_s / (n - 1))
    # A single decomposition; only the eigenvectors are needed here.
    vecs = eigen(gram).vectors
    # Standardized data in the principal-component (eigen) basis.
    return X_s * vecs
end
# Collect the four numeric feature columns of the iris table into a matrix
# and run Principal Component Analysis on it with the pca function above.
features = header[1:4];
X = Matrix(iris[:, features]);
scores = pca(X)
# Wrap the component scores in a DataFrame. The column names x1, x2, ... are
# given explicitly (they are the names the plotting code refers to) because
# current DataFrames releases no longer auto-name the columns of a bare matrix.
irispca = DataFrame(scores, [Symbol("x", j) for j in axes(scores, 2)])
# Attach the species label of every observation.
irispca.species = iris.species
# Plot the data points on the plane spanned by the first and second principal
# components. pca orders its output columns by increasing eigenvalue, so with
# four features the first component is column x4 and the second is column x3.
plt1 = @df irispca scatter(:x4, :x3, group=:species,
    title = "Iris Dataset PCA",
    xlabel = "First principal component (z-score)",
    ylabel = "Second principal component (z-score)",
    m=(0.7, [:cross :hex :star7], 5),
    bg=RGB(.2,.2,.2),
);
# Scatter the raw measurements: sepal length against sepal width, one series
# per species.
# NOTE: unlike the PCA plot, this data is not standardized.
plt2 = @df iris scatter(
    :sepal_length,
    :sepal_width,
    group = :species,
    title = "Iris Dataset",
    xlabel = "Sepal length (cm)",
    ylabel = "Sepal width (cm)",
    marker = (0.7, [:cross :hex :star7], 5),
    background_color = RGB(0.2, 0.2, 0.2),
);
# Combine both scatter plots into a single figure.
plot(plt1, plt2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement