Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
using Plots
using Base
using LinearAlgebra
using Statistics
using DataFrames
using CSV
using StatsPlots

# Path of the Iris data file; it is read below as a headerless table.
filename = "/home/nchashch/BORG/SEM6/ITASOU/dataset/iris.data";

# Current CSV.jl requires the sink type (`DataFrame`) as the second argument;
# `header=false` makes the first line data rather than column names.
iris = CSV.read(filename, DataFrame; header=false)

# Drop incomplete rows but keep the columns' element types unchanged.
iris = dropmissing(iris, disallowmissing=false);

# Column names in file order: four measurements followed by the class label.
header = [:sepal_length, :sepal_width, :petal_length, :petal_width, :species];

# `rename!` with a vector of symbols replaces the removed `names!`.
rename!(iris, header);
"""
    pca(X)

Project the rows of `X` onto the eigenvectors of its feature correlation matrix.

`X` holds one observation per row and one feature per column. Every column is
centred on its mean and divided by its sample standard deviation, the matrix
`X_s' * X_s / (n - 1)` of the standardised data is formed, and the standardised
data is multiplied by the eigenvector matrix of that product.

# Returns
A matrix with the same size as `X` containing the component scores. The
eigenvalues of a `Symmetric` matrix come back in ascending order, so the columns
are ordered from least to most variance: the *last* column is the first
principal component, the second to last is the second, and so on.

Columns must not be constant: a zero standard deviation makes the standardised
values `NaN`.
"""
function pca(X)
    X_s = (X .- mean(X, dims=1)) ./ std(X, dims=1)
    n = size(X_s, 1)
    # `Symmetric` selects the symmetric eigensolver explicitly, which guarantees
    # real eigenpairs sorted by ascending eigenvalue regardless of rounding.
    gram = Symmetric(transpose(X_s) * X_s / (n - 1))
    # Only the eigenvectors are used, so a single `eigen` call replaces the
    # separate `eigvals` and `eigvecs` decompositions.
    return X_s * eigen(gram).vectors
end
# The first four header entries are the numeric measurements; the fifth is the
# species label.
features = header[1:4];

# `Matrix(df)` replaces the removed `convert(Matrix, df)`.
X = Matrix(iris[:, features]);

# Building a DataFrame from a matrix needs `:auto` to generate the column names
# x1, x2, x3, x4.
irispca = DataFrame(pca(X), :auto)

# Property access replaces the removed `df[:col]` column indexing.
irispca.species = iris.species

# `pca` orders its columns by ascending eigenvalue, so `x4` is the first
# principal component and `x3` the second.
plt1 = @df irispca scatter(:x4, :x3, group=:species,
    title = "Iris Dataset PCA",
    xlabel = "First principal component", ylabel = "Second principal component",
    m=(0.7, [:cross :hex :star7], 5),
    bg=RGB(.2,.2,.2),
);

# Raw sepal measurements for comparison with the projection above.
plt2 = @df iris scatter(:sepal_length, :sepal_width, group=:species,
    title = "Iris Dataset",
    xlabel = "Sepal length", ylabel = "Sepal width",
    m=(0.7, [:cross :hex :star7], 5),
    bg=RGB(.2,.2,.2),
);

# Show both scatter plots side by side in one figure.
plot(plt1, plt2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement