Advertisement
Guest User

Iris Dataset PCA in Julia

a guest
Mar 16th, 2019
110
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Julia 1.37 KB | None | 0 0
  1. using Plots;
  2. using Base;
  3. using LinearAlgebra;
  4. using Statistics;
  5. using DataFrames;
  6. using CSV;
  7. using StatsPlots;
  8.  
  # Column names assigned to the header-less data file, in file column order.
  header = [
      :sepal_length,
      :sepal_width,
      :petal_length,
      :petal_width,
      :species,
  ]

  # Location of the raw data file on disk.
  filename = "/home/nchashch/BORG/SEM6/ITASOU/dataset/iris.data"

  # Read the file with no header row, drop every row that contains a missing value,
  # then rename the columns in place.
  # NOTE(review): `CSV.read` without a sink argument and `names!` look like the
  # pre-1.0 CSV.jl / DataFrames.jl API — confirm against the pinned package versions.
  iris = dropmissing(CSV.read(filename, header=false), disallowmissing=false)
  names!(iris, header)
  14.  
  """
      pca(X)

  Project the rows of `X` onto the eigenvectors of the covariance matrix of its
  standardized columns.

  Every column of `X` is centered on its mean and divided by its sample standard
  deviation before the decomposition, so the matrix being decomposed is the sample
  correlation matrix of `X`.

  # Arguments
  - `X`: numeric matrix with one observation per row and one feature per column.

  # Returns
  A matrix with the same size as `X` containing the projected observations. Columns
  follow the ascending eigenvalue order of the symmetric eigensolver, so the **last**
  column carries the largest variance and the first column the smallest.
  """
  function pca(X)
      # Column-wise z-score: statistics are taken along dimension 1 (over rows).
      X_s = (X .- mean(X, dims=1)) ./ std(X, dims=1)
      n = size(X_s, 1)

      # Mark the covariance matrix as symmetric so the eigensolver always takes the
      # real symmetric path (real eigenvectors, eigenvalues sorted ascending) instead
      # of depending on a runtime symmetry check.
      gram = Symmetric(transpose(X_s) * X_s / (n - 1))

      # Only the eigenvectors are needed; a single decomposition is enough.
      vecs = eigvecs(gram)
      return X_s * vecs
  end
  24.  
  # The first four header entries select the columns passed to `pca`; the fifth
  # (`:species`) is left out of the matrix.
  features = header[1:4]
  X = convert(Matrix, iris[:, features])

  # Project the data, wrap the resulting matrix in a DataFrame, and copy the species
  # column over from `iris` so the projection can be grouped by it.
  # NOTE(review): `DataFrame(matrix)` and `df[:col]` indexing look like the pre-1.0
  # DataFrames.jl API — confirm against the pinned package version.
  irispca = DataFrame(pca(X))
  irispca[:species] = iris[:species]
  31.  
  # Scatter plot of two columns of the PCA projection, grouped by species with one
  # marker shape per group. The x axis uses :x4 and the y axis :x3.
  # NOTE(review): presumably the last columns are used because the projection's
  # columns are ordered by ascending eigenvalue — confirm against `pca`.
  plt1 = @df irispca scatter(:x4, :x3, group=:species,
                  title = "Iris Dataset PCA",
                  xlabel = "First principal component", ylabel = "Second principal component",
                  m=(0.7, [:cross :hex :star7], 5),
                  bg=RGB(.2,.2,.2),
                  );
  38.  
  # Scatter plot of the raw sepal measurements, grouped by species with one marker
  # shape per group.
  plt2 = @df iris scatter(
      :sepal_length,
      :sepal_width,
      group = :species,
      title = "Iris Dataset",
      xlabel = "Sepal length",
      ylabel = "Sepal width",
      marker = (0.7, [:cross :hex :star7], 5),
      background_color = RGB(0.2, 0.2, 0.2),
  );

  # Combine both scatter plots into a single figure.
  plot(plt1, plt2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement