Advertisement
Guest User

Julia Iris PCA

a guest
Mar 16th, 2019
104
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Julia 1.26 KB | None | 0 0
  using Plots
  using Base
  using LinearAlgebra
  using Statistics
  using DataFrames
  using CSV
  using StatsPlots

  # Hand-rolled PCA of the Iris dataset: standardise the four numeric features,
  # eigendecompose their covariance matrix, project the samples onto the
  # eigenvectors, and plot the projection next to two of the raw features.

  # --- Load ------------------------------------------------------------------
  # The data file has no header row, so column names are assigned afterwards.
  # NOTE(review): the DataFrames/CSV calls below (`names!`, `df[:col]`,
  # `CSV.read(path; header=false)`, `DataFrame(matrix)`) are kept exactly as
  # written; confirm they match the package versions in the target environment.
  filename = "/home/nchashch/BORG/SEM6/ITASOU/dataset/iris.data";
  df = CSV.read(filename, header=false)
  df = dropmissing(df);
  header = [:sepal_length, :sepal_width, :petal_length, :petal_width, :species];
  names!(df, header);

  # --- Standardise -----------------------------------------------------------
  # Only the first four columns are numeric features; :species is the label.
  features = header[1:4];
  X = convert(Matrix, df[:, features]);
  # Zero mean and unit standard deviation per column.
  X_s = (X .- mean(X, dims=1)) ./ std(X, dims=1);

  # --- Eigendecomposition ----------------------------------------------------
  n = size(X_s, 1)
  # Sample covariance of the standardised columns (features x features).
  Gram = transpose(X_s) * X_s / (n - 1);
  # A single decomposition provides both eigenvalues and eigenvectors. The
  # Symmetric wrapper selects the symmetric solver, whose eigenvalues come back
  # in ascending order, so the LAST column of `vecs` carries the most variance.
  decomposition = eigen(Symmetric(Gram));
  lambda = decomposition.values;
  vecs = decomposition.vectors;
  println(lambda);

  # --- Project ---------------------------------------------------------------
  # Coordinates of every sample in the eigenvector basis, plus the class label
  # so the scatter plot can group by species.
  PCA = X_s * vecs
  pca = DataFrame(PCA)
  pca[:species] = df[:species]

  println(first(pca, 5));

  # --- Plot ------------------------------------------------------------------
  # :x4 and :x3 are the last two projected columns, i.e. the components with
  # the largest and second-largest eigenvalues.
  plt1 = @df pca scatter(:x4, :x3, group=:species,
                  title = "Iris Dataset PCA",
                  xlabel = "First principal component", ylabel = "Second principal component",
                  m=(0.7, [:cross :hex :star7], 5),
                  bg=RGB(.2,.2,.2),
                  );

  # Raw sepal measurements for visual comparison with the projection.
  plt2 = @df df scatter(:sepal_length, :sepal_width, group=:species,
                 title = "Iris Dataset",
                 xlabel = "Sepal length", ylabel = "Sepal width",
                 m=(0.7, [:cross :hex :star7], 5),
                 bg=RGB(.2,.2,.2),
                 );

  plot(plt1, plt2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement