Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Iris ----
# Clustering ----

# Columns 2:5 are used as the numeric measurements throughout this script
# (presumably column 1 is a row index stored alongside the data -- TODO
# confirm against iris.csv).
dataset <- read.csv("iris.csv")

# read.csv() returns character columns by default since R 4.0. Indexing a
# colour vector with a character vector yields NA colours (nothing is drawn),
# so Species is converted to a factor before it is used as an index.
dataset$Species <- factor(dataset$Species)
palette_3 <- rainbow(3)

# PCA on the centred, unit-variance measurements.
principal_components <- prcomp(
  data.matrix(dataset[, 2:5]),
  center = TRUE,
  scale. = TRUE
)

# First two principal components, coloured by the recorded species.
# The species legend belongs to this plot: only here does colour i correspond
# to factor level i. Labels come from the factor levels so they cannot drift
# from the colours (the original hand-typed label "setose" was a typo).
plot(
  x = principal_components$x[, 1],
  y = principal_components$x[, 2],
  col = palette_3[dataset$Species]
)
legend(
  1, 2.8,
  legend = levels(dataset$Species),
  col = palette_3, pch = 1, cex = 0.8
)

# k-means on the same two components. The seed and multiple random starts
# make the partition reproducible between runs.
set.seed(1)
clusters <- kmeans(principal_components$x[, 1:2], centers = 3, nstart = 25)

# Same scatter, coloured by assigned cluster. Cluster numbers are arbitrary
# and do not map onto species, so the legend names clusters, not species.
plot(
  x = principal_components$x[, 1],
  y = principal_components$x[, 2],
  col = palette_3[clusters$cluster]
)
legend(
  1, 2.8,
  legend = paste("cluster", 1:3),
  col = palette_3, pch = 1, cex = 0.8
)
# MANOVA assumptions ----
# Multivariate Shapiro-Wilk normality test, one per species.
library(mvnormtest)

# One data frame of the four measurement columns per species.
irisGroupedBySpecies <- split(dataset[, 2:5], f = dataset$Species)

# Each group is transposed before testing, so that variables are in rows
# and observations in columns.
mshapiro.test(t(irisGroupedBySpecies[["setosa"]]))
mshapiro.test(t(irisGroupedBySpecies[["versicolor"]]))
mshapiro.test(t(irisGroupedBySpecies[["virginica"]]))
# Removing outliers ----
library(mvoutlier)

# NOTE(review): the 19th row (by position) is dropped from BOTH versicolor and
# virginica. The index is hard-coded; the value returned by the interactive
# chisq.plot() call below is stored but never used to pick the row. Confirm
# that row 19 really is the outlier in each group.

# The interactive inspection for versicolor was disabled in the original:
# outlierNT <- chisq.plot(irisGroupedBySpecies[["versicolor"]], quan = 1 / 2, ask = TRUE)
versiColor_noOutliers <- irisGroupedBySpecies[["versicolor"]][-19, ]

# Interactive chi-square plot for virginica (ask = TRUE waits for user input).
outlierNT <- chisq.plot(
  irisGroupedBySpecies[["virginica"]],
  quan = 1 / 2,
  ask = TRUE
)
virginica_noOutliers <- irisGroupedBySpecies[["virginica"]][-19, ]

# Re-check multivariate normality once per group after outlier removal
# (setosa is unchanged and is included so the three results sit together).
mshapiro.test(t(irisGroupedBySpecies[["setosa"]]))
mshapiro.test(t(versiColor_noOutliers))
mshapiro.test(t(virginica_noOutliers))
# Homoscedasticity test ----
# Only needed for MANOVA; not required for the univariate ANOVA below.
# BoxMTest() is defined in the local file boxTest.R.
source("boxTest.R")

# The grouping variable is passed as a factor explicitly: read.csv() yields a
# character column on R >= 4.0 (presumably BoxMTest expects a factor -- verify
# against boxTest.R).
# NOTE(review): this runs on the full data set, not on the outlier-reduced
# groups used for the normality checks -- confirm that is intended.
BoxMTest(dataset[, 2:5], cl = factor(dataset$Species))
# ANOVA for Sepal.Length ----
# One-way ANOVA of sepal length across species. Passing `data =` keeps the
# formula free of `dataset$` so the output terms are labelled by column name.
anova(lm(Sepal.Length ~ Species, data = dataset))

# Mean sepal length per species, computed in one call.
tapply(dataset$Sepal.Length, dataset$Species, mean)

# Conclusion recorded by the author: the species means are statistically
# different. The ANOVA alone does not say which pairs differ.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement