Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# ---- Driver script: load data, generate random points, run k-means ----

# Load the UCI ionosphere data set. The file has no header row, so
# header = FALSE keeps the first observation as data instead of turning it
# into column names.
hw3data <- read.table(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/ionosphere/ionosphere.data",
  sep = ",",
  header = FALSE
)
names(hw3data)

max <- 5000   # largest coordinate value a random point may take
n <- 100      # number of points to generate
maxIter <- 10 # intended cap on the number of k-means iterations

# Randomly generate n points (x, y) with coordinates drawn without
# replacement from 1..max.
x <- sample(1:max, n)
y <- sample(1:max, n)

# Put the points in an n x 2 matrix: column 1 = x, column 2 = y.
z <- c(x, y)
m <- matrix(z, ncol = 2)

# Cluster counts to try. A k larger than the number of points cannot be
# seeded from the data (the extra centroids would be NA), so such values are
# dropped before the loop.
ks <- c(1, 2, 3, 4, 8, 10, 15, 120)
ks <- ks[ks <= n]

# NOTE(review): myKmeans() and its helpers are defined further down in this
# file. When the file is run top to bottom, those definitions must already
# have been evaluated before this loop executes.
for (k in ks) {
  myKmeans(m, k, max)
}
# Run k-means on a set of 2-D points and plot the resulting clusters.
#
# Args:
#   m:         numeric matrix, one point per row (column 1 = x, column 2 = y).
#   k:         number of clusters; must be between 1 and nrow(m).
#   max:       upper limit used for both plot axes (axes run from 1 to max).
#   threshold: convergence tolerance; iteration stops once the largest
#              centroid movement reported by delta() is <= threshold.
#   maxIter:   upper bound on the number of assignment/update rounds.
#
# Returns (invisibly):
#   A list with `clusters` (cluster index per row of m), `means` (k x 2 matrix
#   of centroids) and `iterations` (number of rounds performed).
#
# Relies on Clustering(), UpdateMeans() and delta() defined in this file.
myKmeans <- function(m, k, max, threshold = 0.001, maxIter = 10) {
  if (k < 1 || k > nrow(m)) {
    stop("k must be between 1 and the number of points (", nrow(m), ")",
         call. = FALSE)
  }

  # Initialise the centroids with the first k points of m.
  init <- m[seq_len(k), 1:2, drop = FALSE]

  dev.new()
  # A file-backed device (non-interactive run) must be closed to flush its
  # output; an on-screen window is left open so the plot stays visible.
  if (!interactive()) {
    on.exit(dev.off(), add = TRUE)
  }

  # Draw all points, then mark the initial centroids with triangles.
  plotTitle <- paste("K-Means Clustering K=", k)
  plot(m, xlim = c(1, max), ylim = c(1, max), xlab = "X",
       ylab = "Y", pch = 20, main = plotTitle)
  points(init, pch = 2)

  # Alternate assignment and centroid update until the centroids stop moving
  # (within `threshold`) or the iteration cap is reached.
  means <- init
  itr <- 0
  repeat {
    cl <- Clustering(m, means)
    oldMeans <- means
    means <- UpdateMeans(m, cl, k)

    # A cluster that received no points has an undefined (NaN/NA) mean; keep
    # its previous centroid so the convergence test stays well defined.
    empty <- is.na(means[, 1]) | is.na(means[, 2])
    means[empty, ] <- oldMeans[empty, ]

    thr <- delta(oldMeans, means)
    itr <- itr + 1
    if (isTRUE(thr <= threshold) || itr >= maxIter) {
      break
    }
  }

  # Overlay every point coloured by its final cluster, then the final
  # centroids as stars.
  points(m, col = cl, pch = 20)
  points(means, pch = 8, col = 15)

  invisible(list(clusters = cl, means = means, iterations = itr))
} # end of myKmeans
# Euclidean distance between two points given as numeric vectors.
#
# Args:
#   x, y: numeric vectors of coordinates (same length).
#
# Returns:
#   sqrt(sum((x - y)^2)), returned visibly.
#
# NOTE(review): this definition masks stats::dist() for code evaluated after it.
dist <- function(x, y) {
  sqrt(sum((x - y)^2))
}
# Reshape a flat vector of coordinates into a two-column matrix.
#
# Args:
#   d: vector laid out as (x1, y1, x2, y2, ...).
#
# Returns:
#   A matrix with two columns, filled row by row from d.
createMeanMatrix <- function(d) {
  centroid_rows <- matrix(data = d, byrow = TRUE, ncol = 2)
  centroid_rows
}
# Element-wise Euclidean norm of two component vectors.
#
# Args:
#   a, b: numeric vectors holding the first and second component.
#
# Returns:
#   sqrt(a^2 + b^2), computed element-wise and returned visibly.
euclid <- function(a, b) {
  sqrt(a^2 + b^2)
}
# Euclidean norm of a single numeric vector.
#
# Args:
#   a: numeric vector.
#
# Returns:
#   sqrt(sum(a^2)), returned visibly.
euclid2 <- function(a) {
  sqrt(sum(a^2))
}
# Largest movement of any centroid between two iterations.
#
# Args:
#   oldMeans: matrix of previous centroids (one row per centroid).
#   newMeans: matrix of updated centroids, same shape as oldMeans.
#
# Returns:
#   The maximum, over rows, of the Euclidean length of the shift measured on
#   the first two columns.
delta <- function(oldMeans, newMeans) {
  shift <- newMeans - oldMeans
  dx <- shift[, 1]
  dy <- shift[, 2]
  max(sqrt(dx**2 + dy**2))
}
# Assign every point to its nearest centroid.
#
# Args:
#   m:     numeric matrix, one point per row.
#   means: numeric matrix of centroids, one per row, same number of columns
#          as m.
#
# Returns:
#   A vector with one entry per row of m giving the row index in `means` of
#   the closest centroid (Euclidean distance). Ties go to the lowest index.
Clustering <- function(m, means) {
  n <- nrow(m)
  clusters <- integer(n)

  # One column per centroid, so subtracting a point (a vector with one value
  # per coordinate) recycles it down every column.
  centroids <- t(means)

  for (i in seq_len(n)) {
    offsets <- centroids - m[i, ]
    distances <- sqrt(colSums(offsets^2))
    # match() returns the first position of the minimum, i.e. the nearest
    # centroid with the smallest index.
    clusters[i] <- match(min(distances), distances)
  }

  clusters
}
# Recompute each centroid as the mean of the points assigned to it.
#
# Args:
#   m:  numeric matrix, one point per row.
#   cl: vector of cluster indices, one per row of m.
#   k:  number of clusters.
#
# Returns:
#   A k x ncol(m) matrix whose row j is the column-wise mean of the points
#   with cl == j. A cluster without any points yields a row of NaN.
UpdateMeans <- function(m, cl, k) {
  means <- matrix(NaN, nrow = k, ncol = ncol(m))

  for (j in seq_len(k)) {
    # Rows of m that belong to cluster j (NA labels are ignored by which()).
    group <- which(cl == j)
    # drop = FALSE keeps a matrix even for a single-point cluster.
    means[j, ] <- colMeans(m[group, , drop = FALSE])
  }

  means
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement