Advertisement
Guest User

hw3

a guest
Feb 25th, 2018
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.86 KB | None | 0 0
  # Load the UCI ionosphere data set. The file has no header row, so
  # header = FALSE keeps the first observation as data instead of turning it
  # into column names.
  hw3data <- read.table(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/ionosphere/ionosphere.data",
    sep = ",",
    header = FALSE
  )
  names(hw3data)

  max <- 5000    # coordinates are drawn from 1..max on both axes
  n <- 100       # number of points
  maxIter <- 10  # intended iteration cap; not passed to myKmeans below

  # Randomly generate the point coordinates (x, y), sampled without replacement.
  x <- sample(seq_len(max), n)
  y <- sample(seq_len(max), n)

  # Put the points in an n x 2 matrix: column 1 holds x, column 2 holds y.
  z <- c(x, y)
  m <- matrix(z, ncol = 2)

  # Run the clustering once per requested k.
  # NOTE: myKmeans is defined further down in this file; its definition (and
  # the helpers it calls) must already be loaded when this loop runs.
  ks <- c(1, 2, 3, 4, 8, 10, 15, 120)
  for (k in ks) {
    # myKmeans seeds its centroids from the first k points, so a k larger than
    # the number of points cannot be seeded.
    if (k > n) {
      message("Skipping k = ", k, ": only ", n, " points are available.")
      next
    }
    myKmeans(m, k, max)
  }
  19.  
  # Cluster 2-D points with k-means and plot the result on a new device.
  #
  # Arguments:
  #   m          numeric matrix with one point per row; column 1 is x and
  #              column 2 is y.
  #   k          number of clusters, between 1 and nrow(m).
  #   max        upper axis limit for the plot (both axes run from 1 to max).
  #   threshold  convergence tolerance: iteration stops once no centroid moved
  #              farther than this during an update.
  #   maxIter    upper bound on the number of assignment/update rounds.
  #
  # The first k rows of m seed the centroids. Relies on Clustering(),
  # UpdateMeans() and delta() from this file.
  #
  # Returns, invisibly, a list with the final cluster labels (`cluster`), the
  # k x 2 centroid matrix (`centers`), the number of rounds run (`iterations`)
  # and the last centroid shift (`delta`).
  myKmeans <- function(m, k, max, threshold = 1e-6, maxIter = 10) {
    stopifnot(
      is.matrix(m), is.numeric(m), ncol(m) == 2, !anyNA(m),
      length(k) == 1, k >= 1, k <= nrow(m)
    )

    # Initialize the centroids from the first k points.
    init <- m[seq_len(k), , drop = FALSE]
    means <- init
    itr <- 0

    repeat {
      cl <- Clustering(m, means)
      oldMeans <- means
      means <- UpdateMeans(m, cl, k)

      # A cluster that lost all of its points has no mean (the update yields
      # NaN for it); keep its previous centroid so later rounds stay defined.
      emptied <- rowSums(is.na(means)) > 0
      means[emptied, ] <- oldMeans[emptied, ]

      thr <- delta(oldMeans, means)
      itr <- itr + 1
      if (thr <= threshold || itr >= maxIter) {
        break
      }
    }

    # Draw everything on one fresh device and close it again on the way out,
    # even if plotting fails.
    dev.new()
    on.exit(dev.off(), add = TRUE)

    limits <- c(1, max)
    plot(
      m,
      col = cl, pch = 20, xlim = limits, ylim = limits,
      xlab = "X", ylab = "Y", main = paste("K-Means Clustering K=", k)
    )
    points(init, pch = 2)             # starting centroids
    points(means, pch = 8, col = 15)  # final centroids

    invisible(list(cluster = cl, centers = means, iterations = itr, delta = thr))
  }
  74.  
  # Euclidean distance between two points given as numeric vectors of equal
  # length. The value is returned visibly (the last expression is the result
  # itself rather than an assignment).
  # NOTE: this definition masks stats::dist for any code run after it.
  dist <- function(x, y) {
    sqrt(sum((x - y)^2))
  }
  80.  
  # Reshape a flat vector (x1, y1, x2, y2, ...) into a two-column matrix with
  # one (x, y) pair per row.
  createMeanMatrix <- function(d) {
    centroid_rows <- matrix(data = d, byrow = TRUE, ncol = 2)
    centroid_rows
  }
  85.  
  # Euclidean length of the 2-D vector (a, b). Vectorized: when a and b hold
  # several components, one length is returned per position. The value is
  # returned visibly.
  euclid <- function(a, b) {
    sqrt(a^2 + b^2)
  }
  # Euclidean norm of a numeric vector (square root of the sum of squares).
  # The value is returned visibly.
  euclid2 <- function(a) {
    sqrt(sum(a^2))
  }
  93.  
  # Largest distance any centroid moved between two successive mean matrices
  # (one centroid per row, x in column 1 and y in column 2).
  delta <- function(oldMeans, newMeans) {
    shift <- newMeans - oldMeans
    moved <- euclid(shift[, 1], shift[, 2])
    max(moved)
  }
  100.  
  # Assign every point to its nearest centroid.
  #
  # Arguments:
  #   m      numeric matrix, one point per row.
  #   means  numeric matrix, one centroid per row, same number of columns as m.
  #
  # Returns an integer vector with one entry per row of m: the row index in
  # `means` of the closest centroid (the first one on ties). Centroids with
  # missing coordinates are never chosen; a point gets NA only when no
  # centroid has a usable distance.
  Clustering <- function(m, means) {
    n <- nrow(m)
    clusters <- integer(n)
    for (i in seq_len(n)) {
      # Squared distances rank the centroids the same way as the distances
      # themselves, so the square root is not needed to find the nearest one.
      offsets <- sweep(means, 2, m[i, ])
      nearest <- which.min(rowSums(offsets^2))
      clusters[i] <- if (length(nearest) == 1L) nearest else NA_integer_
    }
    clusters
  }
  121.  
  # Recompute each centroid as the mean of the points assigned to it.
  #
  # Arguments:
  #   m   numeric matrix, one point per row; columns 1 and 2 are used.
  #   cl  cluster label (1..k) for every row of m.
  #   k   number of clusters.
  #
  # Returns a k x 2 matrix whose row j is the mean point of cluster j. A
  # cluster with no assigned points gets NaN in both columns.
  UpdateMeans <- function(m, cl, k) {
    means <- matrix(NaN, nrow = k, ncol = 2)
    for (j in seq_len(k)) {
      # Rows of m that belong to cluster j.
      members <- which(cl == j)
      means[j, ] <- c(mean(m[members, 1]), mean(m[members, 2]))
    }
    means
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement