Advertisement
NumberNine

EM

Oct 30th, 2014
187
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 6.50 KB | None | 0 0
  1. data=read.table("EMGaussian.data",header=FALSE,dec=".")
  2. data=read.table("EMGaussian.test",header=FALSE,dec=".")
  3. plot(data)
  4. km=kmeans(data,4)
  5. plot(data,col=km$cluster)
  6. install.packages("pdist")
  7. library(pdist)
  8.  
  9. kmeans=function(x,k){
  10.   new=x
  11.   new$clstr=c(0)
  12.   centers=x[sample(nrow(x),k),]
  13.  
  14. }
  15. conver=rep(0,50)
  16. for(temp in 1:50){
  17. new=data
  18. new$clstr=c(0)
  19. centers=data[sample(nrow(data),4),]
  20.  
  21.  
  22.  
  23. #mypdist=pdist(data.matrix(data),data.matrix(centers)))
  24. #mynewpdist=as.matrix(mypdist)
  25. loops=0
  26. while(1){
  27.   loops=loops+1
  28.   tata=matrix(data=NA,nrow=500,ncol=4)
  29.   for(i in 1:500){
  30.     for(j in 1:4){
  31.       tata[i,j]=sqrt(sum((data[i,] - centers[j,]) ^ 2))
  32.     }
  33.   }
  34.  
  35.   mynewpdist=tata
  36.   for(i in 1:500){
  37.     for(j in 1:4){
  38.       if(min(mynewpdist[i,])==mynewpdist[i,j]){
  39.         new$clstr[i]=j
  40.         break
  41.       }
  42.     }
  43.   }
  44.   toast=as.factor(new$clstr)
  45.   #plot(new$V1,new$V2,col=toast)
  46.   #points(centers,pch=23)
  47.  
  48.   #   newmean1= mean(subset(new, clstr==1))
  49.   #   newmean2= mean(subset(new, clstr==2))
  50.   #   newmean3= mean(subset(new, clstr==3))
  51.   #   newmean4= mean(subset(new, clstr==4))
  52.  
  53.   newmean1= apply(subset(new, clstr==1),2,mean)
  54.   newmean2= apply(subset(new, clstr==2),2,mean)
  55.   newmean3= apply(subset(new, clstr==3),2,mean)
  56.   newmean4= apply(subset(new, clstr==4),2,mean)
  57.  
  58.   newcenters=data.frame(newmean1,newmean2,newmean3,newmean4)
  59.   newcenters=data.frame(t(as.matrix(newcenters)))
  60.   newcenters$clstr=NULL
  61. conver[temp]=loops
  62.   if(all(newcenters==centers)){break}
  63.  
  64.   centers=newcenters
  65.  
  66. }
  67. }
  68. install.packages("mvtnorm")
  69. library(mvtnorm)
  70.  
  71. Estep = function(data,mix.pi,mu,sigma,K){
  72.   result = apply(data, 1, function(xt){
  73.     eq_i = sapply(1:K,function(k) {
  74.       #mix.pi[k] * dnorm(xt, mu[,k], sigma[,,k])
  75.     mix.pi[k] * dmvnorm(xt, mu[,k], sigma[,,k])
  76.     });
  77.     eq_i / sum(eq_i);
  78.   });
  79.   t(result);
  80. }
  81.  
  82. Mstep = function(isotrope,q,data,K,N,degree) {
  83.   q.sum = apply(q,2,sum)
  84.   new.mix = q.sum/N;
  85.   new.mu = t(t(t(data) %*% q) / q.sum);
  86.   new.sigma = array(0, dim=c(dim, dim, K));
  87.   if(!isotrope){
  88.   for(k in 1:K) {
  89.     sig = matrix(0,dim,dim);
  90.     for(n in 1:N) {
  91.       sig = sig + q[n, k] * (data[n,]-new.mu[,k]) %*% t(data[n,]-new.mu[,k]);
  92.     }
  93.     new.sigma[,,k] = sig / q.sum[k] #- new.mu[,k] %*% t(new.mu[,k])
  94.   }  
  95.   }
  96.   else if(isotrope){
  97.     for(k in 1:K) {
  98.       sig = matrix(0,dim,dim);
  99.    
  100.       for(n in 1:N) {
  101.         sig[1,1] = sig[1,1] + q[n, k] * t(data[n,]-new.mu[,k]) %*% (data[n,]-new.mu[,k]);
  102.         print(sig)
  103.       }
  104.       sigmacarre=sig[1,1]/(2* q.sum[k])
  105.     print(sigmacarre)
  106.       new.sigma[,,k] = sigmacarre*diag(2)  #- new.mu[,k] %*% t(new.mu[,k])
  107.     }  
  108.    
  109.   }
  110.   list(new.mix, new.mu, new.sigma);
  111. }
  112.  
  113. NEW1=subset(new, clstr==1)
  114. NEW1$clstr=NULL
  115. NEW2=subset(new, clstr==2)
  116. NEW2$clstr=NULL
  117. NEW3=subset(new, clstr==3)
  118. NEW3$clstr=NULL
  119. NEW4=subset(new, clstr==4)
  120. NEW4$clstr=NULL
  121.  
  122.  
  123. var(NEW1)
  124. var(NEW2)
  125. var(NEW3)
  126. var(NEW4)
  127.  
  128.  
  129. K=4
  130. dim=2
  131. # mu=matrix(apply(data,2,mean), nrow=dim, ncol=K)
  132. mu=t(centers)
  133. sigma =array(0, dim=c(dim, dim, K));
  134. sigma[,,1]=var(NEW1)
  135. sigma[,,2]=var(NEW2)
  136. sigma[,,3]=var(NEW3)
  137. sigma[,,4]=var(NEW4)
  138. sigma[,,1]=(mean((NEW1[,1]-centers[1,1])^2)+mean((NEW1[,2]-centers[1,2])^2))/2*diag(2)
  139. sigma[,,2]=(mean((NEW2[,1]-centers[2,1])^2)+mean((NEW2[,2]-centers[2,2])^2))/2*diag(2)
  140. sigma[,,3]=(mean((NEW3[,1]-centers[3,1])^2)+mean((NEW3[,2]-centers[3,2])^2))/2*diag(2)
  141. sigma[,,4]=(mean((NEW4[,1]-centers[4,1])^2)+mean((NEW4[,2]-centers[4,2])^2))/2*diag(2)
  142. mix.pi=rep(1/K,K)
  143. N=500
  144. n.trial=1000
  145.  
  146. count = 0
  147. errorlist = rep(0,n.trial)
  148. for(i in 1:n.trial){
  149.   count = count +1
  150.   q = Estep(data.matrix(data),mix.pi,mu,sigma,K);
  151.   result = Mstep(TRUE,q,data.matrix(data),K,N,dim);
  152.   new.mix.pi =result[[1]]; new.mu =result[[2]]; new.sigma =result[[3]];
  153.   error =sum((new.mix.pi-mix.pi)^2) + sum((new.mu-mu)^2) + sum((new.sigma-sigma)^2)
  154.   errorlist[i] = error
  155.   mix.pi = new.mix.pi;
  156.   mu = new.mu;
  157.   sigma =new.sigma;
  158. plot(ellipse(sigma[,,1],level=0.90,centre=t(mu[,1])),type="l",ylim=c(-12,12),xlim=c(-12,12),col="red")
  159. par(new=T)
  160. plot(ellipse(sigma[,,2],level=0.90,centre=t(mu[,2])),type="l",ylim=c(-12,12),xlim=c(-12,12),col="blue")
  161. par(new=T)
  162. plot(ellipse(sigma[,,3],level=0.90,centre=t(mu[,3])),type="l",ylim=c(-12,12),xlim=c(-12,12),col="green")
  163. par(new=T)
  164. plot(ellipse(sigma[,,4],level=0.90,centre=t(mu[,4])),type="l",ylim=c(-12,12),xlim=c(-12,12),col="maroon")
  165. par(new=T)
  166. plot(data,col=toast,axes=F,ylim=c(-12,12),xlim=c(-12,12))
  167.  
  168. points(t(mu[,1]),pch=22,col="red")
  169. points(t(mu[,2]),pch=22,col="red")
  170. points(t(mu[,3]),pch=22,col="red")
  171. points(t(mu[,4]),pch=22,col="red")
  172.  
  173.  
  174.   if (error < 1e-5) break;
  175.  
  176. }
  177. plot(errorlist[1:count],log="y")
  178.  
  179. # install.packages("EMCluster")
  180. # library(EMCluster)
  181. # plotem(result,data)
  182. # sigma.inv=solve(result[[3]][,,1])
  183. # center=result[[2]][,1]
  184. # ellipse <- function(s,t) {u<-c(s,t)-center; u %*% sigma.inv %*% u / 2}
  185. # n <- 50
  186. # x <- (0:(n-1)) * (50/(n-1))
  187. # y <- (0:(n-1)) * (5/(n-1))
  188. # z <- mapply(ellipse, as.vector(rep(x,n)), as.vector(outer(rep(0,n), y, `+`)))
  189. # plot(data, pch=20, xlim=c(-10,10), ylim=c(-10,10), xlab="Packets", ylab="Flows")
  190. # contour(x,y,matrix(z,n,n), levels=(0:10), col = terrain.colors(11), add=TRUE)
  191.  
  192. # install.packages("mixtools")
  193. # library(mixtools)
  194. # ellipse(mu=center, sigma=result[[3]][,,1], alpha = .1,  col="red")
  195.  
  196. install.packages("ellipse")
  197. library(ellipse)
  198.  
  199.  
  200. plot(ellipse(sigma[,,1],level=0.90,centre=t(mu[,1])),type="l",ylim=c(-12,12),xlim=c(-12,12),col="yellow")
  201. par(new=T)
  202. plot(ellipse(sigma[,,2],level=0.90,centre=t(mu[,2])),type="l",ylim=c(-12,12),xlim=c(-12,12),col="red")
  203. par(new=T)
  204. plot(ellipse(sigma[,,3],level=0.90,centre=t(mu[,3])),type="l",ylim=c(-12,12),xlim=c(-12,12),col="maroon")
  205. par(new=T)
  206. plot(ellipse(sigma[,,4],level=0.90,centre=t(mu[,4])),type="l",ylim=c(-12,12),xlim=c(-12,12),col="green")
  207. par(new=T)
  208. plot(data,col=toast,axes=F,ylim=c(-12,12),xlim=c(-12,12))
  209.  
  210.  
  211. # plot(ellipse(diag(diag(sigma[,,1])),level=0.90,centre=t(mu[,1])),type="l",ylim=c(-12,12),xlim=c(-10,10),col="yellow")
  212. # par(new=T)
  213. # plot(ellipse(diag(diag(sigma[,,2])),level=0.90,centre=t(mu[,2])),type="l",ylim=c(-12,12),xlim=c(-10,10),col="red")
  214. # par(new=T)
  215. # plot(ellipse(diag(diag(sigma[,,3])),level=0.90,centre=t(mu[,3])),type="l",ylim=c(-12,12),xlim=c(-10,10),col="maroon")
  216. # par(new=T)
  217. # plot(ellipse(diag(diag(sigma[,,4])),level=0.90,centre=t(mu[,4])),type="l",ylim=c(-12,12),xlim=c(-10,10),col="green")
  218. # par(new=T)
  219. # plot(data,axes=F,ylim=c(-12,12),xlim=c(-10,10))
  220.  
  221.  
  222.  
  223.  
  224.  
  225. foncfonc=function(n){
  226.     for(i in 1:n) print(i+1)
  227. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement