EM

data=read.table("EMGaussian.data",header=FALSE,dec=".")
data=read.table("EMGaussian.test",header=FALSE,dec=".")
plot(data)
km=kmeans(data,4)
plot(data,col=km$cluster)
install.packages("pdist")
library(pdist)

kmeans=function(x,k){
  new=x
  new$clstr=c(0)
  centers=x[sample(nrow(x),k),]

}
conver=rep(0,50)
for(temp in 1:50){
new=data
new$clstr=c(0)
centers=data[sample(nrow(data),4),]


#mypdist=pdist(data.matrix(data),data.matrix(centers)))
#mynewpdist=as.matrix(mypdist)
loops=0
while(1){
  loops=loops+1
  tata=matrix(data=NA,nrow=500,ncol=4)
  for(i in 1:500){
    for(j in 1:4){
      tata[i,j]=sqrt(sum((data[i,] - centers[j,]) ^ 2))
    }
  }

  mynewpdist=tata
  for(i in 1:500){
    for(j in 1:4){
      if(min(mynewpdist[i,])==mynewpdist[i,j]){
        new$clstr[i]=j
        break
      }
    }
  }
  toast=as.factor(new$clstr)
  #plot(new$V1,new$V2,col=toast)
  #points(centers,pch=23)

  #   newmean1= mean(subset(new, clstr==1))
  #   newmean2= mean(subset(new, clstr==2))
  #   newmean3= mean(subset(new, clstr==3))
  #   newmean4= mean(subset(new, clstr==4))

  newmean1= apply(subset(new, clstr==1),2,mean)
  newmean2= apply(subset(new, clstr==2),2,mean)
  newmean3= apply(subset(new, clstr==3),2,mean)
  newmean4= apply(subset(new, clstr==4),2,mean)

  newcenters=data.frame(newmean1,newmean2,newmean3,newmean4)
  newcenters=data.frame(t(as.matrix(newcenters)))
  newcenters$clstr=NULL
conver[temp]=loops
  if(all(newcenters==centers)){break}

  centers=newcenters

}
}
install.packages("mvtnorm")
library(mvtnorm)

Estep = function(data,mix.pi,mu,sigma,K){
  result = apply(data, 1, function(xt){
    eq_i = sapply(1:K,function(k) {
      #mix.pi[k] * dnorm(xt, mu[,k], sigma[,,k])
    mix.pi[k] * dmvnorm(xt, mu[,k], sigma[,,k])
    });
    eq_i / sum(eq_i);
  });
  t(result);
}

Mstep = function(isotrope,q,data,K,N,degree) {
  q.sum = apply(q,2,sum)
  new.mix = q.sum/N;
  new.mu = t(t(t(data) %*% q) / q.sum);
  new.sigma = array(0, dim=c(dim, dim, K));
  if(!isotrope){
  for(k in 1:K) {
    sig = matrix(0,dim,dim);
    for(n in 1:N) {
      sig = sig + q[n, k] * (data[n,]-new.mu[,k]) %*% t(data[n,]-new.mu[,k]);
    }
    new.sigma[,,k] = sig / q.sum[k] #- new.mu[,k] %*% t(new.mu[,k])
  }
  }
  else if(isotrope){
    for(k in 1:K) {
      sig = matrix(0,dim,dim);

      for(n in 1:N) {
        sig[1,1] = sig[1,1] + q[n, k] * t(data[n,]-new.mu[,k]) %*% (data[n,]-new.mu[,k]);
        print(sig)
      }
      sigmacarre=sig[1,1]/(2* q.sum[k])
    print(sigmacarre)
      new.sigma[,,k] = sigmacarre*diag(2)  #- new.mu[,k] %*% t(new.mu[,k])
    }

  }
  list(new.mix, new.mu, new.sigma);
}

NEW1=subset(new, clstr==1)
NEW1$clstr=NULL
NEW2=subset(new, clstr==2)
NEW2$clstr=NULL
NEW3=subset(new, clstr==3)
NEW3$clstr=NULL
NEW4=subset(new, clstr==4)
NEW4$clstr=NULL


var(NEW1)
var(NEW2)
var(NEW3)
var(NEW4)


K=4
dim=2
# mu=matrix(apply(data,2,mean), nrow=dim, ncol=K)
mu=t(centers)
sigma =array(0, dim=c(dim, dim, K));
sigma[,,1]=var(NEW1)
sigma[,,2]=var(NEW2)
sigma[,,3]=var(NEW3)
sigma[,,4]=var(NEW4)
sigma[,,1]=(mean((NEW1[,1]-centers[1,1])^2)+mean((NEW1[,2]-centers[1,2])^2))/2*diag(2)
sigma[,,2]=(mean((NEW2[,1]-centers[2,1])^2)+mean((NEW2[,2]-centers[2,2])^2))/2*diag(2)
sigma[,,3]=(mean((NEW3[,1]-centers[3,1])^2)+mean((NEW3[,2]-centers[3,2])^2))/2*diag(2)
sigma[,,4]=(mean((NEW4[,1]-centers[4,1])^2)+mean((NEW4[,2]-centers[4,2])^2))/2*diag(2)
mix.pi=rep(1/K,K)
N=500
n.trial=1000

count = 0
errorlist = rep(0,n.trial)
for(i in 1:n.trial){
  count = count +1
  q = Estep(data.matrix(data),mix.pi,mu,sigma,K);
  result = Mstep(TRUE,q,data.matrix(data),K,N,dim);
  new.mix.pi =result[[1]]; new.mu =result[[2]]; new.sigma =result[[3]];
  error =sum((new.mix.pi-mix.pi)^2) + sum((new.mu-mu)^2) + sum((new.sigma-sigma)^2)
  errorlist[i] = error
  mix.pi = new.mix.pi;
  mu = new.mu;
  sigma =new.sigma;
plot(ellipse(sigma[,,1],level=0.90,centre=t(mu[,1])),type="l",ylim=c(-12,12),xlim=c(-12,12),col="red")
par(new=T)
plot(ellipse(sigma[,,2],level=0.90,centre=t(mu[,2])),type="l",ylim=c(-12,12),xlim=c(-12,12),col="blue")
par(new=T)
plot(ellipse(sigma[,,3],level=0.90,centre=t(mu[,3])),type="l",ylim=c(-12,12),xlim=c(-12,12),col="green")
par(new=T)
plot(ellipse(sigma[,,4],level=0.90,centre=t(mu[,4])),type="l",ylim=c(-12,12),xlim=c(-12,12),col="maroon")
par(new=T)
plot(data,col=toast,axes=F,ylim=c(-12,12),xlim=c(-12,12))

points(t(mu[,1]),pch=22,col="red")
points(t(mu[,2]),pch=22,col="red")
points(t(mu[,3]),pch=22,col="red")
points(t(mu[,4]),pch=22,col="red")


  if (error < 1e-5) break;

}
plot(errorlist[1:count],log="y")

# install.packages("EMCluster")
# library(EMCluster)
# plotem(result,data)
# sigma.inv=solve(result[[3]][,,1])
# center=result[[2]][,1]
# ellipse <- function(s,t) {u<-c(s,t)-center; u %*% sigma.inv %*% u / 2}
# n <- 50
# x <- (0:(n-1)) * (50/(n-1))
# y <- (0:(n-1)) * (5/(n-1))
# z <- mapply(ellipse, as.vector(rep(x,n)), as.vector(outer(rep(0,n), y, `+`)))
# plot(data, pch=20, xlim=c(-10,10), ylim=c(-10,10), xlab="Packets", ylab="Flows")
# contour(x,y,matrix(z,n,n), levels=(0:10), col = terrain.colors(11), add=TRUE)

# install.packages("mixtools")
# library(mixtools)
# ellipse(mu=center, sigma=result[[3]][,,1], alpha = .1,  col="red")

install.packages("ellipse")
library(ellipse)


plot(ellipse(sigma[,,1],level=0.90,centre=t(mu[,1])),type="l",ylim=c(-12,12),xlim=c(-12,12),col="yellow")
par(new=T)
plot(ellipse(sigma[,,2],level=0.90,centre=t(mu[,2])),type="l",ylim=c(-12,12),xlim=c(-12,12),col="red")
par(new=T)
plot(ellipse(sigma[,,3],level=0.90,centre=t(mu[,3])),type="l",ylim=c(-12,12),xlim=c(-12,12),col="maroon")
par(new=T)
plot(ellipse(sigma[,,4],level=0.90,centre=t(mu[,4])),type="l",ylim=c(-12,12),xlim=c(-12,12),col="green")
par(new=T)
plot(data,col=toast,axes=F,ylim=c(-12,12),xlim=c(-12,12))


# plot(ellipse(diag(diag(sigma[,,1])),level=0.90,centre=t(mu[,1])),type="l",ylim=c(-12,12),xlim=c(-10,10),col="yellow")
# par(new=T)
# plot(ellipse(diag(diag(sigma[,,2])),level=0.90,centre=t(mu[,2])),type="l",ylim=c(-12,12),xlim=c(-10,10),col="red")
# par(new=T)
# plot(ellipse(diag(diag(sigma[,,3])),level=0.90,centre=t(mu[,3])),type="l",ylim=c(-12,12),xlim=c(-10,10),col="maroon")
# par(new=T)
# plot(ellipse(diag(diag(sigma[,,4])),level=0.90,centre=t(mu[,4])),type="l",ylim=c(-12,12),xlim=c(-10,10),col="green")
# par(new=T)
# plot(data,axes=F,ylim=c(-12,12),xlim=c(-10,10))


foncfonc=function(n){
    for(i in 1:n) print(i+1)
}