Untitled

rm(list = ls())
gc(reset = TRUE)
#repl_python()
##Now we will start our project
#1. Loading packages
#R packages
library(cellWise)
library(GSE)
library(mvtnorm)
library(parallel)
library(foreach)
library(doParallel)
library(ggplot2)
library(tictoc)
library(beepr)
library(xtable)

##########
#Defining the Global Variables

loc = 2
sca = 5
n_train = 200
d_train = 25
percent = 0.05
Mu_train1 = numeric(d_train)
Sigma_train1 = diag(d_train)
Mu_train2 = numeric(d_train)+loc
Sigma_train2 = sca*diag(d_train)
n_test = 500
d_test = d_train
Mu_test1 = numeric(d_test)
Sigma_test1 = diag(d_test)
Mu_test2 = numeric(d_test)+loc
Sigma_test2 = sca*diag(d_test)
##Creating the
r = 10
gamma = c(1:10)*10


gamma_Data = list()


label = c(replicate(n_train,"1"),replicate(n_train,"2"))
Tlabel = c(replicate(n_test,"1"),replicate(n_test,"2"))

### Calculation for the Bayes Classifier
B = numeric(100)

for(i in 1:100)
{
  class_1 = rmvnorm(n_test,mean = Mu_test1,sigma = Sigma_test1)
  class_2 = rmvnorm(n_test,mean = Mu_test2,sigma = Sigma_test2)
  class = rbind(class_1,class_2)
  class = apply(X = class, MARGIN = c(1,2), FUN = as.numeric)

  class
  plot(class,col = Tlabel)
  Bayes = ifelse(dmvnorm(class,mean = Mu_test1,sigma = Sigma_test1)>
                   dmvnorm(class,mean = Mu_test2,sigma = Sigma_test2),"1","2")

  B[i] = mean(Bayes!=Tlabel)
}

mean(B)
sd(B)

LDA_mean = matrix(0,nrow = length(gamma),ncol = 4)
QDA_mean = matrix(0,nrow = length(gamma),ncol = 4)


tic()
distance = function(x,mu,S){sqrt(mahalanobis(x,mu,S))}  ##Calculating Mahalanobis Distance
depth = function(x,mu,S){1/(1+sqrt(mahalanobis(x,mu,S)))}
est = function(x,estimate){x[which(is.na(x))] = estimate[which(is.na(x))]
return(x)
}
ncores = detectCores(logical = FALSE)-2


for(j in 1:length(gamma)){
  tic()
  ##****Fitting Machine Learning Models*****
  ##*
  ##'[Preparing the Dataset]
  ##*
  Data = function()
  {
    DataMatrix1 = generateData(n = n_train,d  =d_train,mu = Mu_train1,S = Sigma_train1,
                               perout = percent,gamma = gamma[j],outlierType = "cellwisePlain")$X
    Mu1_Mean = apply(DataMatrix1,FUN = mean,MARGIN = 2)
    Mu1_Mean
    Sigma1_S = cov(DataMatrix1)
    #cellMap(DataMatrix1,rowlabels = 1:n_train,nrowsinblock = 4,drawCircles = FALSE)
    DataMatrix1 = apply(DataMatrix1,c(1,2),as.numeric)
    estim1_MCD = cellMCD(DataMatrix1)
    Mu1_MCD = estim1_MCD$mu              ##Estimate of Mu
    Sigma1_MCD = estim1_MCD$S           ##Estimate of Sigma

    #2SGS
    estim1_2SGS = TSGS(DataMatrix1)
    Mu1_2SGS = getLocation(estim1_2SGS)              ##Estimate of Mu
    Sigma1_2SGS = getScatter(estim1_2SGS)           ##Estimate of Sigma
    #DI
    estim1_DI = DI(DataMatrix1)
    Mu1_DI = estim1_DI$center              ##Estimate of Mu
    Sigma1_DI = estim1_DI$cov           ##Estimate of Sigma


    ##2.Train Data(For class-2)
    DataMatrix2 = rmvnorm(n = n_train,mean = Mu_train2,sigma = Sigma_train2)
    Mu2_Mean = apply(DataMatrix2,FUN = mean,MARGIN = 2)
    Sigma2_S = cov(DataMatrix2)
    #cellMap(DataMatrix2,rowlabels = 1:n_train,nrowsinblock = 4,drawCircles = FALSE)
    DataMatrix2 = apply(DataMatrix2,c(1,2),as.numeric)
    estim2_MCD = cellMCD(DataMatrix2)
    Mu2_MCD = estim2_MCD$mu              ##Estimate of Mu
    Sigma2_MCD = estim2_MCD$S           ##Estimate of Sigma
    #2SGS
    estim2 = TSGS(DataMatrix2)
    Mu2_2SGS = getLocation(estim2)              ##Estimate of Mu
    Sigma2_2SGS = getScatter(estim2)          ##Estimate of Sigma
    #DI
    estim2_DI = DI(DataMatrix2)
    Mu2_DI = estim2_DI$center              ##Estimate of Mu
    Sigma2_DI = estim2_DI$cov           ##Estimate of Sigma

    DataMatrix = rbind(DataMatrix1,DataMatrix2)
    DataMatrix = apply(DataMatrix,c(1,2),FUN = as.numeric)


    ##Test Data(For Class-1)
    #cellMap(TDataMatrix1,rowlabels = 1:n_test,nrowsinblock = 10,drawCircles = FALSE)
    TDataMatrix1 = generateData(n = n_test,d  =d_test,mu = Mu_test1,S = Sigma_test1,
                                perout = percent,gamma = gamma[j],outlierType = "cellwisePlain")$X
    TDataMatrix1 = apply(TDataMatrix1,c(1,2),as.numeric)

    ##2.Test Data(For class-2)
    #cellMap(TDataMatrix2,rowlabels = 1:n_test,nrowsinblock = 10,drawCircles = FALSE)
    TDataMatrix2 = rmvnorm(n = n_test,mean = Mu_test2,sigma = Sigma_test2)
    TDataMatrix2 = apply(TDataMatrix2,c(1,2), as.numeric)


    TDataMatrix = rbind(TDataMatrix1,TDataMatrix2)
    TDataMatrix = apply(TDataMatrix,c(1,2),as.numeric)

    L = list(Mu1_MCD,Sigma1_MCD,Mu2_MCD,Sigma2_MCD,
             Mu1_2SGS,Sigma1_2SGS,Mu2_2SGS,Sigma2_2SGS,
             Mu1_DI,Sigma1_DI,Mu2_DI,Sigma2_DI,
             Mu1_Mean,Sigma1_S,Mu2_Mean,Sigma2_S,
             DataMatrix,TDataMatrix)
    return(L)
  }


DA = function(G,mu1,sigma1,mu2,sigma2){
  D1 = apply(X = G[[18]],FUN = mahalanobis,center = mu1,cov = sigma1,MARGIN = 1)
  D2 = apply(X = G[[18]],FUN = mahalanobis,center = mu2,cov = sigma2,MARGIN = 1)
  pred = ifelse(D1>D2,"1","2")
  mis = 1 - (sum(Tlabel!=pred)/length(Tlabel))
  return(mis)
}


  ##****Fitting Machine Learning Models*****
  ##*
  ##'[Defining Function]
  ##*

  Model = function(G)
  {
    LDA = numeric(4)
    ##LDA(reduced Data)
    S24 = ((n_test-1)*G[[2]]+(n_test-1)*G[[4]])/(2*n_test-2)
    S68 = ((n_test-1)*G[[6]]+(n_test-1)*G[[8]])/(2*n_test-2)
    S1012 = ((n_test-1)*G[[10]]+(n_test-1)*G[[12]])/(2*n_test-2)
    S1416 = ((n_test-1)*G[[14]]+(n_test-1)*G[[16]])/(2*n_test-2)
    LDA[1] = DA(G,mu1 = G[[1]],sigma1 = S24,mu2 = G[[3]],sigma2 = S24)
    LDA[2] = DA(G,mu1 = G[[5]],sigma1 = S68,mu2 = G[[7]],sigma2 = S68)
    LDA[3] = DA(G,mu1 = G[[9]],sigma1 = S1012,mu2 = G[[11]],sigma2 = S1012)
    LDA[4] = DA(G,mu1 = G[[13]],sigma1 = S1416,mu2 = G[[15]],sigma2 = S1416)
    ##############################################################################
    QDA = numeric(4)
    ##############################################################################
    ##QDA(reduced data)
    QDA[1] = DA(G,mu1 = G[[1]],sigma1 = G[[2]],mu2 = G[[3]],sigma2 = G[[4]])
    QDA[2] = DA(G,mu1 = G[[5]],sigma1 = G[[6]],mu2 = G[[7]],sigma2 = G[[8]])
    QDA[3] = DA(G,mu1 = G[[9]],sigma1 = G[[10]],mu2 = G[[11]],sigma2 = G[[12]])
    QDA[4] = DA(G,mu1 = G[[13]],sigma1 = G[[14]],mu2 = G[[15]],sigma2 = G[[16]])
    ##############################################################################
    toc()
    return(list(LDA,QDA))
  }
  tic()
  LDA = matrix(0,r,4)
  QDA = matrix(0,r,4)
  cl = makePSOCKcluster(2)
  registerDoParallel(cl,cores = 2)
  foreach(i=1:r) %do%
    {
      G = Data()
      print(paste("Number of iterations:",i))
      LDA[i,] = Model(G)[[1]]
      QDA[i,] = Model(G)[[2]]
      print(paste("Number of iterations:",i))
    }
  stopCluster(cl)
  toc()
  beep(2)

  ME = matrix(0,4,4)
  LDAMean = apply(LDA,FUN = mean,MARGIN = 2)
  LDASd = apply(LDA,FUN = sd,MARGIN = 2)

  QDAMean = apply(QDA,FUN = mean,MARGIN = 2)
  QDASd = apply(QDA,FUN = sd,MARGIN = 2)

  ME[1,] = LDAMean
  ME[2,] = LDASd
  ME[3,] = QDAMean
  ME[4,] = QDASd
  rownames(ME) = c("LDA","sd","QDA","sd")
  colnames(ME) = c("MCD","2SGS","DI","Non-Robust")
  ME = apply(ME,MARGIN = c(1,2),as.numeric)
  print(ME)
  #xtable(ME,digits = 5)


  gamma_Data = append(gamma_Data,list(ME))


  par(mfrow = c(1,2),main = paste("Boxplot for gamma = ",gamma[j]))

  LDA = data.frame(LDA)
  colnames(LDA) = c("MCD","2SGS","DI","NR")
  boxplot(LDA,main = "boxplot for LDA",ylab = paste("Misclassification Error"))


  QDA = data.frame(QDA)
  colnames(QDA) = c("MCD","2SGS","DI","NR")
  boxplot(QDA,main = "boxplot for QDA",ylab = paste("Misclassification Error"))


  LDA_mean[j,] = LDAMean
  QDA_mean[j,] = QDAMean
}
toc()


saveRDS(gamma_Data,file = "N_0.05_Loc_5.rds")

# Create a line plot using ggplot2
####################################
#########plot for LDA###############
####################################
LDA_mean = data.frame(gamma,LDA_mean)
ggplot(LDA_mean, aes(x=gamma)) +
  geom_line(aes(y=X1, color="MCD")) +
  geom_point(aes(y=X1, color="MCD"))+
  geom_line(aes(y=X2, color="2SGS")) +
  geom_point(aes(y=X2, color="2SGS")) +
  geom_line(aes(y=X3, color="DI"))+
  geom_point(aes(y=X3, color="DI"))+
  geom_line(aes(y=X4, color="NR"))+
  geom_point(aes(y=X4, color="NR"))+
  scale_color_manual(values=c("MCD"="red", "2SGS"="blue","DI" = "green","NR" = "darkviolet"))+
  xlab(expression(gamma))+ylab("Misclassification Error")+
  ggtitle("Plot for LDA with Different Values of gamma")


#####################################
###########plot for QDA##############
#####################################
QDA_mean = data.frame(gamma,QDA_mean)
ggplot(QDA_mean, aes(x=gamma)) +
  geom_line(aes(y=X1, color="MCD")) +
  geom_point(aes(y=X1, color="MCD"))+
  geom_line(aes(y=X2, color="2SGS")) +
  geom_point(aes(y=X2, color="2SGS")) +
  geom_line(aes(y=X3, color="DI"))+
  geom_point(aes(y=X3, color="DI"))+
  geom_line(aes(y=X4, color="NR"))+
  geom_point(aes(y=X4, color="NR"))+
  scale_color_manual(values=c("MCD"="red", "2SGS"="blue","DI" = "green","NR" = "darkviolet"))+
  xlab(expression(gamma))+ylab("Misclassification Error")+
  ggtitle("Plot for QDA with Different Values of gamma")