Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# NOTE(review): clearing the user's workspace and forcing gc() at the top of a
# script is discouraged (it wipes the interactive session); kept pending confirmation.
rm(list = ls())
gc(reset = TRUE)
#repl_python()
##Now we will start our project
#1. Loading packages
#R packages
library(cellWise)    # generateData(), cellMCD(), DI(), cellMap()
library(GSE)         # TSGS(): two-step generalized S-estimator
library(mvtnorm)     # rmvnorm(), dmvnorm()
library(parallel)    # detectCores(), makePSOCKcluster()
library(foreach)     # foreach() iteration
library(doParallel)  # registerDoParallel()
library(ggplot2)     # final line plots
library(tictoc)      # tic()/toc() timing
library(beepr)       # beep() when the simulation finishes
library(xtable)      # LaTeX export (currently commented out below)
##########
# Global simulation parameters
loc = 2        # location shift of class 2 relative to class 1
sca = 5        # scale multiplier for the class-2 covariance
n_train = 200  # training sample size per class
d_train = 25   # dimension
percent = 0.05 # fraction of cellwise-contaminated cells in class-1 data
Mu_train1 = numeric(d_train)      # class 1 trains as N(0, I)
Sigma_train1 = diag(d_train)
Mu_train2 = numeric(d_train)+loc  # class 2 trains as N(loc*1, sca*I)
Sigma_train2 = sca*diag(d_train)
n_test = 500                      # test sample size per class
d_test = d_train
Mu_test1 = numeric(d_test)
Sigma_test1 = diag(d_test)
Mu_test2 = numeric(d_test)+loc
Sigma_test2 = sca*diag(d_test)
## Simulation grid: r replicates at each contamination magnitude in gamma
r = 10
gamma = c(1:10)*10
gamma_Data = list()   # collects one 4x4 summary matrix per gamma value
# True class labels for the stacked (class-1 rows first) train/test matrices
label = c(replicate(n_train,"1"),replicate(n_train,"2"))
Tlabel = c(replicate(n_test,"1"),replicate(n_test,"2"))
### Baseline: Bayes classifier error on clean Gaussian test data.
# 100 replicates; report mean and sd of the Bayes misclassification error.
# Fixes vs original: `class` shadowed base::class() (renamed), plot() was
# redrawn inside every one of the 100 iterations (moved after the loop),
# and the elementwise apply(..., as.numeric) pass was redundant because
# rmvnorm() already returns a numeric matrix.
B = numeric(100)
for(i in 1:100)
{
  # Fresh test set: n_test points per class, stacked class-1 rows first
  testX1 = rmvnorm(n_test, mean = Mu_test1, sigma = Sigma_test1)
  testX2 = rmvnorm(n_test, mean = Mu_test2, sigma = Sigma_test2)
  testX = rbind(testX1, testX2)
  # Bayes rule: assign each point to the class with the higher density
  Bayes = ifelse(dmvnorm(testX, mean = Mu_test1, sigma = Sigma_test1) >
                   dmvnorm(testX, mean = Mu_test2, sigma = Sigma_test2), "1", "2")
  B[i] = mean(Bayes != Tlabel)
}
# Visualize the last replicate only
plot(testX, col = Tlabel)
mean(B)
sd(B)
# Preallocate the per-gamma mean-error tables:
# one row per gamma value, one column per estimator (MCD, 2SGS, DI, NR).
LDA_mean = matrix(0, length(gamma), 4)
QDA_mean = matrix(0, length(gamma), 4)
tic()  # overall wall-clock timer for the whole simulation
- distance = function(x,mu,S){sqrt(mahalanobis(x,mu,S))} ##Calculating Mahalanobis Distance
- depth = function(x,mu,S){1/(1+sqrt(mahalanobis(x,mu,S)))}
# Impute missing cells: fill each NA entry of x with the value at the same
# position in `estimate`; non-missing entries are left untouched.
est = function(x, estimate) {
  missing_idx = is.na(x)
  x[missing_idx] = estimate[missing_idx]
  x
}
- ncores = detectCores(logical = FALSE)-2
## Main simulation loop: one pass per contamination magnitude gamma[j].
## seq_along() replaces 1:length(gamma), which misbehaves on empty vectors.
for(j in seq_along(gamma)){
  tic()  # per-gamma timer
  ## Fitting the classifiers for this gamma: first build the dataset
  Data = function()
  {
    # Simulate one replicate and return location/scatter estimates for both
    # classes under four estimators, plus the stacked train/test matrices.
    #
    # Returned list layout (consumed positionally by Model()/DA()):
    #   [1:4]   cellMCD:    Mu1, Sigma1, Mu2, Sigma2
    #   [5:8]   2SGS:       Mu1, Sigma1, Mu2, Sigma2
    #   [9:12]  DI:         Mu1, Sigma1, Mu2, Sigma2
    #   [13:16] mean / cov: Mu1, Sigma1, Mu2, Sigma2  (non-robust)
    #   [17]    stacked training matrix, [18] stacked test matrix
    #
    # 1. Train data, class 1: `percent` of cells contaminated with cellwise
    # outliers of magnitude gamma[j] (j comes from the enclosing loop).
    DataMatrix1 = generateData(n = n_train,d =d_train,mu = Mu_train1,S = Sigma_train1,
                               perout = percent,gamma = gamma[j],outlierType = "cellwisePlain")$X
    # Non-robust estimates (affected by the contaminated cells)
    Mu1_Mean = apply(DataMatrix1,FUN = mean,MARGIN = 2)
    Mu1_Mean
    # ^ NOTE(review): bare expression inside a function is a no-op (no autoprint)
    Sigma1_S = cov(DataMatrix1)
    #cellMap(DataMatrix1,rowlabels = 1:n_train,nrowsinblock = 4,drawCircles = FALSE)
    # NOTE(review): $X should already be numeric; this conversion is presumably redundant
    DataMatrix1 = apply(DataMatrix1,c(1,2),as.numeric)
    # Cellwise MCD estimates
    estim1_MCD = cellMCD(DataMatrix1)
    Mu1_MCD = estim1_MCD$mu ##Estimate of Mu
    Sigma1_MCD = estim1_MCD$S ##Estimate of Sigma
    #2SGS (two-step generalized S-estimator)
    estim1_2SGS = TSGS(DataMatrix1)
    Mu1_2SGS = getLocation(estim1_2SGS) ##Estimate of Mu
    Sigma1_2SGS = getScatter(estim1_2SGS) ##Estimate of Sigma
    #DI estimator
    estim1_DI = DI(DataMatrix1)
    Mu1_DI = estim1_DI$center ##Estimate of Mu
    Sigma1_DI = estim1_DI$cov ##Estimate of Sigma
    ##2.Train Data(For class-2): clean multivariate normal, no contamination
    DataMatrix2 = rmvnorm(n = n_train,mean = Mu_train2,sigma = Sigma_train2)
    Mu2_Mean = apply(DataMatrix2,FUN = mean,MARGIN = 2)
    Sigma2_S = cov(DataMatrix2)
    #cellMap(DataMatrix2,rowlabels = 1:n_train,nrowsinblock = 4,drawCircles = FALSE)
    DataMatrix2 = apply(DataMatrix2,c(1,2),as.numeric)
    estim2_MCD = cellMCD(DataMatrix2)
    Mu2_MCD = estim2_MCD$mu ##Estimate of Mu
    Sigma2_MCD = estim2_MCD$S ##Estimate of Sigma
    #2SGS
    estim2 = TSGS(DataMatrix2)
    Mu2_2SGS = getLocation(estim2) ##Estimate of Mu
    Sigma2_2SGS = getScatter(estim2) ##Estimate of Sigma
    #DI
    estim2_DI = DI(DataMatrix2)
    Mu2_DI = estim2_DI$center ##Estimate of Mu
    Sigma2_DI = estim2_DI$cov ##Estimate of Sigma
    # Stacked training matrix: class-1 rows first, then class-2 (matches `label`)
    DataMatrix = rbind(DataMatrix1,DataMatrix2)
    DataMatrix = apply(DataMatrix,c(1,2),FUN = as.numeric)
    ##Test Data(For Class-1): same cellwise contamination scheme as training
    #cellMap(TDataMatrix1,rowlabels = 1:n_test,nrowsinblock = 10,drawCircles = FALSE)
    TDataMatrix1 = generateData(n = n_test,d =d_test,mu = Mu_test1,S = Sigma_test1,
                                perout = percent,gamma = gamma[j],outlierType = "cellwisePlain")$X
    TDataMatrix1 = apply(TDataMatrix1,c(1,2),as.numeric)
    ##2.Test Data(For class-2): clean
    #cellMap(TDataMatrix2,rowlabels = 1:n_test,nrowsinblock = 10,drawCircles = FALSE)
    TDataMatrix2 = rmvnorm(n = n_test,mean = Mu_test2,sigma = Sigma_test2)
    TDataMatrix2 = apply(TDataMatrix2,c(1,2), as.numeric)
    # Stacked test matrix: class-1 rows first, then class-2 (matches `Tlabel`)
    TDataMatrix = rbind(TDataMatrix1,TDataMatrix2)
    TDataMatrix = apply(TDataMatrix,c(1,2),as.numeric)
    L = list(Mu1_MCD,Sigma1_MCD,Mu2_MCD,Sigma2_MCD,
             Mu1_2SGS,Sigma1_2SGS,Mu2_2SGS,Sigma2_2SGS,
             Mu1_DI,Sigma1_DI,Mu2_DI,Sigma2_DI,
             Mu1_Mean,Sigma1_S,Mu2_Mean,Sigma2_S,
             DataMatrix,TDataMatrix)
    return(L)
  }
- DA = function(G,mu1,sigma1,mu2,sigma2){
- D1 = apply(X = G[[18]],FUN = mahalanobis,center = mu1,cov = sigma1,MARGIN = 1)
- D2 = apply(X = G[[18]],FUN = mahalanobis,center = mu2,cov = sigma2,MARGIN = 1)
- pred = ifelse(D1>D2,"1","2")
- mis = 1 - (sum(Tlabel!=pred)/length(Tlabel))
- return(mis)
- }
- ##****Fitting Machine Learning Models*****
- ##*
- ##'[Defining Function]
- ##*
- Model = function(G)
- {
- LDA = numeric(4)
- ##LDA(reduced Data)
- S24 = ((n_test-1)*G[[2]]+(n_test-1)*G[[4]])/(2*n_test-2)
- S68 = ((n_test-1)*G[[6]]+(n_test-1)*G[[8]])/(2*n_test-2)
- S1012 = ((n_test-1)*G[[10]]+(n_test-1)*G[[12]])/(2*n_test-2)
- S1416 = ((n_test-1)*G[[14]]+(n_test-1)*G[[16]])/(2*n_test-2)
- LDA[1] = DA(G,mu1 = G[[1]],sigma1 = S24,mu2 = G[[3]],sigma2 = S24)
- LDA[2] = DA(G,mu1 = G[[5]],sigma1 = S68,mu2 = G[[7]],sigma2 = S68)
- LDA[3] = DA(G,mu1 = G[[9]],sigma1 = S1012,mu2 = G[[11]],sigma2 = S1012)
- LDA[4] = DA(G,mu1 = G[[13]],sigma1 = S1416,mu2 = G[[15]],sigma2 = S1416)
- ##############################################################################
- QDA = numeric(4)
- ##############################################################################
- ##QDA(reduced data)
- QDA[1] = DA(G,mu1 = G[[1]],sigma1 = G[[2]],mu2 = G[[3]],sigma2 = G[[4]])
- QDA[2] = DA(G,mu1 = G[[5]],sigma1 = G[[6]],mu2 = G[[7]],sigma2 = G[[8]])
- QDA[3] = DA(G,mu1 = G[[9]],sigma1 = G[[10]],mu2 = G[[11]],sigma2 = G[[12]])
- QDA[4] = DA(G,mu1 = G[[13]],sigma1 = G[[14]],mu2 = G[[15]],sigma2 = G[[16]])
- ##############################################################################
- toc()
- return(list(LDA,QDA))
- }
- tic()
- LDA = matrix(0,r,4)
- QDA = matrix(0,r,4)
- cl = makePSOCKcluster(2)
- registerDoParallel(cl,cores = 2)
- foreach(i=1:r) %do%
- {
- G = Data()
- print(paste("Number of iterations:",i))
- LDA[i,] = Model(G)[[1]]
- QDA[i,] = Model(G)[[2]]
- print(paste("Number of iterations:",i))
- }
- stopCluster(cl)
- toc()
- beep(2)
- ME = matrix(0,4,4)
- LDAMean = apply(LDA,FUN = mean,MARGIN = 2)
- LDASd = apply(LDA,FUN = sd,MARGIN = 2)
- QDAMean = apply(QDA,FUN = mean,MARGIN = 2)
- QDASd = apply(QDA,FUN = sd,MARGIN = 2)
- ME[1,] = LDAMean
- ME[2,] = LDASd
- ME[3,] = QDAMean
- ME[4,] = QDASd
- rownames(ME) = c("LDA","sd","QDA","sd")
- colnames(ME) = c("MCD","2SGS","DI","Non-Robust")
- ME = apply(ME,MARGIN = c(1,2),as.numeric)
- print(ME)
- #xtable(ME,digits = 5)
- gamma_Data = append(gamma_Data,list(ME))
- par(mfrow = c(1,2),main = paste("Boxplot for gamma = ",gamma[j]))
- LDA = data.frame(LDA)
- colnames(LDA) = c("MCD","2SGS","DI","NR")
- boxplot(LDA,main = "boxplot for LDA",ylab = paste("Misclassification Error"))
- QDA = data.frame(QDA)
- colnames(QDA) = c("MCD","2SGS","DI","NR")
- boxplot(QDA,main = "boxplot for QDA",ylab = paste("Misclassification Error"))
- LDA_mean[j,] = LDAMean
- QDA_mean[j,] = QDAMean
- }
toc()  # closes the overall timer started before the gamma loop
# Persist the per-gamma summary tables.
# NOTE(review): the filename encodes percent = 0.05, but "Loc_5" does not match
# loc = 2 (sca = 5 does) -- confirm the intended naming before relying on it.
saveRDS(gamma_Data,file = "N_0.05_Loc_5.rds")
- # Create a line plot using ggplot2
- ####################################
- #########plot for LDA###############
- ####################################
LDA_mean = data.frame(gamma, LDA_mean)  # columns: gamma, X1..X4 = MCD, 2SGS, DI, NR
# Reshape to long format so one grouped geom_line/geom_point pair draws all
# four estimators, replacing four copy-pasted geom pairs per column.
# (Legend is now titled "Method" instead of ggplot's default "colour".)
lda_long = data.frame(
  gamma = rep(LDA_mean$gamma, times = 4),
  Method = rep(c("MCD", "2SGS", "DI", "NR"), each = nrow(LDA_mean)),
  Error = c(LDA_mean$X1, LDA_mean$X2, LDA_mean$X3, LDA_mean$X4)
)
ggplot(lda_long, aes(x = gamma, y = Error, color = Method)) +
  geom_line() +
  geom_point() +
  scale_color_manual(values = c("MCD" = "red", "2SGS" = "blue", "DI" = "green", "NR" = "darkviolet")) +
  xlab(expression(gamma)) + ylab("Misclassification Error") +
  ggtitle("Plot for LDA with Different Values of gamma")
- #####################################
- ###########plot for QDA##############
- #####################################
QDA_mean = data.frame(gamma, QDA_mean)  # columns: gamma, X1..X4 = MCD, 2SGS, DI, NR
# Long format: one grouped geom pair instead of four copy-pasted ones.
# (Legend is now titled "Method" instead of ggplot's default "colour".)
qda_long = data.frame(
  gamma = rep(QDA_mean$gamma, times = 4),
  Method = rep(c("MCD", "2SGS", "DI", "NR"), each = nrow(QDA_mean)),
  Error = c(QDA_mean$X1, QDA_mean$X2, QDA_mean$X3, QDA_mean$X4)
)
ggplot(qda_long, aes(x = gamma, y = Error, color = Method)) +
  geom_line() +
  geom_point() +
  scale_color_manual(values = c("MCD" = "red", "2SGS" = "blue", "DI" = "green", "NR" = "darkviolet")) +
  xlab(expression(gamma)) + ylab("Misclassification Error") +
  ggtitle("Plot for QDA with Different Values of gamma")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement