Advertisement
Prithwijit21

Untitled

May 15th, 2023
158
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 9.31 KB | Source Code | 0 0
  1. rm(list = ls())
  2. gc(reset = TRUE)
  3. #repl_python()
  4. ##Now we will start our project
  5. #1. Loading packages
  6. #R packages
  7. library(cellWise)
  8. library(GSE)
  9. library(mvtnorm)
  10. library(parallel)
  11. library(foreach)
  12. library(doParallel)
  13. library(ggplot2)
  14. library(tictoc)
  15. library(beepr)
  16. library(xtable)
  17.  
  18. ##########
  19. #Defining the Global Variables
  20.  
  21. loc = 2
  22. sca = 5
  23. n_train = 200
  24. d_train = 25
  25. percent = 0.05
  26. Mu_train1 = numeric(d_train)
  27. Sigma_train1 = diag(d_train)
  28. Mu_train2 = numeric(d_train)+loc
  29. Sigma_train2 = sca*diag(d_train)
  30. n_test = 500
  31. d_test = d_train
  32. Mu_test1 = numeric(d_test)
  33. Sigma_test1 = diag(d_test)
  34. Mu_test2 = numeric(d_test)+loc
  35. Sigma_test2 = sca*diag(d_test)
  36. ##Creating the
  37. r = 10
  38. gamma = c(1:10)*10
  39.  
  40.  
  41. gamma_Data = list()
  42.  
  43.  
  44. label = c(replicate(n_train,"1"),replicate(n_train,"2"))
  45. Tlabel = c(replicate(n_test,"1"),replicate(n_test,"2"))
  46.  
  47. ### Calculation for the Bayes Classifier
  48. B = numeric(100)
  49.  
  50. for(i in 1:100)
  51. {
  52.   class_1 = rmvnorm(n_test,mean = Mu_test1,sigma = Sigma_test1)
  53.   class_2 = rmvnorm(n_test,mean = Mu_test2,sigma = Sigma_test2)
  54.   class = rbind(class_1,class_2)
  55.   class = apply(X = class, MARGIN = c(1,2), FUN = as.numeric)
  56.  
  57.   class
  58.   plot(class,col = Tlabel)
  59.   Bayes = ifelse(dmvnorm(class,mean = Mu_test1,sigma = Sigma_test1)>
  60.                    dmvnorm(class,mean = Mu_test2,sigma = Sigma_test2),"1","2")
  61.  
  62.   B[i] = mean(Bayes!=Tlabel)
  63. }
  64.  
  65. mean(B)
  66. sd(B)
  67.  
  68. LDA_mean = matrix(0,nrow = length(gamma),ncol = 4)
  69. QDA_mean = matrix(0,nrow = length(gamma),ncol = 4)
  70.  
  71.  
  72.  
  73.  
  74. tic()
  75. distance = function(x,mu,S){sqrt(mahalanobis(x,mu,S))}  ##Calculating Mahalanobis Distance
  76. depth = function(x,mu,S){1/(1+sqrt(mahalanobis(x,mu,S)))}
  77. est = function(x,estimate){x[which(is.na(x))] = estimate[which(is.na(x))]
  78. return(x)
  79. }
  80. ncores = detectCores(logical = FALSE)-2
  81.  
  82.  
  83. for(j in 1:length(gamma)){
  84.   tic()
  85.   ##****Fitting Machine Learning Models*****
  86.   ##*
  87.   ##'[Preparing the Dataset]
  88.   ##*
  89.   Data = function()
  90.   {
  91.     DataMatrix1 = generateData(n = n_train,d  =d_train,mu = Mu_train1,S = Sigma_train1,
  92.                                perout = percent,gamma = gamma[j],outlierType = "cellwisePlain")$X
  93.     Mu1_Mean = apply(DataMatrix1,FUN = mean,MARGIN = 2)
  94.     Mu1_Mean
  95.     Sigma1_S = cov(DataMatrix1)
  96.     #cellMap(DataMatrix1,rowlabels = 1:n_train,nrowsinblock = 4,drawCircles = FALSE)
  97.     DataMatrix1 = apply(DataMatrix1,c(1,2),as.numeric)
  98.     estim1_MCD = cellMCD(DataMatrix1)
  99.     Mu1_MCD = estim1_MCD$mu              ##Estimate of Mu
  100.     Sigma1_MCD = estim1_MCD$S           ##Estimate of Sigma
  101.    
  102.     #2SGS
  103.     estim1_2SGS = TSGS(DataMatrix1)
  104.     Mu1_2SGS = getLocation(estim1_2SGS)              ##Estimate of Mu
  105.     Sigma1_2SGS = getScatter(estim1_2SGS)           ##Estimate of Sigma
  106.     #DI
  107.     estim1_DI = DI(DataMatrix1)
  108.     Mu1_DI = estim1_DI$center              ##Estimate of Mu
  109.     Sigma1_DI = estim1_DI$cov           ##Estimate of Sigma
  110.    
  111.    
  112.    
  113.     ##2.Train Data(For class-2)
  114.     DataMatrix2 = rmvnorm(n = n_train,mean = Mu_train2,sigma = Sigma_train2)
  115.     Mu2_Mean = apply(DataMatrix2,FUN = mean,MARGIN = 2)
  116.     Sigma2_S = cov(DataMatrix2)
  117.     #cellMap(DataMatrix2,rowlabels = 1:n_train,nrowsinblock = 4,drawCircles = FALSE)
  118.     DataMatrix2 = apply(DataMatrix2,c(1,2),as.numeric)
  119.     estim2_MCD = cellMCD(DataMatrix2)
  120.     Mu2_MCD = estim2_MCD$mu              ##Estimate of Mu
  121.     Sigma2_MCD = estim2_MCD$S           ##Estimate of Sigma
  122.     #2SGS
  123.     estim2 = TSGS(DataMatrix2)
  124.     Mu2_2SGS = getLocation(estim2)              ##Estimate of Mu
  125.     Sigma2_2SGS = getScatter(estim2)          ##Estimate of Sigma
  126.     #DI
  127.     estim2_DI = DI(DataMatrix2)
  128.     Mu2_DI = estim2_DI$center              ##Estimate of Mu
  129.     Sigma2_DI = estim2_DI$cov           ##Estimate of Sigma
  130.    
  131.     DataMatrix = rbind(DataMatrix1,DataMatrix2)
  132.     DataMatrix = apply(DataMatrix,c(1,2),FUN = as.numeric)
  133.    
  134.    
  135.     ##Test Data(For Class-1)
  136.     #cellMap(TDataMatrix1,rowlabels = 1:n_test,nrowsinblock = 10,drawCircles = FALSE)
  137.     TDataMatrix1 = generateData(n = n_test,d  =d_test,mu = Mu_test1,S = Sigma_test1,
  138.                                 perout = percent,gamma = gamma[j],outlierType = "cellwisePlain")$X
  139.     TDataMatrix1 = apply(TDataMatrix1,c(1,2),as.numeric)
  140.    
  141.     ##2.Test Data(For class-2)
  142.     #cellMap(TDataMatrix2,rowlabels = 1:n_test,nrowsinblock = 10,drawCircles = FALSE)
  143.     TDataMatrix2 = rmvnorm(n = n_test,mean = Mu_test2,sigma = Sigma_test2)
  144.     TDataMatrix2 = apply(TDataMatrix2,c(1,2), as.numeric)
  145.    
  146.    
  147.     TDataMatrix = rbind(TDataMatrix1,TDataMatrix2)
  148.     TDataMatrix = apply(TDataMatrix,c(1,2),as.numeric)
  149.    
  150.     L = list(Mu1_MCD,Sigma1_MCD,Mu2_MCD,Sigma2_MCD,
  151.              Mu1_2SGS,Sigma1_2SGS,Mu2_2SGS,Sigma2_2SGS,
  152.              Mu1_DI,Sigma1_DI,Mu2_DI,Sigma2_DI,
  153.              Mu1_Mean,Sigma1_S,Mu2_Mean,Sigma2_S,
  154.              DataMatrix,TDataMatrix)
  155.     return(L)
  156.   }
  157.  
  158.  
  159. DA = function(G,mu1,sigma1,mu2,sigma2){
  160.   D1 = apply(X = G[[18]],FUN = mahalanobis,center = mu1,cov = sigma1,MARGIN = 1)
  161.   D2 = apply(X = G[[18]],FUN = mahalanobis,center = mu2,cov = sigma2,MARGIN = 1)
  162.   pred = ifelse(D1>D2,"1","2")
  163.   mis = 1 - (sum(Tlabel!=pred)/length(Tlabel))
  164.   return(mis)
  165. }
  166.  
  167.  
  168.  
  169.  
  170.   ##****Fitting Machine Learning Models*****
  171.   ##*
  172.   ##'[Defining Function]
  173.   ##*
  174.  
  175.   Model = function(G)
  176.   {
  177.     LDA = numeric(4)
  178.     ##LDA(reduced Data)
  179.     S24 = ((n_test-1)*G[[2]]+(n_test-1)*G[[4]])/(2*n_test-2)
  180.     S68 = ((n_test-1)*G[[6]]+(n_test-1)*G[[8]])/(2*n_test-2)
  181.     S1012 = ((n_test-1)*G[[10]]+(n_test-1)*G[[12]])/(2*n_test-2)
  182.     S1416 = ((n_test-1)*G[[14]]+(n_test-1)*G[[16]])/(2*n_test-2)
  183.     LDA[1] = DA(G,mu1 = G[[1]],sigma1 = S24,mu2 = G[[3]],sigma2 = S24)
  184.     LDA[2] = DA(G,mu1 = G[[5]],sigma1 = S68,mu2 = G[[7]],sigma2 = S68)
  185.     LDA[3] = DA(G,mu1 = G[[9]],sigma1 = S1012,mu2 = G[[11]],sigma2 = S1012)
  186.     LDA[4] = DA(G,mu1 = G[[13]],sigma1 = S1416,mu2 = G[[15]],sigma2 = S1416)
  187.     ##############################################################################
  188.     QDA = numeric(4)
  189.     ##############################################################################
  190.     ##QDA(reduced data)
  191.     QDA[1] = DA(G,mu1 = G[[1]],sigma1 = G[[2]],mu2 = G[[3]],sigma2 = G[[4]])
  192.     QDA[2] = DA(G,mu1 = G[[5]],sigma1 = G[[6]],mu2 = G[[7]],sigma2 = G[[8]])
  193.     QDA[3] = DA(G,mu1 = G[[9]],sigma1 = G[[10]],mu2 = G[[11]],sigma2 = G[[12]])
  194.     QDA[4] = DA(G,mu1 = G[[13]],sigma1 = G[[14]],mu2 = G[[15]],sigma2 = G[[16]])
  195.     ##############################################################################
  196.     toc()
  197.     return(list(LDA,QDA))
  198.   }
  199.   tic()
  200.   LDA = matrix(0,r,4)
  201.   QDA = matrix(0,r,4)
  202.   cl = makePSOCKcluster(2)
  203.   registerDoParallel(cl,cores = 2)
  204.   foreach(i=1:r) %do%
  205.     {
  206.       G = Data()
  207.       print(paste("Number of iterations:",i))
  208.       LDA[i,] = Model(G)[[1]]
  209.       QDA[i,] = Model(G)[[2]]
  210.       print(paste("Number of iterations:",i))
  211.     }
  212.   stopCluster(cl)
  213.   toc()
  214.   beep(2)
  215.  
  216.   ME = matrix(0,4,4)
  217.   LDAMean = apply(LDA,FUN = mean,MARGIN = 2)
  218.   LDASd = apply(LDA,FUN = sd,MARGIN = 2)
  219.  
  220.   QDAMean = apply(QDA,FUN = mean,MARGIN = 2)
  221.   QDASd = apply(QDA,FUN = sd,MARGIN = 2)
  222.  
  223.   ME[1,] = LDAMean
  224.   ME[2,] = LDASd
  225.   ME[3,] = QDAMean
  226.   ME[4,] = QDASd
  227.   rownames(ME) = c("LDA","sd","QDA","sd")
  228.   colnames(ME) = c("MCD","2SGS","DI","Non-Robust")
  229.   ME = apply(ME,MARGIN = c(1,2),as.numeric)
  230.   print(ME)
  231.   #xtable(ME,digits = 5)
  232.  
  233.  
  234.   gamma_Data = append(gamma_Data,list(ME))
  235.  
  236.  
  237.  
  238.  
  239.  
  240.  
  241.   par(mfrow = c(1,2),main = paste("Boxplot for gamma = ",gamma[j]))
  242.  
  243.   LDA = data.frame(LDA)
  244.   colnames(LDA) = c("MCD","2SGS","DI","NR")
  245.   boxplot(LDA,main = "boxplot for LDA",ylab = paste("Misclassification Error"))
  246.  
  247.  
  248.   QDA = data.frame(QDA)
  249.   colnames(QDA) = c("MCD","2SGS","DI","NR")
  250.   boxplot(QDA,main = "boxplot for QDA",ylab = paste("Misclassification Error"))
  251.  
  252.  
  253.   LDA_mean[j,] = LDAMean
  254.   QDA_mean[j,] = QDAMean
  255. }
  256. toc()  
  257.  
  258.  
  259. saveRDS(gamma_Data,file = "N_0.05_Loc_5.rds")
  260.  
  261. # Create a line plot using ggplot2
  262. ####################################
  263. #########plot for LDA###############
  264. ####################################
  265. LDA_mean = data.frame(gamma,LDA_mean)
  266. ggplot(LDA_mean, aes(x=gamma)) +
  267.   geom_line(aes(y=X1, color="MCD")) +
  268.   geom_point(aes(y=X1, color="MCD"))+
  269.   geom_line(aes(y=X2, color="2SGS")) +
  270.   geom_point(aes(y=X2, color="2SGS")) +
  271.   geom_line(aes(y=X3, color="DI"))+
  272.   geom_point(aes(y=X3, color="DI"))+
  273.   geom_line(aes(y=X4, color="NR"))+
  274.   geom_point(aes(y=X4, color="NR"))+
  275.   scale_color_manual(values=c("MCD"="red", "2SGS"="blue","DI" = "green","NR" = "darkviolet"))+
  276.   xlab(expression(gamma))+ylab("Misclassification Error")+
  277.   ggtitle("Plot for LDA with Different Values of gamma")
  278.  
  279.  
  280. #####################################
  281. ###########plot for QDA##############
  282. #####################################
  283. QDA_mean = data.frame(gamma,QDA_mean)
  284. ggplot(QDA_mean, aes(x=gamma)) +
  285.   geom_line(aes(y=X1, color="MCD")) +
  286.   geom_point(aes(y=X1, color="MCD"))+
  287.   geom_line(aes(y=X2, color="2SGS")) +
  288.   geom_point(aes(y=X2, color="2SGS")) +
  289.   geom_line(aes(y=X3, color="DI"))+
  290.   geom_point(aes(y=X3, color="DI"))+
  291.   geom_line(aes(y=X4, color="NR"))+
  292.   geom_point(aes(y=X4, color="NR"))+
  293.   scale_color_manual(values=c("MCD"="red", "2SGS"="blue","DI" = "green","NR" = "darkviolet"))+
  294.   xlab(expression(gamma))+ylab("Misclassification Error")+
  295.   ggtitle("Plot for QDA with Different Values of gamma")
  296.  
  297.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement