Advertisement
Guest User

Special_task 1

a guest
Jan 20th, 2020
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.42 KB | None | 0 0
  1. ```{r}
  2. library("readxl")
  3. library(glmnet)
  # Two-pass bootstrap lasso on the spam data.
  #
  # Pass 1 fits a cross-validated lasso logistic regression on B bootstrap
  # resamples, each restricted to q1 uniformly drawn predictors, and turns
  # the averaged coefficients into a per-feature importance score.
  # Pass 2 repeats the procedure, drawing q2 predictors with probability
  # proportional to that importance, and averages the fits into the final
  # `beta_hat` / `intercept` consumed by the prediction chunk.
  data <- read_excel("spambase.xlsx")

  B <- 100       # bootstrap replicates per pass
  n <- nrow(data)
  nfolds <- 4    # folds used by cv.glmnet

  # Predictors are every column except the response "Spam".
  feature_names <- setdiff(names(data), "Spam")
  p <- length(feature_names)
  q1 <- round(p / 2)  # predictors drawn per replicate in pass 1
  q2 <- round(p / 2)  # predictors drawn per replicate in pass 2

  #' Accumulate lasso coefficients over bootstrap replicates.
  #'
  #' @param data Data frame holding the predictors and a "Spam" column.
  #' @param feature_names Names of the predictor columns.
  #' @param B Number of bootstrap replicates.
  #' @param q Number of predictors drawn (without replacement) per replicate.
  #' @param nfolds Number of cross-validation folds passed to cv.glmnet.
  #' @param prob Optional sampling weights for the predictors, in the order
  #'   of `feature_names`; `NULL` draws them uniformly.
  #' @return A list with `coef_sum` (named sum of lambda.min coefficients),
  #'   `counts` (how often each predictor was drawn) and `intercept_sum`.
  bootstrap_lasso <- function(data, feature_names, B, q, nfolds,
                              prob = NULL) {
    stopifnot(q >= 1, q <= length(feature_names))
    n_obs <- nrow(data)
    coef_sum <- setNames(numeric(length(feature_names)), feature_names)
    counts <- coef_sum
    intercept_sum <- 0

    for (i in seq_len(B)) {
      # The order of the random draws (rows, then columns, then the CV
      # folds inside cv.glmnet) is kept fixed so a seed reproduces a run.
      boot <- data[sample(n_obs, n_obs, replace = TRUE), ]
      x_all <- as.matrix(boot[, feature_names])
      chosen <- sample(seq_len(ncol(x_all)), q, prob = prob)
      x <- x_all[, chosen, drop = FALSE]
      y <- as.matrix(boot[, "Spam"])

      fit <- cv.glmnet(
        x = x, y = y, alpha = 1, nfolds = nfolds, family = "binomial"
      )
      # Named vector: "(Intercept)" first, then the drawn predictors.
      est <- as.matrix(coef(fit, s = "lambda.min"))[, 1]
      slopes <- est[-1]

      coef_sum[names(slopes)] <- coef_sum[names(slopes)] + slopes
      counts[names(slopes)] <- counts[names(slopes)] + 1
      intercept_sum <- intercept_sum + est[[1]]
    }

    list(coef_sum = coef_sum, counts = counts, intercept_sum = intercept_sum)
  }

  # Pass 1: uniform feature sampling -> importance scores.
  set.seed(12345)
  pass1 <- bootstrap_lasso(data, feature_names, B, q1, nfolds)
  # pmax() guards against 0/0 = NaN for a feature that was never drawn;
  # a NaN weight would make the weighted sample() in pass 2 fail.
  importance <- abs(pass1$coef_sum / pmax(pass1$counts, 1))

  # Pass 2: importance-weighted feature sampling -> final model.
  set.seed(12345)
  pass2 <- bootstrap_lasso(
    data, feature_names, B, q2, nfolds, prob = importance
  )

  # Never-drawn features keep a coefficient of 0 instead of NaN.
  counts <- pmax(pass2$counts, 1)
  beta_hat <- pass2$coef_sum / counts
  intercept <- pass2$intercept_sum / B
  102. ```
  103.  
  104. ```{r}
  #' Classify observations with a logistic model given by explicit weights.
  #'
  #' @param x Numeric matrix or data frame, one row per observation and one
  #'   column per weight (same order as `weights`).
  #' @param weights Numeric vector of coefficients, one per column of `x`.
  #' @param intercept Scalar intercept added to every linear predictor.
  #' @return Numeric vector of length `nrow(x)`: 1 where the fitted
  #'   probability exceeds 0.5, otherwise 0.
  predict_own <- function(x, weights, intercept) {
    # One matrix product replaces the row-by-row loop and also copes with
    # zero-row input.
    log_odds <- as.vector(as.matrix(x) %*% weights) + intercept
    prob <- 1 / (1 + exp(-log_odds))
    # A probability above 0.5 means class 1 (the original had the two
    # labels swapped).
    as.numeric(prob > 0.5)
  }
  118.  
  # Evaluate the averaged model on the full data set: plot the most
  # important features, then print the confusion matrix and the accuracy.
  x <- data[, !names(data) %in% c("Spam")]
  y <- data[["Spam"]]
  predictions <- predict_own(x, beta_hat, intercept)

  importance <- sort(importance, decreasing = TRUE)
  # head() avoids NA bars when fewer than 10 features are available.
  barplot(head(importance, 10), ylab = "Importance", xlab = "Feature")

  confusion_matrix <- table(y, predictions)
  confusion_matrix
  # Accuracy is the share of matching labels. Summing the table diagonal
  # is only correct for a square table, which fails when every prediction
  # falls into a single class.
  cat("Accuracy: ", mean(y == predictions))
  128. ```
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement