Guest User

Untitled

a guest
Jun 18th, 2018
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.38 KB | None | 0 0
  1. > # Prep Training and Test data (seed the RNG first so the 70/30 split is reproducible).
  2. > set.seed(100)
  3. > trainDataIndex <- sample(seq_len(nrow(df)), floor(0.7 * nrow(df))) # 70% training data
  4. > trainData <- df[trainDataIndex, ]
  5. > testData <- df[-trainDataIndex, ]
  6. > trainData <- # strip leading/trailing whitespace from the region factor's level labels
  7. + trainData %>%
  8. + dplyr::mutate(CUST_REGION_DESCR =
  9. + forcats::fct_relabel(CUST_REGION_DESCR, ~ trimws(.x)))
  10. > testData <- # apply the identical relabelling to the test split
  11. + testData %>%
  12. + dplyr::mutate(CUST_REGION_DESCR =
  13. + forcats::fct_relabel(CUST_REGION_DESCR, ~ trimws(.x)))
  14. > str(trainData)
  15. 'data.frame': 693843 obs. of 4 variables:
  16. $ cust_prog_level : Factor w/ 14 levels "B","C","D","E",..: 9 7 10 9 10 9 10 5 10 5 ...
  17. $ CUST_REGION_DESCR: Factor w/ 8 levels "CORPORATE REGION",..: 2 6 7 6 8 8 4 7 7 6 ...
  18. $ Sales : num 92.7 2356 39 239.6 26 ...
  19. $ New_Product_Type : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 1 ...
  20. > str(testData)
  21. 'data.frame': 297362 obs. of 4 variables:
  22. $ cust_prog_level : Factor w/ 14 levels "B","C","D","E",..: 9 5 9 9 9 9 3 3 5 3 ...
  23. $ CUST_REGION_DESCR: Factor w/ 8 levels "CORPORATE REGION",..: 3 3 6 6 7 6 7 2 2 4 ...
  24. $ Sales : num 150.2 68.5 68.1 72.1 60.1 ...
  25. $ New_Product_Type : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
  26.  
  27. > x <- model.matrix(New_Product_Type ~ ., data = trainData) # design matrix; it carries its own "(Intercept)" column, hence the second "(Intercept)" row in coef()
  28.  
  29. > cvfit <- cv.glmnet(x = x, y = as.factor(trainData$New_Product_Type), family = "binomial", alpha = 1, type.measure = "mse") # cross-validated lasso (alpha = 1) logistic fit
  30.  
  31. > lambda_1se <- cvfit[["lambda.1se"]]
  32.  
  33. > coef(cvfit, s = lambda_1se) # coefficients at the lambda.1se penalty
  34. 23 x 1 sparse Matrix of class "dgCMatrix"
  35. 1
  36. (Intercept) 0.02946581
  37. (Intercept) .
  38. cust_prog_levelC 0.14012975
  39. cust_prog_levelD .
  40. cust_prog_levelE 0.13339906
  41. cust_prog_levelG -0.05325043
  42. cust_prog_levelI 0.21440592
  43. cust_prog_levelL 0.26273503
  44. cust_prog_levelM .
  45. cust_prog_levelN 0.26620261
  46. cust_prog_levelP -0.05166799
  47. cust_prog_levelR -0.33054803
  48. cust_prog_levelS .
  49. cust_prog_levelX 0.57508875
  50. cust_prog_levelZ 1.20748454
  51. CUST_REGION_DESCRMOUNTAIN WEST REGION -0.20993854
  52. CUST_REGION_DESCRNORTH CENTRAL REGION -0.04035331
  53. CUST_REGION_DESCRNORTH EAST REGION 0.01082858
  54. CUST_REGION_DESCROHIO VALLEY REGION 0.03077584
  55. CUST_REGION_DESCRSOUTH CENTRAL REGION .
  56. CUST_REGION_DESCRSOUTH EAST REGION 0.10606213
  57. CUST_REGION_DESCRWESTERN REGION -0.17587036
  58. Sales -0.01223843
  59.  
  60. > #build the test design matrix with the same formula used for training
  61. > x_test <- model.matrix(New_Product_Type~.,data = testData)
  62. > #predict fitted probabilities for New_Product_Type (type="response") at lambda_1se
  63. > lasso_prob <- predict(cvfit,newx = x_test,s=lambda_1se,type="response")
  64.  
  65. > #translate probabilities to "neg"/"pos" labels using a 0.5 cutoff
  66. > lasso_predict <- rep("neg",nrow(testData))
  67. > lasso_predict[lasso_prob>.5] <- "pos"
  68. > #confusion matrix (rows: predicted label, columns: true 0/1 class)
  69. > table(pred=lasso_predict,true=testData$New_Product_Type)
  70. true
  71. pred 0 1
  72. neg 207840 60865
  73. pos 8697 19960
  74. > #NOTE(review): the .8 step below is a no-op - every row with prob > .8 is already "pos" from the > .5 step; reset lasso_predict to "neg" first to evaluate a 0.8 cutoff
  75.  
  76. > lasso_predict[lasso_prob>.8] <- "pos"
  77. > #confusion matrix (identical to the one above because no label changed)
  78. > table(pred=lasso_predict,true=testData$New_Product_Type)
  79. true
  80. pred 0 1
  81. neg 207840 60865
  82. pos 8697 19960
  83.  
  84. > #accuracy: map the true "0"/"1" classes onto the "neg"/"pos" labels before comparing
  85. > mean(lasso_predict == ifelse(testData$New_Product_Type == "1", "pos", "neg"))
  86. [1] 0.7660696
Add Comment
Please, Sign In to add comment