Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- > # Prep Training and Test data.
- > # Seed the RNG *before* sampling: seeding after sample() leaves the 70/30
- > # split different on every run. Seeding here also fixes the RNG state used
- > # by any later random step in this session.
- > set.seed(100)
- > # seq_len() is safe for a zero-row df (1:0 would yield c(1, 0)); floor()
- > # makes the truncation of the fractional sample size explicit.
- > trainDataIndex <- sample(seq_len(nrow(df)), size = floor(0.7 * nrow(df))) # 70% training data
- > trainData <- df[trainDataIndex, ]
- > testData <- df[-trainDataIndex, ]
- > # Strip leading/trailing whitespace from the CUST_REGION_DESCR factor levels
- > # in both partitions so the level labels are identical in train and test.
- > trainData <- dplyr::mutate(
- +   trainData,
- +   CUST_REGION_DESCR = forcats::fct_relabel(CUST_REGION_DESCR, trimws)
- + )
- > testData <- dplyr::mutate(
- +   testData,
- +   CUST_REGION_DESCR = forcats::fct_relabel(CUST_REGION_DESCR, trimws)
- + )
- > str(trainData)
- 'data.frame': 693843 obs. of 4 variables:
- $ cust_prog_level : Factor w/ 14 levels "B","C","D","E",..: 9 7 10 9 10 9 10 5 10 5 ...
- $ CUST_REGION_DESCR: Factor w/ 8 levels "CORPORATE REGION",..: 2 6 7 6 8 8 4 7 7 6 ...
- $ Sales : num 92.7 2356 39 239.6 26 ...
- $ New_Product_Type : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 1 ...
- > str(testData)
- 'data.frame': 297362 obs. of 4 variables:
- $ cust_prog_level : Factor w/ 14 levels "B","C","D","E",..: 9 5 9 9 9 9 3 3 5 3 ...
- $ CUST_REGION_DESCR: Factor w/ 8 levels "CORPORATE REGION",..: 3 3 6 6 7 6 7 2 2 4 ...
- $ Sales : num 150.2 68.5 68.1 72.1 60.1 ...
- $ New_Product_Type : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
- > # Design matrix for the training rows; factor predictors are expanded into
- > # dummy columns. model.matrix() also contributes its own "(Intercept)"
- > # column, which is why the coefficient table below shows a second, empty
- > # "(Intercept)" row next to the intercept fitted by glmnet.
- > x <- model.matrix(New_Product_Type ~ ., data = trainData)
- > # Cross-validated lasso (alpha = 1) logistic regression, scored by MSE.
- > cvfit <- cv.glmnet(
- +   x,
- +   y = as.factor(trainData$New_Product_Type),
- +   alpha = 1,
- +   family = "binomial",
- +   type.measure = "mse"
- + )
- > # Keep the lambda.1se value reported by the cross-validation fit.
- > lambda_1se <- cvfit$lambda.1se
- > coef(cvfit, s = lambda_1se)
- 23 x 1 sparse Matrix of class "dgCMatrix"
- 1
- (Intercept) 0.02946581
- (Intercept) .
- cust_prog_levelC 0.14012975
- cust_prog_levelD .
- cust_prog_levelE 0.13339906
- cust_prog_levelG -0.05325043
- cust_prog_levelI 0.21440592
- cust_prog_levelL 0.26273503
- cust_prog_levelM .
- cust_prog_levelN 0.26620261
- cust_prog_levelP -0.05166799
- cust_prog_levelR -0.33054803
- cust_prog_levelS .
- cust_prog_levelX 0.57508875
- cust_prog_levelZ 1.20748454
- CUST_REGION_DESCRMOUNTAIN WEST REGION -0.20993854
- CUST_REGION_DESCRNORTH CENTRAL REGION -0.04035331
- CUST_REGION_DESCRNORTH EAST REGION 0.01082858
- CUST_REGION_DESCROHIO VALLEY REGION 0.03077584
- CUST_REGION_DESCRSOUTH CENTRAL REGION .
- CUST_REGION_DESCRSOUTH EAST REGION 0.10606213
- CUST_REGION_DESCRWESTERN REGION -0.17587036
- Sales -0.01223843
- > # Build the test-set design matrix with the same formula used for training.
- > x_test <- model.matrix(New_Product_Type ~ ., data = testData)
- > # Predict New_Product_Type on the test rows at lambda_1se, type = "response".
- > lasso_prob <- predict(cvfit, newx = x_test, s = lambda_1se, type = "response")
- > # Translate the predicted values into labels: start with "neg" everywhere,
- > # then mark rows above the 0.5 cut-off as "pos".
- > lasso_predict <- rep("neg", times = nrow(testData))
- > lasso_predict[lasso_prob > 0.5] <- "pos"
- > # Confusion matrix: predicted label (rows) vs. observed class (columns).
- > table(pred = lasso_predict, true = testData$New_Product_Type)
- true
- pred 0 1
- neg 207840 60865
- pos 8697 19960
- > # NOTE(review): every row with a predicted value above 0.8 was already
- > # labelled "pos" by the 0.5 cut-off and nothing is reset to "neg", so this
- > # assignment leaves lasso_predict unchanged (the table below is identical to
- > # the previous one). To really evaluate a 0.8 cut-off, rebuild lasso_predict
- > # from "neg" first.
- > lasso_predict[lasso_prob > 0.8] <- "pos"
- > # confusion matrix
- > table(pred = lasso_predict, true = testData$New_Product_Type)
- true
- pred 0 1
- neg 207840 60865
- pos 8697 19960
- > # accuracy: predictions are labelled "neg"/"pos" while the observed classes
- > # are coded "0"/"1", so a direct == comparison never matches and always
- > # yields 0. Map the observed classes onto the prediction labels first.
- > true_label <- ifelse(testData$New_Product_Type == "1", "pos", "neg")
- > mean(lasso_predict == true_label)
- [1] 0.7660696
Add Comment
Please, Sign In to add comment