Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Labb2 - Assignment 2
#
# Loads the credit-scoring data and partitions it into three disjoint sets:
# 50% training, 25% validation and 25% test.

# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later with a less clear message.
library(tree)
library(readxl)

# The original script ran rm(list = ls()) at this point. It has been dropped
# because it silently wipes the caller's workspace; run the script in a fresh
# R session instead if a clean environment is needed.

data <- read_excel("creditscoring.xls")

# Fixed seed so the random partition is reproducible.
set.seed(12345)

# First split: half of the rows become the training set.
n <- nrow(data)
id <- sample(seq_len(n), floor(n * 0.5))
train <- data[id, ]

# Second split: the rows NOT used for training are divided in half again.
# The previous version sampled these two sets from `train`, so the validation
# and test rows were all training rows and the real hold-out data was unused.
holdout <- data[-id, ]
nv <- nrow(holdout)
idv <- sample(seq_len(nv), floor(nv * 0.5))
test <- holdout[idv, ]
valid <- holdout[-idv, ]
# 2
# Fit two classification trees on the training data, one per impurity
# measure, and compute each tree's class predictions on the training data.
#
# NOTE(review): the response is wrapped in as.factor(), so confirm that the
# `.` on the right-hand side does not also pull good_bad in as a predictor.

# Tree grown with the deviance splitting criterion.
model_dev <- tree(as.factor(good_bad) ~ ., data = train, split = "deviance")
pred_model_dev <- predict(model_dev, newdata = train, type = "class")
summary(model_dev)

# Tree grown with the Gini index splitting criterion.
model_gin <- tree(as.factor(good_bad) ~ ., data = train, split = "gini")
# Predict with the Gini tree itself; the previous version passed model_dev
# here, so pred_model_gin was just a copy of pred_model_dev.
pred_model_gin <- predict(model_gin, newdata = train, type = "class")
summary(model_gin)
# 3
# Earlier cross-validation attempt, kept for reference:
# cv.res=cv.tree(model_dev)
# plot(cv.res$size, cv.res$dev, type="b",
# col="red")
# plot(log(cv.res$k), cv.res$dev,
# type="b", col="red")
# min_gin=min(cv.res)

# Optimal number of leaves for the deviance-fitted tree: prune the tree to
# each candidate size and compare the deviance on the training data with the
# deviance on the validation data.
fit <- model_dev
leaves <- 2:9

# Indexed by number of leaves; position 1 is never filled and stays 0.
trainScore <- rep(0, 9)
testScore <- rep(0, 9)

for (i in leaves) {
  prunedTree <- prune.tree(fit, best = i)
  # type = "tree" makes predict() return a tree object evaluated on `valid`,
  # so deviance() can be applied to it directly.
  pred <- predict(prunedTree, newdata = valid, type = "tree")
  trainScore[i] <- deviance(prunedTree)
  testScore[i] <- deviance(pred)
}

# Derive the y-axis range from the plotted values so that neither curve is
# clipped (the range was previously hard-coded to 200-600).
plot(leaves, trainScore[leaves],
  type = "b", col = "red",
  ylim = range(trainScore[leaves], testScore[leaves]),
  xlab = "Number of leaves", ylab = "Deviance"
)
points(leaves, testScore[leaves], type = "b", col = "blue")
legend("topright",
  legend = c("training", "validation"),
  col = c("red", "blue"), lty = 1, pch = 1
)

# Candidate size with the lowest validation deviance (first one on ties).
best_size <- leaves[which.min(testScore[leaves])]
best_size
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement