daily pastebin goal
25%
SHARE
TWEET

Untitled

a guest Nov 20th, 2017 60 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # Labb2 - Assignment 2
  #
  # Setup: load the packages used by this script, read the credit scoring
  # data and partition it into training / validation / test sets (50/25/25).
  #
  # library() is used instead of require() so that a missing package stops
  # the script immediately instead of failing later with an unclear error.
  # The workspace is deliberately not wiped with rm(list = ls()): that only
  # hides dependencies on global state and destroys the caller's objects.
  library(tree)
  library(readxl)

  data <- read_excel("creditscoring.xls")

  # Fixed seed so the partition below is reproducible.
  set.seed(12345)

  # Training set: a random half of the rows.
  # sample.int(n, k) draws the same indices as sample(1:n, k) but is also
  # well-defined when n is 0 or 1.
  n <- nrow(data)
  id <- sample.int(n, floor(n * 0.5))
  train <- data[id, ]

  # Validation and test sets: split the rows that are NOT in the training
  # set in half. Drawing them from the held-out rows (rather than from
  # `train`) keeps the three sets disjoint, so validation/test scores are
  # not contaminated by observations the model was fitted on.
  holdout_id <- setdiff(seq_len(n), id)
  n_holdout <- length(holdout_id)
  valid_id <- holdout_id[sample.int(n_holdout, floor(n_holdout * 0.5))]
  test_id <- setdiff(holdout_id, valid_id)

  valid <- data[valid_id, ]
  test <- data[test_id, ]
  17.  
  # 2 - Fit two classification trees for `good_bad` on the training set,
  # one per split criterion, and print a summary of each fitted model.

  # Tree grown with deviance as the split criterion, plus its class
  # predictions on the training data.
  model_dev <- tree(as.factor(good_bad) ~ ., data = train, split = "deviance")
  pred_model_dev <- predict(model_dev, newdata = train, type = "class")
  summary(model_dev)

  # Tree grown with the Gini index as the split criterion.
  model_gin <- tree(as.factor(good_bad) ~ ., data = train, split = "gini")
  # Predictions must come from the Gini tree itself; predicting from
  # model_dev here would just duplicate pred_model_dev.
  pred_model_gin <- predict(model_gin, newdata = train, type = "class")
  summary(model_gin)
  # 3 - Choose the number of leaves for the deviance-fitted tree.
  #
  # Alternative kept for reference (not run): cost-complexity cross-validation.
  # cv.res <- cv.tree(model_dev)
  # plot(cv.res$size, cv.res$dev, type = "b", col = "red")
  # plot(log(cv.res$k), cv.res$dev, type = "b", col = "red")
  # best_cv_size <- cv.res$size[which.min(cv.res$dev)]

  # Optimal number of leaves for the deviance fit: prune the tree to each
  # candidate size and compare the deviance on the training data with the
  # deviance on the validation data.
  fit <- model_dev

  # Largest tree size to try. It is capped by the number of leaves of the
  # fitted tree, because a tree cannot be pruned to more leaves than it has.
  max_leaves <- 9
  n_leaves <- sum(fit$frame$var == "<leaf>")
  stopifnot(n_leaves >= 2)
  sizes <- 2:min(max_leaves, n_leaves)

  # Scores are indexed by number of leaves; index 1 is unused (pruning to a
  # single node is skipped) and stays 0. `testScore` holds the deviance on
  # the validation set (the name is kept for compatibility).
  trainScore <- rep(0, max_leaves)
  testScore <- rep(0, max_leaves)
  for (i in sizes) {
    prunedTree <- prune.tree(fit, best = i)
    # type = "tree" returns a tree object whose deviances are evaluated on
    # `newdata`, so deviance(pred) is the validation deviance.
    pred <- predict(prunedTree, newdata = valid, type = "tree")
    trainScore[i] <- deviance(prunedTree)
    testScore[i] <- deviance(pred)
  }

  # Size with the lowest validation deviance.
  best_size <- sizes[which.min(testScore[sizes])]

  # Training (red) vs. validation (blue) deviance per tree size. The y-range
  # is taken from the scores themselves so that no point is silently clipped
  # by a fixed axis limit.
  score_range <- range(trainScore[sizes], testScore[sizes])
  plot(sizes, trainScore[sizes], type = "b", col = "red",
       ylim = score_range, xlab = "Number of leaves", ylab = "Deviance")
  points(sizes, testScore[sizes], type = "b", col = "blue")
RAW Paste Data
Top