Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Fit a conditional inference tree of quotation on minute and temp, then
- # print and plot it.
- # NOTE(review): visitquot is not defined in this file and no library() call
- # precedes this snippet; the "Fitted party:" output looks like partykit
- # output, so presumably partykit is attached and visitquot already exists
- # in the workspace -- confirm.
- # The fitted object is stored under the name `ctree`. Later ctree(...) calls
- # still reach the function, because R skips non-function bindings when it
- # resolves the function position of a call.
- ctree <- ctree(quotation ~ minute + temp, data = visitquot)
- print(ctree)
- # Fitted party:
- # [1] root
- # | [2] minute <= 600
- # | | [3] minute <= 227
- # | | | [4] temp <= -0.4259
- # | | | | [5] temp <= -2.3174: 0.015 (n = 6254, err = 89.7)
- # | | | | [6] temp > -2.3174
- # | | | | | [7] minute <= 68: 0.028 (n = 4562, err = 126.3)
- # | | | | | [8] minute > 68: 0.046 (n = 7100, err = 312.8)
- # | | | [9] temp > -0.4259
- # | | | | [10] temp <= 6.0726: 0.015 (n = 56413, err = 860.5)
- # | | | | [11] temp > 6.0726: 0.019 (n = 39779, err = 758.9)
- # | | [12] minute > 227
- # | | | [13] minute <= 501
- # | | | | [14] minute <= 291: 0.013 (n = 30671, err = 388.0)
- # | | | | [15] minute > 291: 0.009 (n = 559646, err = 5009.3)
- # | | | [16] minute > 501
- # | | | | [17] temp <= 5.2105
- # | | | | | [18] temp <= -1.8393: 0.009 (n = 66326, err = 617.1)
- # | | | | | [19] temp > -1.8393: 0.012 (n = 355986, err = 4289.0)
- # | | | | [20] temp > 5.2105
- # | | | | | [21] temp <= 13.6927: 0.014 (n = 287909, err = 3900.7)
- # | | | | | [22] temp > 13.6927
- # | | | | | | [23] temp <= 14: 0.035 (n = 2769, err = 92.7)
- # | | | | | | [24] temp > 14: 0.007 (n = 2161, err = 15.9)
- # | [25] minute > 600
- # | | [26] temp <= 1.6418
- # | | | [27] temp <= -2.3366: 0.012 (n = 110810, err = 1268.1)
- # | | | [28] temp > -2.3366: 0.014 (n = 584457, err = 7973.2)
- # | | [29] temp > 1.6418: 0.016 (n = 3753208, err = 57864.3)
- plot(ctree, type = "simple")
- # Fit a tree for Ozone on all other airquality columns and print it.
- library("partykit")
- # Keep only the rows where Ozone was actually observed.
- airq <- airquality[!is.na(airquality$Ozone), ]
- airct <- ctree(formula = Ozone ~ ., data = airq)
- print(airct)
- # Model formula:
- # Ozone ~ Solar.R + Wind + Temp + Month + Day
- #
- # Fitted party:
- # [1] root
- # | [2] Temp <= 82
- # | | [3] Wind <= 6.9: 55.600 (n = 10, err = 21946.4)
- # | | [4] Wind > 6.9
- # | | | [5] Temp <= 77: 18.479 (n = 48, err = 3956.0)
- # | | | [6] Temp > 77: 31.143 (n = 21, err = 4620.6)
- # | [7] Temp > 82
- # | | [8] Wind <= 10.3: 81.633 (n = 30, err = 15119.0)
- # | | [9] Wind > 10.3: 48.714 (n = 7, err = 1183.4)
- #
- # Number of inner nodes: 4
- # Number of terminal nodes: 5
- # Swap partykit for party, refit the same model, and tabulate summary
- # statistics of Ozone within every terminal node.
- detach("package:partykit", unload = TRUE)
- library("party")
- airct <- party::ctree(formula = Ozone ~ ., data = airq)
- t(sapply(unique(where(airct)), function(node_id) {
-   node <- nodes(airct, node_id)[[1]]
-   # Rows with a non-zero weight in this node are the ones selected from airq.
-   in_node <- as.logical(node$weights)
-   ozone <- airq[in_node, "Ozone"]
-   centre <- mean(ozone)
-   cbind.data.frame(
-     "Node" = as.integer(node_id),
-     "n" = length(ozone),
-     "Avg." = centre,
-     "Variance" = var(ozone),
-     "SSE" = sum((ozone - centre)^2)
-   )
- }))
- # Node n Avg. Variance SSE
- # [1,] 5 48 18.47917 84.16977 3955.979
- # [2,] 3 10 55.6 2438.489 21946.4
- # [3,] 6 21 31.14286 231.0286 4620.571
- # [4,] 9 7 48.71429 197.2381 1183.429
- # [5,] 8 30 81.63333 521.3437 15118.97
- # Report a formatted p-value label for every inner (non-terminal) node of
- # the party fit `airct`.
- terNodes <- unique(where(airct))
- # Inner node ids: every id up to the largest terminal id that is not itself
- # a terminal id. Computed once and reused below.
- innerNodes <- setdiff(seq_len(max(terNodes)), terNodes)
- innerNodes
- vapply(innerNodes, function(x) {
-   n <- nodes(airct, x)[[1]]
-   # The p-value is taken as 1 minus the node's stored maxcriterion.
-   pvalue <- 1 - n$criterion$maxcriterion
-   # pvalue is a single number, so a plain if/else is used instead of ifelse().
-   plab <- if (pvalue < 10^(-3)) {
-     paste("p <", 10^(-3))
-   } else {
-     paste("p =", round(pvalue, digits = 3))
-   }
-   c("Node" = x, "P-value" = plab)
- }, FUN.VALUE = character(2))
- # [,1] [,2] [,3] [,4]
- # Node "1" "2" "4" "7"
- # P-value "p < 0.001" "p = 0.002" "p = 0.003" "p = 0.003"
- # Fit a tree for diabetes on all other PimaIndiansDiabetes columns (data set
- # loaded from mlbench), plot it in simplified form, and show the criterion
- # table stored in node 1.
- library(partykit)
- data(PimaIndiansDiabetes, package = "mlbench")
- ct <- ctree(formula = diabetes ~ ., data = PimaIndiansDiabetes)
- plot(as.simpleparty(ct))
- nodeapply(ct, ids = 1, FUN = function(node) info_node(node)$criterion)
- ## $`1`
- ## pregnant glucose pressure triceps insulin
- ## statistic 3.631413e+00 5.117841e+00 1.1778530 1.455334 2.570457503
- ## p.value -6.380290e-09 -2.710725e-37 -0.5937987 -0.313498 -0.002398554
- ## mass pedigree age
- ## statistic 4.185236e+00 3.143294e+00 3.774507e+00
- ## p.value -4.181295e-15 -1.180135e-05 -3.262459e-10
- # Show the test table for node 1 via sctest(), then the p.value entry stored
- # in the info of every node of the tree.
- library(strucchange)
- sctest(ct, node = 1)
- ## pregnant glucose pressure triceps insulin mass
- ## statistic 3.776615e+01 166.9745 3.2473947 4.2859164 13.07180346 6.570902e+01
- ## p.value 6.380290e-09 0.0000 0.4477744 0.2691142 0.00239568 4.218847e-15
- ## pedigree age
- ## statistic 2.318009e+01 4.357601e+01
- ## p.value 1.180128e-05 3.262459e-10
- nodeapply(ct, ids = nodeids(ct), FUN = function(node) info_node(node)$p.value)
- ## $`1`
- ## glucose
- ## 0
- ##
- ## $`2`
- ## age
- ## 6.048661e-07
- ##
- ## $`3`
- ## mass
- ## 0.001169778
- ##
- ## ...
- # For each predicted node id: the number of observations and the percentage
- # of each diabetes class, stacked into one matrix (one row per node).
- tab <- tapply(
-   X = PimaIndiansDiabetes$diabetes,
-   INDEX = predict(ct, type = "node"),
-   FUN = function(resp) {
-     class_pct <- 100 * prop.table(table(resp))
-     c("n" = length(resp), class_pct)
-   }
- )
- do.call(rbind, tab)
- ## n neg pos
- ## 5 144 99.30556 0.6944444
- ## 6 7 85.71429 14.2857143
- ## 7 120 82.50000 17.5000000
- ## 8 214 66.82243 33.1775701
- ## 11 53 79.24528 20.7547170
- ## 12 108 39.81481 60.1851852
- ## 13 122 19.67213 80.3278689
- library("partykit")
- # Work on a data.frame copy of the airquality data.
- airquality <- data.frame(airquality)
- # Numeric 0/1 response: 1 where Ozone >= 34, otherwise 0.
- # Rows with a missing Ozone value get 0 here and are dropped just below.
- airquality$OzoneClass <- as.numeric(
-   !is.na(airquality$Ozone) & airquality$Ozone >= 34
- )
- # Tree on the numeric response, using only rows with an observed Ozone.
- airq <- airquality[!is.na(airquality$Ozone), ]
- airct <- ctree(formula = OzoneClass ~ Temp, data = airq)
- print(airct)
- # Model formula:
- # OzoneClass ~ Temp
- #
- # Fitted party:
- # [1] root
- # | [2] Temp <= 82
- # | | [3] Temp <= 77: 0.096 (n = 52, err = 4.5)
- # | | [4] Temp > 77: 0.519 (n = 27, err = 6.7)
- # | [5] Temp > 82: 0.973 (n = 37, err = 1.0)
- #
- # Number of inner nodes: 2
- # Number of terminal nodes: 3
- # Same 0/1 split at Ozone >= 34, but stored as a factor so the response is
- # categorical. Rows with a missing Ozone value get 0 and are dropped below.
- airquality$OzoneClass <- as.factor(as.numeric(
-   !is.na(airquality$Ozone) & airquality$Ozone >= 34
- ))
- # Tree on the factor response, using only rows with an observed Ozone.
- airq <- airquality[!is.na(airquality$Ozone), ]
- airct <- ctree(formula = OzoneClass ~ Temp, data = airq)
- print(airct)
- # Model formula:
- # OzoneClass ~ Temp
- #
- # Fitted party:
- # [1] root
- # | [2] Temp <= 82
- # | | [3] Temp <= 77: 0 (n = 52, err = 9.6%)
- # | | [4] Temp > 77: 1 (n = 27, err = 48.1%)
- # | [5] Temp > 82: 1 (n = 37, err = 2.7%)
- #
- # Number of inner nodes: 2
- # Number of terminal nodes: 3
Add Comment
Please, Sign In to add comment