Guest User

Untitled

a guest
Nov 21st, 2017
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.26 KB | None | 0 0
  # Fit a conditional inference tree of `quotation` on `minute` and `temp`.
  # `visitquot` is not created anywhere in this script and must already exist.
  #
  # The fitted tree is stored as `visit_ct` rather than `ctree`, so the object
  # no longer shares its name with the function that built it. The call is
  # namespace-qualified because no package is attached at this point; the
  # "Fitted party:" layout printed below matches the partykit output shown
  # later in this file.
  visit_ct <- partykit::ctree(quotation ~ minute + temp, data = visitquot)
  print(visit_ct)

  # Console output of print(), kept as comments so the script can be sourced:
  #
  # Fitted party:
  # [1] root
  # | [2] minute <= 600
  # | | [3] minute <= 227
  # | | | [4] temp <= -0.4259
  # | | | | [5] temp <= -2.3174: 0.015 (n = 6254, err = 89.7)
  # | | | | [6] temp > -2.3174
  # | | | | | [7] minute <= 68: 0.028 (n = 4562, err = 126.3)
  # | | | | | [8] minute > 68: 0.046 (n = 7100, err = 312.8)
  # | | | [9] temp > -0.4259
  # | | | | [10] temp <= 6.0726: 0.015 (n = 56413, err = 860.5)
  # | | | | [11] temp > 6.0726: 0.019 (n = 39779, err = 758.9)
  # | | [12] minute > 227
  # | | | [13] minute <= 501
  # | | | | [14] minute <= 291: 0.013 (n = 30671, err = 388.0)
  # | | | | [15] minute > 291: 0.009 (n = 559646, err = 5009.3)
  # | | | [16] minute > 501
  # | | | | [17] temp <= 5.2105
  # | | | | | [18] temp <= -1.8393: 0.009 (n = 66326, err = 617.1)
  # | | | | | [19] temp > -1.8393: 0.012 (n = 355986, err = 4289.0)
  # | | | | [20] temp > 5.2105
  # | | | | | [21] temp <= 13.6927: 0.014 (n = 287909, err = 3900.7)
  # | | | | | [22] temp > 13.6927
  # | | | | | | [23] temp <= 14: 0.035 (n = 2769, err = 92.7)
  # | | | | | | [24] temp > 14: 0.007 (n = 2161, err = 15.9)
  # | [25] minute > 600
  # | | [26] temp <= 1.6418
  # | | | [27] temp <= -2.3366: 0.012 (n = 110810, err = 1268.1)
  # | | | [28] temp > -2.3366: 0.014 (n = 584457, err = 7973.2)
  # | | [29] temp > 1.6418: 0.016 (n = 3753208, err = 57864.3)

  # Draw the fitted tree using the "simple" plot type.
  plot(visit_ct, type = "simple")
  36.  
  # partykit example: conditional inference tree for Ozone on the built-in
  # airquality data, using every other column as a candidate predictor.
  library(partykit)

  # Keep only the rows where Ozone was actually observed.
  airq <- airquality[!is.na(airquality$Ozone), ]

  airct <- ctree(formula = Ozone ~ ., data = airq)
  print(airct)
  # Model formula:
  # Ozone ~ Solar.R + Wind + Temp + Month + Day
  #
  # Fitted party:
  # [1] root
  # | [2] Temp <= 82
  # | | [3] Wind <= 6.9: 55.600 (n = 10, err = 21946.4)
  # | | [4] Wind > 6.9
  # | | | [5] Temp <= 77: 18.479 (n = 48, err = 3956.0)
  # | | | [6] Temp > 77: 31.143 (n = 21, err = 4620.6)
  # | [7] Temp > 82
  # | | [8] Wind <= 10.3: 81.633 (n = 30, err = 15119.0)
  # | | [9] Wind > 10.3: 48.714 (n = 7, err = 1183.4)
  #
  # Number of inner nodes: 4
  # Number of terminal nodes: 5
  57.  
  # Swap partykit for party, then refit the same model with party::ctree so
  # the where() and nodes() accessors used below operate on a party tree.
  detach("package:partykit", unload = TRUE)
  library(party)
  airct <- party::ctree(Ozone ~ ., data = airq)

  # For each distinct node id reported by where(airct), pull the Ozone values
  # of the observations carrying a non-zero weight in that node and report
  # their count, mean, variance and sum of squared deviations from the mean.
  t(sapply(unique(where(airct)), function(node_id) {
    node <- nodes(airct, node_id)[[1]]
    in_node <- as.logical(node$weights)
    ozone <- airq[in_node, "Ozone"]
    centre <- mean(ozone)
    cbind.data.frame(
      "Node" = as.integer(node_id),
      "n" = length(ozone),
      "Avg." = centre,
      "Variance" = var(ozone),
      "SSE" = sum((ozone - centre)^2)
    )
  }))

  # Node n Avg. Variance SSE
  # [1,] 5 48 18.47917 84.16977 3955.979
  # [2,] 3 10 55.6 2438.489 21946.4
  # [3,] 6 21 31.14286 231.0286 4620.571
  # [4,] 9 7 48.71429 197.2381 1183.429
  # [5,] 8 30 81.63333 521.3437 15118.97
  78.  
  # Ids of the nodes that observations end up in (the terminal nodes).
  terNodes <- unique(where(airct))

  # Every id from 1 up to the largest terminal id that is not itself terminal
  # is treated as an inner (splitting) node. seq_len() is used instead of
  # 1:max(...) so the sequence can never count downwards.
  inner_nodes <- setdiff(seq_len(max(terNodes)), terNodes)
  inner_nodes

  # Label each inner node with the p-value of its split. The code takes
  # 1 - maxcriterion as the p-value; values below 0.001 are reported as
  # "p < 0.001", everything else is rounded to three digits.
  # vapply() pins the result to a 2-row character matrix, including the case
  # where the tree has no inner nodes at all.
  vapply(inner_nodes, function(node_id) {
    # Look the node up once and reuse it.
    node <- nodes(airct, node_id)[[1]]
    pvalue <- 1 - node$criterion$maxcriterion
    # Plain if/else: pvalue is a single number, so ifelse() is not needed.
    plab <- if (pvalue < 10^(-3)) {
      paste("p <", 10^(-3))
    } else {
      paste("p =", round(pvalue, digits = 3))
    }
    c("Node" = node_id, "P-value" = plab)
  }, FUN.VALUE = c("Node" = "", "P-value" = ""))

  # [,1] [,2] [,3] [,4]
  # Node "1" "2" "4" "7"
  # P-value "p < 0.001" "p = 0.002" "p = 0.003" "p = 0.003"
  94.  
  # partykit example on the Pima Indians diabetes data shipped with mlbench:
  # fit a tree for `diabetes` on all other columns and plot its simplified
  # (as.simpleparty) form.
  library("partykit")
  data("PimaIndiansDiabetes", package = "mlbench")
  ct <- ctree(formula = diabetes ~ ., data = PimaIndiansDiabetes)
  plot(as.simpleparty(ct))

  # Per-variable test results stored in the info slot of the root node.
  # NOTE(review): the p.value row printed here is negative while sctest()
  # below prints positive values of similar magnitude, so this row appears
  # to be on a transformed scale - confirm against the partykit docs.
  nodeapply(ct, ids = 1, FUN = function(node) info_node(node)$criterion)
  ## $`1`
  ## pregnant glucose pressure triceps insulin
  ## statistic 3.631413e+00 5.117841e+00 1.1778530 1.455334 2.570457503
  ## p.value -6.380290e-09 -2.710725e-37 -0.5937987 -0.313498 -0.002398554
  ## mass pedigree age
  ## statistic 4.185236e+00 3.143294e+00 3.774507e+00
  ## p.value -4.181295e-15 -1.180135e-05 -3.262459e-10

  # The same root-node tests as reported by sctest() from strucchange.
  library("strucchange")
  sctest(ct, node = 1)
  ## pregnant glucose pressure triceps insulin mass
  ## statistic 3.776615e+01 166.9745 3.2473947 4.2859164 13.07180346 6.570902e+01
  ## p.value 6.380290e-09 0.0000 0.4477744 0.2691142 0.00239568 4.218847e-15
  ## pedigree age
  ## statistic 2.318009e+01 4.357601e+01
  ## p.value 1.180128e-05 3.262459e-10

  # The p.value entry stored in the info slot of every node of the tree.
  nodeapply(
    ct,
    ids = nodeids(ct),
    FUN = function(node) info_node(node)$p.value
  )
  ## $`1`
  ## glucose
  ## 0
  ##
  ## $`2`
  ## age
  ## 6.048661e-07
  ##
  ## $`3`
  ## mass
  ## 0.001169778
  ##
  ## ...
  132.  
  # Group the observed diabetes labels by the node each observation is
  # predicted into, then report per node the number of observations and the
  # percentage share of each class.
  tab <- tapply(
    PimaIndiansDiabetes$diabetes,
    predict(ct, type = "node"),
    function(outcome) {
      class_pct <- 100 * prop.table(table(outcome))
      c("n" = length(outcome), class_pct)
    }
  )

  # Stack the per-node vectors into one matrix, one row per node.
  do.call("rbind", tab)
  ## n neg pos
  ## 5 144 99.30556 0.6944444
  ## 6 7 85.71429 14.2857143
  ## 7 120 82.50000 17.5000000
  ## 8 214 66.82243 33.1775701
  ## 11 53 79.24528 20.7547170
  ## 12 108 39.81481 60.1851852
  ## 13 122 19.67213 80.3278689
  144.  
  library(partykit)

  # Work on a copy of the built-in airquality data so columns can be added.
  airquality <- data.frame(airquality)

  # Numeric binary response: 1 when Ozone >= 34, 0 when it is below.
  # Built in a single vectorised step so that rows with a missing Ozone value
  # stay NA instead of being silently labelled as class 0.
  airquality$OzoneClass <- as.numeric(airquality$Ozone >= 34)

  # Regression tree: the response is numeric, so each terminal node reports
  # a mean of OzoneClass (see the printed output below).
  airq <- subset(airquality, !is.na(Ozone))
  airct <- ctree(OzoneClass ~ Temp, data = airq)
  print(airct)

  # Model formula:
  # OzoneClass ~ Temp
  #
  # Fitted party:
  # [1] root
  # | [2] Temp <= 82
  # | | [3] Temp <= 77: 0.096 (n = 52, err = 4.5)
  # | | [4] Temp > 77: 0.519 (n = 27, err = 6.7)
  # | [5] Temp > 82: 0.973 (n = 37, err = 1.0)
  #
  # Number of inner nodes: 2
  # Number of terminal nodes: 3
  171.  
  172.  
  173.  
  174.  
  # Categorical binary response: factor with levels "0" (Ozone < 34) and
  # "1" (Ozone >= 34). Built in a single vectorised step so that rows with a
  # missing Ozone value stay NA instead of being silently labelled as "0".
  airquality$OzoneClass <- factor(as.numeric(airquality$Ozone >= 34))

  # Classification tree: the response is a factor, so each terminal node
  # reports a predicted class and an error percentage (see the printed
  # output below).
  airq <- subset(airquality, !is.na(Ozone))
  airct <- ctree(OzoneClass ~ Temp, data = airq)
  print(airct)

  # Model formula:
  # OzoneClass ~ Temp
  #
  # Fitted party:
  # [1] root
  # | [2] Temp <= 82
  # | | [3] Temp <= 77: 0 (n = 52, err = 9.6%)
  # | | [4] Temp > 77: 1 (n = 27, err = 48.1%)
  # | [5] Temp > 82: 1 (n = 37, err = 2.7%)
  #
  # Number of inner nodes: 2
  # Number of terminal nodes: 3
Add Comment
Please, Sign In to add comment