Advertisement
simondp

Selecting segments with trees

Oct 31st, 2019
44
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 1.93 KB | None | 0 0
  1. # This script simulates data and estimates how many segments it should be cut into
  2. # using the tree package
  3.  
  4. # Load library
  5. library(brms)
  6. require(tree)
  7.  
  8. # Clear work space
  9. rm(list=ls())
  10.  
  11. # True rates (Unkown in reality)
  12. trueRates <- c(0.76,0.65,0.58,0.63,0.79,0.67,0.65,0.70)
  13. category <- as.factor(1:8)
  14.  
  15. ### Simulate data
  16. # Generate data for 1 subject
  17. subject <- 1
  18. indvar <- rnorm(1,0,0.02) # Variance how good or bad is this individual?
  19. results <- cbind(subject,(rbinom(25, 1, trueRates[1]+indvar)),category[1])
  20. for(i in 2:8){results <- rbind(results,cbind(subject,(rbinom(25, 1, trueRates[i]+indvar)),category[i]))}
  21. # Loop for rest
  22. for(s in 2:20){
  23.   indvar <- rnorm(1,0,0.02)
  24.   for(i in 1:8){results <- rbind(results,cbind(s,(rbinom(25, 1, trueRates[i]+indvar)),category[i]))}}
  25.  
  26. # Tweak and name Data frame
  27. results <- data.frame(results); names(results) <- c("Subject","Cor","Category")
  28. results$Subject <- as.factor(results$Subject)
  29. results$Category <- as.factor(results$Category)
  30.  
  31. # # Make model that takes Subject as random intercept (It will take some processing time)
  32. # Model8Categories <- brm(Cor ~ Category + (1|Subject), family=bernoulli(link = "logit"), data = results,
  33. #                  chains = 2,iter = 2000, warmup = 500)
  34. # saveplot <- marginal_effects(Model8Categories)
  35. #
  36. # # Retrieve estimates
  37. # round(saveplot[["Category"]][["estimate__"]],2)-trueRates
  38. # # Fairly close estimates
  39.  
  40. # Use Tree to segment driven by data (But not subject as random effect)
  41. meanpercat <- aggregate(Cor ~ Subject + Category, results, mean) # Find mean for each participant, each condition
  42. tree.groups <-tree(Cor~ Category, data=meanpercat)
  43. cv.groups <- cv.tree(tree.groups,FUN=prune.tree)
  44.  
  45. # How many segments are best according to CV?
  46. round(sum(cv.groups$size[which(cv.groups$dev == min(cv.groups$dev))]/(length(which(cv.groups$dev == min(cv.groups$dev))))))
  47. # Running this a few times gives variance from 2-5 segments being best
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement