Advertisement
Guest User

Untitled

a guest
Feb 12th, 2016
62
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.57 KB | None | 0 0
  1. ### Preparation
  2.  
  ### Stores a data.frame as an AFM file, the data format that gets passed to
  ### the causal effect decision tree.
  # vars: names of the variables (columns of `data`) to include
  # data: data.frame
  # afmFile: output filename; an existing file is overwritten
  ###
  ### Layout: a header row ('.' followed by the sample indices 1..nrow(data)),
  ### then one tab-separated row per variable. Numeric variables are annotated
  ### 'N:<name>'; character, factor and logical variables are annotated
  ### 'C:<name>'. Missing values are allowed and are written as 'NA'; empty
  ### strings are written as 'NA' as well. Any other column type (including a
  ### name that is not a column of `data`) stops with an error.
  StoreAsAFM <- function(vars, data, afmFile) {
    # Open the output once instead of re-opening the file for every cat() call,
    # and close it even when an unsupported column aborts the export.
    out <- file(afmFile, open = "wt")
    on.exit(close(out), add = TRUE)

    # seq_len() keeps the header correct for a zero-row data.frame, where
    # 1:nrow(data) would emit the bogus sample ids 1 and 0.
    cat(c(".", seq_len(nrow(data))), file = out, sep = "\t")
    cat("\n", file = out)

    for (v in vars) {
      feature <- data[[v]]
      if (is.factor(feature)) {
        # Export factor labels, not the underlying integer codes.
        feature <- as.character(feature)
      }

      if (is.numeric(feature)) {
        fieldAnnotation <- sprintf("N:%s", v)
      } else if (is.character(feature)) {
        # Empty strings count as missing; NA entries are left untouched and are
        # printed as 'NA' by cat().
        feature[!is.na(feature) & feature == ""] <- "NA"
        fieldAnnotation <- sprintf("C:%s", v)
      } else if (is.logical(feature)) {
        fieldAnnotation <- sprintf("C:%s", v)
      } else {
        stop(sprintf("%s takes no type", v))
      }

      # Progress output, one line per exported variable.
      print(fieldAnnotation)
      cat(c(fieldAnnotation, feature), file = out, sep = "\t")
      cat("\n", file = out)
    }

    invisible(NULL)
  }
  32.  
  33.  
  ### Build the per-user value table and save it as user_value.csv.
  options(width = 180)
  library(readr)

  # Profiles: keep users whose join date is strictly earlier than 14 days
  # before 2016-02-04.
  u <- read_csv("profile_data.csv")
  u <- subset(u, joinDateUTC < as.Date("2016-02-04") - 14)

  # Purchases: keep rows with day <= 14 and sum the value per user.
  # NOTE(review): `day` is presumably counted from the join date - confirm.
  p <- read_csv("purchase_daily.csv")
  p <- subset(p, day <= 14)
  pp <- tapply(p$value, p$userId, sum)

  # Left join so users without any purchase row are kept; their value is NA
  # after the merge and is set to 0 below.
  # NOTE(review): as.integer() yields NA for ids outside the 32-bit integer
  # range - confirm that userId always fits.
  uu <- merge(
    u,
    data.frame(userId = as.integer(names(pp)), value = pp),
    by = "userId",
    all.x = TRUE
  )
  uu$value[is.na(uu$value)] <- 0
  write.csv(uu, "user_value.csv")
  44.  
  45. # uu <- read.csv("user_value.csv")
  # Distinct segment labels, partitioned by whether the label contains
  # "Control".
  segments <- as.character(unique(uu$segment))
  control <- grep("Control", segments, value = TRUE)
  variations <- grep("Control", segments, value = TRUE, invert = TRUE)
  49.  
  # Columns exported to the AFM files.
  vars <- c(
    "kifCountry",
    "manufacturer",
    "mvnoName",
    "osVersion",
    "sdkVersion",
    "totalRamInKB",
    "listPrice",
    "primaryStorageTotalInMB",
    "timeZoneMin",
    "xDpi",
    "yDpi",
    "densityDpi",
    "heightPixels",
    "widthPixels",
    "value",
    "variation"
  )
  51.  
  52.  
  # Split the user table at random: each row lands in the training set with
  # probability 0.6, otherwise in the testing set. Both parts are exported as
  # AFM and as CSV. No seed is set in this block, so the split changes between
  # runs unless the RNG is seeded beforehand.
  uu$variation <- uu$segment
  training <- runif(nrow(uu)) < 0.6

  trainingRows <- subset(uu, training)
  testingRows <- subset(uu, !training)

  StoreAsAFM(vars, trainingRows, "training_multiarm.afm")
  StoreAsAFM(vars, testingRows, "testing_multiarm.afm")
  write.csv(trainingRows, "training_multiarm.csv")
  write.csv(testingRows, "testing_multiarm.csv")
  59.  
  60. ### Cross-validation
  61.  
  # Reload the held-out users and split them by the `leftNode` flag.
  # NOTE(review): `leftNode` is expected to be a logical column of the CSV (or
  # a variable in scope) - confirm where it is produced.
  uu <- read.csv("testing_multiarm.csv")
  uul <- subset(uu, leftNode)

  # Row-wise "billing country present" mask. The previous
  # !is.null(uu$billingCountry) tested the column as a whole, so it never
  # filtered individual rows. Empty strings are treated as missing as well.
  # When the column is absent the mask is FALSE and `uur` is empty, as before.
  # NOTE(review): confirm that dropping rows without a billing country is the
  # intended filter for the right node.
  billing <- uu[["billingCountry"]]
  hasBilling <- if (is.null(billing)) {
    FALSE
  } else {
    !is.na(billing) & billing != ""
  }
  uur <- subset(uu, !leftNode & hasBilling)

  # Mean value per arm on each side of the split.
  tapply(uul$value, uul$variation, mean)
  tapply(uur$value, uur$variation, mean)

  # Treatment/control mask for the left node. A logical `variation` is used as
  # is; otherwise an arm counts as control when its label contains "Control",
  # and rows with a missing label are left out of both groups.
  inTreatment <- uul$variation
  if (!is.logical(inTreatment)) {
    armLabel <- as.character(inTreatment)
    inTreatment <- !grepl("Control", armLabel)
    inTreatment[is.na(armLabel)] <- NA
  }

  # Bootstrap the mean value of each group with 300 resamples.
  # boot::boot is namespaced because the boot package is never attached here.
  bootMean <- function(data, indices) {
    mean(data$value[indices])
  }
  bv <- boot::boot(
    data = uul[which(inTreatment), , drop = FALSE],
    statistic = bootMean,
    R = 300
  )
  bc <- boot::boot(
    data = uul[which(!inTreatment), , drop = FALSE],
    statistic = bootMean,
    R = 300
  )

  # Share of paired bootstrap replicates in which the treatment mean exceeds
  # the control mean.
  mean(bv$t > bc$t)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement