Advertisement
Guest User

Rattle log

a guest
Aug 30th, 2015
352
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.81 KB | None | 0 0
  1. # Rattle is Copyright (c) 2006-2014 Togaware Pty Ltd.
  2.  
  3. #============================================================
  4. # Rattle timestamp: 2015-06-07 17:53:27 x86_64-apple-darwin13.4.0
  5.  
  6. # Rattle version 3.4.1 user 'xyz'
  7.  
  8. # Export this log textview to a file using the Export button or the Tools
  9. # menu to save a log of all activity. This facilitates repeatability. Exporting
  10. # to file 'myrf01.R', for example, allows us to then type in the R Console
  11. # the command source('myrf01.R') to repeat the process automatically.
  12. # Generally, we may want to edit the file to suit our needs. We can also directly
  13. # edit this current log textview to record additional information before exporting.
  14.  
  15. # Saving and loading projects also retains this log.
  16.  
  17. library(rattle) # NOTE(review): crv and crs are used below but never created in this log; presumably rattle supplies them - confirm
  18.  
  19. # This log generally records the process of building a model. However, with very
  20. # little effort the log can be used to score a new dataset. The logical variable
  21. # 'building' is used to toggle between generating transformations, as when building
  22. # a model, and simply using the transformations, as when scoring a dataset.
  23.  
  24. building <- TRUE # this log was recorded in model-building mode
  25. scoring <- ! building # always the logical complement of 'building' (FALSE here)
  26.  
  27. # The colorspace package is used to generate the colours used in plots, if available. NOTE(review): the call below is an unconditional library(), which stops with an error when the package is not installed.
  28.  
  29. library(colorspace)
  30.  
  31. # A pre-defined value is used to reset the random seed so that results are repeatable.
  32.  
  33. crv$seed <- 42 # read back by every set.seed(crv$seed) call later in this log
  34.  
  35. #============================================================
  36. # Rattle timestamp: 2015-06-07 17:54:04 x86_64-apple-darwin13.4.0
  37.  
  38. # Load the data into crs$dataset. Fields equal to ".", "NA", "" or "?" are read as NA, and strip.white=TRUE trims leading/trailing white space from unquoted fields. NOTE(review): the source is an absolute file:// URL under /Volumes/USBCRUZER, so this line only reruns where that path exists.
  39.  
  40. crs$dataset <- read.csv("file:///Volumes/USBCRUZER/StackOverflow2015RockyMtnData.csv", na.strings=c(".", "NA", "", "?"), strip.white=TRUE, encoding="UTF-8")
  41.  
  42. #============================================================
  43. # Rattle timestamp: 2015-06-07 17:54:06 x86_64-apple-darwin13.4.0
  44.  
  45. # Note the user selections. Every crs field assigned in this section is reassigned by the section logged at 17:56:04.
  46.  
  47. # Build the training/validate/test datasets as vectors of row indices: 70% train, 15% validate, remainder test.
  48.  
  49. set.seed(crv$seed) # reseed so the two sample() calls below are reproducible
  50. crs$nobs <- nrow(crs$dataset) # 26086 observations
  51. crs$sample <- crs$train <- sample(nrow(crs$dataset), 0.7*crs$nobs) # 18260 observations; the same index vector is stored as both crs$sample and crs$train
  52. crs$validate <- sample(setdiff(seq_len(nrow(crs$dataset)), crs$train), 0.15*crs$nobs) # 3912 observations, drawn only from rows not in crs$train
  53. crs$test <- setdiff(setdiff(seq_len(nrow(crs$dataset)), crs$train), crs$validate) # 3914 observations: every row in neither crs$train nor crs$validate
  54.  
  55. # The following variable selections have been noted: 22 inputs, listed again below as 5 numeric and 17 categoric.
  56.  
  57. crs$input <- c("Country", "Age.Orig", "Age", "AgeRecode",
  58. "Years.IT...Programming.Experience.Orig", "Years.IT...Programming.Experience", "ExpRecode", "Occupation",
  59. "Desktop.Operating.System", "Compensation.Orig", "Compensation", "CompRecode",
  60. "Compensation..midpoint", "Employment.Status.Orig", "Employment.Status", "EmpStatRecode",
  61. "Industry", "Job.Satisfaction.Orig", "Job.Satisfaction", "JobSatisRecode",
  62. "Remote.Status.Orig", "Remote.Status")
  63.  
  64. crs$numeric <- c("AgeRecode", "ExpRecode", "CompRecode", "EmpStatRecode",
  65. "JobSatisRecode")
  66.  
  67. crs$categoric <- c("Country", "Age.Orig", "Age", "Years.IT...Programming.Experience.Orig",
  68. "Years.IT...Programming.Experience", "Occupation", "Desktop.Operating.System", "Compensation.Orig",
  69. "Compensation", "Compensation..midpoint", "Employment.Status.Orig", "Employment.Status",
  70. "Industry", "Job.Satisfaction.Orig", "Job.Satisfaction", "Remote.Status.Orig",
  71. "Remote.Status")
  72.  
  73. crs$target <- "RemoteRecode" # NOTE(review): Remote.Status and Remote.Status.Orig are inputs above; if RemoteRecode is derived from them this leaks the target - confirm
  74. crs$risk <- NULL # risk, ident, ignore and weights are all set to NULL for this selection
  75. crs$ident <- NULL
  76. crs$ignore <- NULL
  77. crs$weights <- NULL
  78.  
  79. #============================================================
  80. # Rattle timestamp: 2015-06-07 17:56:04 x86_64-apple-darwin13.4.0
  81.  
  82. # Note the user selections. This section replaces the selections logged at 17:54:06: the target changes from "RemoteRecode" to "Job.Satisfaction" and the inputs shrink from 22 to 5.
  83.  
  84. # Build the training/validate/test datasets as vectors of row indices: 70% train, 15% validate, remainder test.
  85.  
  86. set.seed(crv$seed) # same seed as before, so the partition is recomputed rather than changed (provided crs$dataset is unchanged)
  87. crs$nobs <- nrow(crs$dataset) # 26086 observations
  88. crs$sample <- crs$train <- sample(nrow(crs$dataset), 0.7*crs$nobs) # 18260 observations; the same index vector is stored as both crs$sample and crs$train
  89. crs$validate <- sample(setdiff(seq_len(nrow(crs$dataset)), crs$train), 0.15*crs$nobs) # 3912 observations, drawn only from rows not in crs$train
  90. crs$test <- setdiff(setdiff(seq_len(nrow(crs$dataset)), crs$train), crs$validate) # 3914 observations: every row in neither crs$train nor crs$validate
  91.  
  92. # The following variable selections have been noted: 5 inputs, listed again below as 3 numeric and 2 categoric.
  93.  
  94. crs$input <- c("AgeRecode", "ExpRecode", "CompRecode", "Employment.Status",
  95. "Remote.Status")
  96.  
  97. crs$numeric <- c("AgeRecode", "ExpRecode", "CompRecode")
  98.  
  99. crs$categoric <- c("Employment.Status", "Remote.Status")
  100.  
  101. crs$target <- "Job.Satisfaction" # Job.Satisfaction.Orig and JobSatisRecode are both in crs$ignore below, so neither is an input
  102. crs$risk <- NULL
  103. crs$ident <- NULL
  104. crs$ignore <- c("Country", "Age.Orig", "Age", "Years.IT...Programming.Experience.Orig", "Years.IT...Programming.Experience", "Occupation", "Desktop.Operating.System", "Compensation.Orig", "Compensation", "Compensation..midpoint", "Employment.Status.Orig", "EmpStatRecode", "Industry", "Job.Satisfaction.Orig", "JobSatisRecode", "Remote.Status.Orig", "RemoteRecode") # 17 names; with the 5 inputs and the target this covers all 23 column names used in this log
  105. crs$weights <- NULL
  106.  
  107. #============================================================
  108. # Rattle timestamp: 2015-06-07 17:56:12 x86_64-apple-darwin13.4.0
  109.  
  110. # Save the project data (variable crs) to file, compressed. The same path is read back by the load() call at the end of this log. NOTE(review): absolute path; rerunning requires /Volumes/USBCRUZER/RWorkingDirectory to exist.
  111.  
  112. save(crs, file="/Volumes/USBCRUZER/RWorkingDirectory/StackOverflow2015RockyMtnData.rattle", compress=TRUE)
  113.  
  114. #============================================================
  115. # Rattle timestamp: 2015-06-07 17:56:15 x86_64-apple-darwin13.4.0
  116.  
  117. # Note the user selections. The statements in this section are identical to those logged at 17:56:04; nothing new is selected.
  118.  
  119. # Build the training/validate/test datasets as vectors of row indices: 70% train, 15% validate, remainder test.
  120.  
  121. set.seed(crv$seed) # same seed as before, so the partition is recomputed rather than changed (provided crs$dataset is unchanged)
  122. crs$nobs <- nrow(crs$dataset) # 26086 observations
  123. crs$sample <- crs$train <- sample(nrow(crs$dataset), 0.7*crs$nobs) # 18260 observations; the same index vector is stored as both crs$sample and crs$train
  124. crs$validate <- sample(setdiff(seq_len(nrow(crs$dataset)), crs$train), 0.15*crs$nobs) # 3912 observations, drawn only from rows not in crs$train
  125. crs$test <- setdiff(setdiff(seq_len(nrow(crs$dataset)), crs$train), crs$validate) # 3914 observations: every row in neither crs$train nor crs$validate
  126.  
  127. # The following variable selections have been noted: 5 inputs, listed again below as 3 numeric and 2 categoric.
  128.  
  129. crs$input <- c("AgeRecode", "ExpRecode", "CompRecode", "Employment.Status",
  130. "Remote.Status")
  131.  
  132. crs$numeric <- c("AgeRecode", "ExpRecode", "CompRecode")
  133.  
  134. crs$categoric <- c("Employment.Status", "Remote.Status")
  135.  
  136. crs$target <- "Job.Satisfaction" # Job.Satisfaction.Orig and JobSatisRecode are both in crs$ignore below, so neither is an input
  137. crs$risk <- NULL
  138. crs$ident <- NULL
  139. crs$ignore <- c("Country", "Age.Orig", "Age", "Years.IT...Programming.Experience.Orig", "Years.IT...Programming.Experience", "Occupation", "Desktop.Operating.System", "Compensation.Orig", "Compensation", "Compensation..midpoint", "Employment.Status.Orig", "EmpStatRecode", "Industry", "Job.Satisfaction.Orig", "JobSatisRecode", "Remote.Status.Orig", "RemoteRecode") # 17 names; with the 5 inputs and the target this covers all 23 column names used in this log
  140. crs$weights <- NULL
  141.  
  142. #============================================================
  143. # Rattle timestamp: 2015-06-07 17:56:20 x86_64-apple-darwin13.4.0
  144.  
  145. # Note the user selections. The statements in this section are identical to those logged at 17:56:04 and 17:56:15; nothing new is selected.
  146.  
  147. # Build the training/validate/test datasets as vectors of row indices: 70% train, 15% validate, remainder test.
  148.  
  149. set.seed(crv$seed) # same seed as before, so the partition is recomputed rather than changed (provided crs$dataset is unchanged)
  150. crs$nobs <- nrow(crs$dataset) # 26086 observations
  151. crs$sample <- crs$train <- sample(nrow(crs$dataset), 0.7*crs$nobs) # 18260 observations; the same index vector is stored as both crs$sample and crs$train
  152. crs$validate <- sample(setdiff(seq_len(nrow(crs$dataset)), crs$train), 0.15*crs$nobs) # 3912 observations, drawn only from rows not in crs$train
  153. crs$test <- setdiff(setdiff(seq_len(nrow(crs$dataset)), crs$train), crs$validate) # 3914 observations: every row in neither crs$train nor crs$validate
  154.  
  155. # The following variable selections have been noted: 5 inputs, listed again below as 3 numeric and 2 categoric.
  156.  
  157. crs$input <- c("AgeRecode", "ExpRecode", "CompRecode", "Employment.Status",
  158. "Remote.Status")
  159.  
  160. crs$numeric <- c("AgeRecode", "ExpRecode", "CompRecode")
  161.  
  162. crs$categoric <- c("Employment.Status", "Remote.Status")
  163.  
  164. crs$target <- "Job.Satisfaction" # Job.Satisfaction.Orig and JobSatisRecode are both in crs$ignore below, so neither is an input
  165. crs$risk <- NULL
  166. crs$ident <- NULL
  167. crs$ignore <- c("Country", "Age.Orig", "Age", "Years.IT...Programming.Experience.Orig", "Years.IT...Programming.Experience", "Occupation", "Desktop.Operating.System", "Compensation.Orig", "Compensation", "Compensation..midpoint", "Employment.Status.Orig", "EmpStatRecode", "Industry", "Job.Satisfaction.Orig", "JobSatisRecode", "Remote.Status.Orig", "RemoteRecode") # 17 names; with the 5 inputs and the target this covers all 23 column names used in this log
  168. crs$weights <- NULL
  169.  
  170. #============================================================
  171. # Rattle timestamp: 2015-08-30 21:24:04 x86_64-apple-darwin13.4.0
  172.  
  173. # Reload the project data (variable crs) from file. This is the path written by the save() call logged at 2015-06-07 17:56:12; load() replaces any same-named objects already in the current environment. NOTE(review): presumably the file still holds that saved crs - confirm it was not overwritten in between.
  174.  
  175. load("/Volumes/USBCRUZER/RWorkingDirectory/StackOverflow2015RockyMtnData.rattle")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement