Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Rattle is Copyright (c) 2006-2014 Togaware Pty Ltd.
#============================================================
# Rattle timestamp: 2015-06-07 17:53:27 x86_64-apple-darwin13.4.0
# Rattle version 3.4.1 user 'xyz'

# This log can be exported to a file with the Export button or the Tools
# menu, which keeps a repeatable record of all activity. Once exported to,
# say, 'myrf01.R', typing source('myrf01.R') in the R Console repeats the
# whole process automatically. The exported file (or this log textview,
# before exporting) can be edited to suit our needs or to record additional
# information. Saving and loading projects also retains this log.

library(rattle)

# The log records the steps taken while building a model, but with little
# effort it can also be used to score a new dataset. The logical variable
# 'building' toggles between generating transformations (model building)
# and simply reusing them (scoring); 'scoring' is always its negation.

building <- TRUE
scoring <- !building

# The colorspace package generates the colours used in plots, if available.
# It is optional, so attach it only when it is installed instead of letting
# library() abort the whole script when it is missing.

if (requireNamespace("colorspace", quietly = TRUE)) {
  library(colorspace)
} else {
  message("Package 'colorspace' is not installed; continuing without it.")
}

# A pre-defined value is used to reset the random seed so that results are
# repeatable.

crv$seed <- 42
#============================================================
# Rattle timestamp: 2015-06-07 17:54:04 x86_64-apple-darwin13.4.0

# Read the survey CSV into crs$dataset. Fields containing ".", "NA", "" or
# "?" are read as NA, and white space around unquoted fields is stripped.

crs$dataset <- read.csv(
  file = "file:///Volumes/USBCRUZER/StackOverflow2015RockyMtnData.csv",
  na.strings = c(".", "NA", "", "?"),
  strip.white = TRUE,
  encoding = "UTF-8"
)
#============================================================
# Rattle timestamp: 2015-06-07 17:54:06 x86_64-apple-darwin13.4.0

# Record the user selections.

# Split the row indices into training (70%), validation (15%) and test
# (the remainder) sets. The seed is reset first so the draw is repeatable.

set.seed(crv$seed)
crs$nobs <- nrow(crs$dataset) # 26086 observations
crs$train <- sample(crs$nobs, 0.7 * crs$nobs) # 18260 observations
crs$sample <- crs$train
crs$validate <- sample(
  setdiff(seq_len(crs$nobs), crs$train),
  0.15 * crs$nobs
) # 3912 observations
# The test set is every row that is in neither of the two sets above.
crs$test <- setdiff(
  seq_len(crs$nobs),
  c(crs$train, crs$validate)
) # 3914 observations

# Variable roles chosen by the user: every listed column is an input and
# RemoteRecode is the target.

crs$input <- c(
  "Country",
  "Age.Orig", "Age", "AgeRecode",
  "Years.IT...Programming.Experience.Orig",
  "Years.IT...Programming.Experience", "ExpRecode",
  "Occupation", "Desktop.Operating.System",
  "Compensation.Orig", "Compensation", "CompRecode",
  "Compensation..midpoint",
  "Employment.Status.Orig", "Employment.Status", "EmpStatRecode",
  "Industry",
  "Job.Satisfaction.Orig", "Job.Satisfaction", "JobSatisRecode",
  "Remote.Status.Orig", "Remote.Status"
)

crs$numeric <- c(
  "AgeRecode",
  "ExpRecode",
  "CompRecode",
  "EmpStatRecode",
  "JobSatisRecode"
)

crs$categoric <- c(
  "Country",
  "Age.Orig", "Age",
  "Years.IT...Programming.Experience.Orig",
  "Years.IT...Programming.Experience",
  "Occupation", "Desktop.Operating.System",
  "Compensation.Orig", "Compensation", "Compensation..midpoint",
  "Employment.Status.Orig", "Employment.Status",
  "Industry",
  "Job.Satisfaction.Orig", "Job.Satisfaction",
  "Remote.Status.Orig", "Remote.Status"
)

crs$target <- "RemoteRecode"

# No risk, identifier, ignored or weight variables in this selection.
crs$risk <- NULL
crs$ident <- NULL
crs$ignore <- NULL
crs$weights <- NULL
#============================================================
# Rattle timestamp: 2015-06-07 17:56:04 x86_64-apple-darwin13.4.0

# Record the user selections.

# Split the row indices into training (70%), validation (15%) and test
# (the remainder) sets. The seed is reset first so the draw is repeatable.

set.seed(crv$seed)
crs$nobs <- nrow(crs$dataset) # 26086 observations
crs$train <- sample(crs$nobs, 0.7 * crs$nobs) # 18260 observations
crs$sample <- crs$train
crs$validate <- sample(
  setdiff(seq_len(crs$nobs), crs$train),
  0.15 * crs$nobs
) # 3912 observations
# The test set is every row that is in neither of the two sets above.
crs$test <- setdiff(
  seq_len(crs$nobs),
  c(crs$train, crs$validate)
) # 3914 observations

# Variable roles chosen by the user: five inputs predicting
# Job.Satisfaction, with all remaining columns ignored.

crs$input <- c(
  "AgeRecode",
  "ExpRecode",
  "CompRecode",
  "Employment.Status",
  "Remote.Status"
)

crs$numeric <- c(
  "AgeRecode",
  "ExpRecode",
  "CompRecode"
)

crs$categoric <- c(
  "Employment.Status",
  "Remote.Status"
)

crs$target <- "Job.Satisfaction"

# No risk or identifier variables in this selection.
crs$risk <- NULL
crs$ident <- NULL

crs$ignore <- c(
  "Country",
  "Age.Orig", "Age",
  "Years.IT...Programming.Experience.Orig",
  "Years.IT...Programming.Experience",
  "Occupation", "Desktop.Operating.System",
  "Compensation.Orig", "Compensation", "Compensation..midpoint",
  "Employment.Status.Orig", "EmpStatRecode",
  "Industry",
  "Job.Satisfaction.Orig", "JobSatisRecode",
  "Remote.Status.Orig", "RemoteRecode"
)

# No observation weights.
crs$weights <- NULL
#============================================================
# Rattle timestamp: 2015-06-07 17:56:12 x86_64-apple-darwin13.4.0

# Write the project data (the crs variable) to a compressed .rattle file.

save(
  crs,
  file = "/Volumes/USBCRUZER/RWorkingDirectory/StackOverflow2015RockyMtnData.rattle",
  compress = TRUE
)
#============================================================
# Rattle timestamp: 2015-06-07 17:56:15 x86_64-apple-darwin13.4.0

# Record the user selections. This entry repeats the selections logged at
# 17:56:04; because the seed is reset first, it yields the same split.

# Split the row indices into training (70%), validation (15%) and test
# (the remainder) sets.

set.seed(crv$seed)
crs$nobs <- nrow(crs$dataset) # 26086 observations
crs$train <- sample(crs$nobs, 0.7 * crs$nobs) # 18260 observations
crs$sample <- crs$train
crs$validate <- sample(
  setdiff(seq_len(crs$nobs), crs$train),
  0.15 * crs$nobs
) # 3912 observations
# The test set is every row that is in neither of the two sets above.
crs$test <- setdiff(
  seq_len(crs$nobs),
  c(crs$train, crs$validate)
) # 3914 observations

# Variable roles chosen by the user: five inputs predicting
# Job.Satisfaction, with all remaining columns ignored.

crs$input <- c(
  "AgeRecode",
  "ExpRecode",
  "CompRecode",
  "Employment.Status",
  "Remote.Status"
)

crs$numeric <- c(
  "AgeRecode",
  "ExpRecode",
  "CompRecode"
)

crs$categoric <- c(
  "Employment.Status",
  "Remote.Status"
)

crs$target <- "Job.Satisfaction"

# No risk or identifier variables in this selection.
crs$risk <- NULL
crs$ident <- NULL

crs$ignore <- c(
  "Country",
  "Age.Orig", "Age",
  "Years.IT...Programming.Experience.Orig",
  "Years.IT...Programming.Experience",
  "Occupation", "Desktop.Operating.System",
  "Compensation.Orig", "Compensation", "Compensation..midpoint",
  "Employment.Status.Orig", "EmpStatRecode",
  "Industry",
  "Job.Satisfaction.Orig", "JobSatisRecode",
  "Remote.Status.Orig", "RemoteRecode"
)

# No observation weights.
crs$weights <- NULL
#============================================================
# Rattle timestamp: 2015-06-07 17:56:20 x86_64-apple-darwin13.4.0

# Record the user selections. This entry repeats the selections logged at
# 17:56:04; because the seed is reset first, it yields the same split.

# Split the row indices into training (70%), validation (15%) and test
# (the remainder) sets.

set.seed(crv$seed)
crs$nobs <- nrow(crs$dataset) # 26086 observations
crs$train <- sample(crs$nobs, 0.7 * crs$nobs) # 18260 observations
crs$sample <- crs$train
crs$validate <- sample(
  setdiff(seq_len(crs$nobs), crs$train),
  0.15 * crs$nobs
) # 3912 observations
# The test set is every row that is in neither of the two sets above.
crs$test <- setdiff(
  seq_len(crs$nobs),
  c(crs$train, crs$validate)
) # 3914 observations

# Variable roles chosen by the user: five inputs predicting
# Job.Satisfaction, with all remaining columns ignored.

crs$input <- c(
  "AgeRecode",
  "ExpRecode",
  "CompRecode",
  "Employment.Status",
  "Remote.Status"
)

crs$numeric <- c(
  "AgeRecode",
  "ExpRecode",
  "CompRecode"
)

crs$categoric <- c(
  "Employment.Status",
  "Remote.Status"
)

crs$target <- "Job.Satisfaction"

# No risk or identifier variables in this selection.
crs$risk <- NULL
crs$ident <- NULL

crs$ignore <- c(
  "Country",
  "Age.Orig", "Age",
  "Years.IT...Programming.Experience.Orig",
  "Years.IT...Programming.Experience",
  "Occupation", "Desktop.Operating.System",
  "Compensation.Orig", "Compensation", "Compensation..midpoint",
  "Employment.Status.Orig", "EmpStatRecode",
  "Industry",
  "Job.Satisfaction.Orig", "JobSatisRecode",
  "Remote.Status.Orig", "RemoteRecode"
)

# No observation weights.
crs$weights <- NULL
#============================================================
# Rattle timestamp: 2015-08-30 21:24:04 x86_64-apple-darwin13.4.0

# Restore the project data (the crs variable) from the .rattle project file.

load(
  file = "/Volumes/USBCRUZER/RWorkingDirectory/StackOverflow2015RockyMtnData.rattle"
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement