Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(data.table)
- library(plyr)
- library(dplyr)
- library(magrittr)
- library(microbenchmark)
# ---- Benchmark fixtures ----
# Build one CSV text blob of n_rows x n_cols small integers in which every
# value 15 is replaced by the sentinel string "Null", then parse that same text
# five different ways. The parsed tables are the inputs of the conversion
# methods compared further down.
set.seed(100)
n_rows <- 5e4
n_cols <- 200

# Binomial(15, 0.5) draws stored as strings, one column (V1, V2, ...) per field.
dat_dt <- data.table(
  matrix(as.character(rbinom(n_rows * n_cols, 15, .5)), n_rows)
)

# Join the columns of each row with "," and the rows with "\n". Pasting the
# columns directly avoids building and evaluating a paste(V1, V2, ...) string.
txt <- paste(
  do.call(paste, c(as.list(dat_dt), list(sep = ","))),
  collapse = "\n"
)

# Fields only hold the values 0..15, so the literal "15" can only be a whole
# field; those fields become the non-numeric marker "Null".
txt <- gsub("15", "Null", txt, fixed = TRUE)

# `text =` lets read.csv() manage its own connection instead of leaving an
# open textConnection() behind after every call.
# stringsAsFactors is spelled out here: since R 4.0.0 the default is FALSE,
# and the "factor" methods below need columns containing "Null" to be factors.
dat_df_default <- read.csv(text = txt, header = FALSE, stringsAsFactors = TRUE)
dat_df_notFactors <- read.csv(
  text = txt, header = FALSE, stringsAsFactors = FALSE
)
# Reference result: "Null" is read as NA.
dat_df_nastrings <- read.csv(text = txt, header = FALSE, na.strings = "Null")

dat_dt_default <- fread(txt, header = FALSE)
dat_dt_nastrings <- fread(txt, header = FALSE, na.strings = "Null")

# Number of columns that were read as character, i.e. that contain "Null".
sum(vapply(dat_df_notFactors, is.character, logical(1)))
# 156 = number of character(factor) columns (as reported by the original run)
# Convert every column of `df` that has class `classToChange` to integer.
#
# Args:
#   df:            a data.frame.
#   classToChange: "factor" or "character". Factor columns are converted via
#                  their labels (as.integer(as.character(x))), not via their
#                  internal level codes; character columns go straight through
#                  as.integer(). Strings that are not integers (e.g. "Null")
#                  become NA, with the usual coercion warning.
#
# Returns:
#   `df` with the matching columns replaced by integer vectors. `df` is
#   returned unchanged when no column matches or when `classToChange` is
#   neither "factor" nor "character".
convert_df_int <- function(df, classToChange) {
  to_int <- switch(
    classToChange,
    factor = function(x) as.integer(as.character(x)),
    character = as.integer
  )
  if (is.null(to_int)) {
    return(df)
  }

  # inherits() yields exactly one logical per column, even for columns with
  # several classes (e.g. ordered factors), unlike comparing class() output.
  cols <- which(vapply(df, inherits, logical(1), what = classToChange))
  if (length(cols) > 0L) {
    # lapply() keeps one vector per column; no detour through a matrix.
    df[cols] <- lapply(df[cols], to_int)
  }
  df
}
# Convert all character columns of `df` to integer in a single step by going
# through one character matrix.
#
# Args:
#   df: a data.frame.
#
# Returns:
#   `df` with every character column replaced by an integer column. Strings
#   that are not integers become NA (with a coercion warning). `df` is
#   returned unchanged when it has no character column.
convert_df_int_2 <- function(df) {
  chr_cols <- which(vapply(df, is.character, logical(1)))
  if (length(chr_cols) == 0L) {
    return(df)
  }

  # drop = FALSE keeps a data.frame even when only one column is selected.
  # as.integer() flattens the matrix column by column, and the data.frame
  # assignment refills the selected columns in that same order.
  df[, chr_cols] <- as.integer(as.matrix(df[, chr_cols, drop = FALSE]))
  df
}
# Convert every column of `df` that has class `classToChange` to integer with
# dplyr's mutate_each_().
#
# Args:
#   df:            a data.frame or data.table.
#   classToChange: "factor" or "character". Factor columns are first turned
#                  into their labels and then into integers; character columns
#                  are converted directly.
#
# Returns:
#   `df` with the matching columns converted. `df` is returned unchanged when
#   no column matches or when `classToChange` is neither "factor" nor
#   "character".
#
# NOTE(review): mutate_each_() and funs() are deprecated in current dplyr
# releases. They are kept because this function exists to benchmark exactly
# that API.
convert_df_mutate_each <- function(df, classToChange) {
  cols <- names(df)[vapply(df, inherits, logical(1), what = classToChange)]

  # Never hand an empty selection to mutate_each_(): it is documented to fall
  # back to all non-grouping columns when no variables are given.
  if (length(cols) == 0L || !classToChange %in% c("factor", "character")) {
    return(df)
  }

  if (classToChange == "factor") {
    # as.integer() on a factor would return level codes, so go through labels.
    df <- mutate_each_(df, funs(as.character), cols)
  }
  mutate_each_(df, funs(as.integer), cols)
}
# Convert every column of `df` that has class `classToChange` to integer with
# a single transform() call of the form
#   transform(df, V1 = as.integer(V1), V2 = as.integer(V2), ...)
#
# Args:
#   df:            a data.frame or data.table.
#   classToChange: "factor" or "character". Factor columns are converted via
#                  as.integer(as.character(x)), character columns via
#                  as.integer(x).
#
# Returns:
#   The result of transform() with the matching columns converted; `df`
#   itself when no column matches.
#
# Raises:
#   An error when `classToChange` is neither "factor" nor "character".
convert_df_transform <- function(df, classToChange) {
  if (!classToChange %in% c("factor", "character")) {
    stop(
      "classToChange must be \"factor\" or \"character\"",
      call. = FALSE
    )
  }

  cols <- names(df)[vapply(df, inherits, logical(1), what = classToChange)]
  if (length(cols) == 0L) {
    return(df)
  }

  # Build the arguments as language objects rather than pasted text, so
  # column names that are not syntactic R names still work.
  conversions <- lapply(cols, function(col_name) {
    column <- as.name(col_name)
    if (classToChange == "factor") {
      bquote(as.integer(as.character(.(column))))
    } else {
      bquote(as.integer(.(column)))
    }
  })
  names(conversions) <- cols

  transform_call <- as.call(
    c(list(quote(transform), quote(df)), conversions)
  )
  # Evaluate here so that `df` refers to this function's argument.
  eval(transform_call, envir = environment())
}
# ---- Run every conversion method once ----
# Each result is kept under its own name so it can be compared against the
# NA-aware reference table (dat_df_nastrings) afterwards.

# Column-level helpers for the plyr::colwise() methods. is.factor() and
# is.character() always return a single TRUE/FALSE, which `if` requires;
# comparing class(x) to a string does not when a column has several classes.
factor_to_int <- function(x) {
  if (is.factor(x)) as.integer(as.character(x)) else x
}
character_to_int <- function(x) {
  if (is.character(x)) as.integer(x) else x
}

# Methods 1-4: plyr::colwise() applied to data.frame and data.table inputs.
df_method_1 <- colwise(factor_to_int)(dat_df_default)
df_method_2 <- colwise(character_to_int)(dat_df_notFactors)
dt_method_3 <- colwise(character_to_int)(dat_dt_default)
dt_method_4 <- colwise(character_to_int)(dat_dt_nastrings)

# Methods 5-7: base-R column replacement.
df_method_5 <- convert_df_int(dat_df_default, "factor")
df_method_6 <- convert_df_int(dat_df_notFactors, "character")
df_method_7 <- convert_df_int_2(dat_df_notFactors)

# Methods 8-11: dplyr mutate_each_() and transform() on data.frames.
df_method_8 <- convert_df_mutate_each(dat_df_default, "factor")
df_method_9 <- convert_df_mutate_each(dat_df_notFactors, "character")
df_method_10 <- convert_df_transform(dat_df_default, "factor")
df_method_11 <- convert_df_transform(dat_df_notFactors, "character")

# Methods 12-15: the same two helpers on the fread() results.
dt_method_12 <- convert_df_mutate_each(dat_dt_default, "character")
dt_method_13 <- convert_df_mutate_each(dat_dt_nastrings, "character")
dt_method_14 <- convert_df_transform(dat_dt_default, "character")
dt_method_15 <- convert_df_transform(dat_dt_nastrings, "character")
# ---- Check every method against the reference ----
# dat_df_nastrings was read with na.strings = "Null", so it already holds the
# integer/NA values that each conversion method is expected to reproduce.
# Attributes are ignored because the results mix data.frame and data.table.
# The trailing "# TRUE" notes are the outcomes reported by the original author.
all.equal(dat_df_nastrings, df_method_1, check.attributes = FALSE) # TRUE
all.equal(dat_df_nastrings, df_method_2, check.attributes = FALSE) # TRUE
# Methods 3 and 4 are stored as dt_method_3 / dt_method_4 (data.table inputs).
all.equal(dat_df_nastrings, dt_method_3, check.attributes = FALSE) # TRUE
all.equal(dat_df_nastrings, dt_method_4, check.attributes = FALSE) # TRUE
all.equal(dat_df_nastrings, df_method_5, check.attributes = FALSE) # TRUE
all.equal(dat_df_nastrings, df_method_6, check.attributes = FALSE) # TRUE
all.equal(dat_df_nastrings, df_method_7, check.attributes = FALSE) # TRUE
all.equal(dat_df_nastrings, df_method_8, check.attributes = FALSE) # TRUE
all.equal(dat_df_nastrings, df_method_9, check.attributes = FALSE) # TRUE
all.equal(dat_df_nastrings, df_method_10, check.attributes = FALSE) # TRUE
all.equal(dat_df_nastrings, df_method_11, check.attributes = FALSE) # TRUE
all.equal(dat_df_nastrings, dt_method_12, check.attributes = FALSE) # TRUE
all.equal(dat_df_nastrings, dt_method_13, check.attributes = FALSE) # TRUE
all.equal(dat_df_nastrings, dt_method_14, check.attributes = FALSE) # TRUE
all.equal(dat_df_nastrings, dt_method_15, check.attributes = FALSE) # TRUE
# ---- Benchmark ----
# Time all fifteen conversion methods, 20 repetitions each.

# Column-level helpers for the colwise() methods, (re)defined here so this
# section is self-contained. is.factor() / is.character() always give the
# single TRUE/FALSE that `if` needs, unlike class(x) == "...".
factor_to_int <- function(x) {
  if (is.factor(x)) as.integer(as.character(x)) else x
}
character_to_int <- function(x) {
  if (is.character(x)) as.integer(x) else x
}

microbenchmark(
  # colwise() is built inside each expression, so its cost is timed too.
  method_1 = colwise(factor_to_int)(dat_df_default),
  method_2 = colwise(character_to_int)(dat_df_notFactors),
  method_3 = colwise(character_to_int)(dat_dt_default),
  method_4 = colwise(character_to_int)(dat_dt_nastrings),
  method_5 = convert_df_int(dat_df_default, "factor"),
  method_6 = convert_df_int(dat_df_notFactors, "character"),
  method_7 = convert_df_int_2(dat_df_notFactors),
  method_8 = convert_df_mutate_each(dat_df_default, "factor"),
  method_9 = convert_df_mutate_each(dat_df_notFactors, "character"),
  method_10 = convert_df_transform(dat_df_default, "factor"),
  method_11 = convert_df_transform(dat_df_notFactors, "character"),
  method_12 = convert_df_mutate_each(dat_dt_default, "character"),
  method_13 = convert_df_mutate_each(dat_dt_nastrings, "character"),
  method_14 = convert_df_transform(dat_dt_default, "character"),
  method_15 = convert_df_transform(dat_dt_nastrings, "character"),
  times = 20
)
Advertisement
Add Comment
Please, Sign In to add comment