Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# vectorise
#' Label every row by the bin of its 8th column, using vectorised `cut()`.
#'
#' Bins are [-Inf, 0.25), [0.25, 0.5), [0.5, 0.75) and [0.75, Inf), labelled
#' "group1" to "group4".
#'
#' @param data A data frame (or tibble / data.table) whose 8th column is
#'   numeric.
#' @return `data` with a factor column `new_col` appended via `cbind()`.
group_f1 <- function(data) {
  # Read the values from the argument itself; the previous version indexed a
  # global matrix `X`, so the result did not depend on `data` at all.
  values <- data[[8]]
  # right = FALSE makes the bins left-closed, which matches the strict `<`
  # comparisons used by the loop-based variants of this function.
  new_col <- cut(
    values,
    breaks = c(-Inf, 0.25, 0.5, 0.75, Inf),
    labels = paste0("group", 1:4),
    right = FALSE
  )
  cbind(data, new_col)
}
# for loop
#' Label every row by the bin of its 8th column, using an explicit `for` loop.
#'
#' Rows with a value below 0.25 get "group1", below 0.5 "group2", below 0.75
#' "group3", and everything else "group4".
#'
#' @param data A data frame (or tibble) whose 8th column is numeric.
#' @return `data` with a character column `new_col` appended via `cbind()`.
group_f2 <- function(data) {
  n_rows <- nrow(data)
  # Preallocate as character so the column type is the same for empty input.
  new_col <- rep(NA_character_, n_rows)
  # seq_len() is empty for zero rows; 1:0 would iterate over c(1, 0) and fail.
  for (i in seq_len(n_rows)) {
    # The cell is looked up row by row on purpose: this function is the
    # scalar-loop baseline in the benchmark.
    if (data[i, 8] < .25) {
      new_col[i] <- "group1"
    } else if (data[i, 8] < .5) {
      new_col[i] <- "group2"
    } else if (data[i, 8] < .75) {
      new_col[i] <- "group3"
    } else {
      new_col[i] <- "group4"
    }
  }
  cbind(data, new_col)
}
# sapply
#' Label every row by the bin of its 8th column, applying a function per row.
#'
#' Rows with a value below 0.25 get "group1", below 0.5 "group2", below 0.75
#' "group3", and everything else "group4".
#'
#' @param data A data frame (or tibble) whose 8th column is numeric.
#' @return `data` with a character column `new_col` appended via `cbind()`.
group_f3 <- function(data) {
  # vapply() pins the result to a character vector (sapply() would return an
  # empty list for empty input), and seq_len() avoids the 1:0 trap when
  # `data` has no rows.
  new_col <- vapply(
    seq_len(nrow(data)),
    function(i) {
      if (data[i, 8] < .25) {
        "group1"
      } else if (data[i, 8] < .5) {
        "group2"
      } else if (data[i, 8] < .75) {
        "group3"
      } else {
        "group4"
      }
    },
    character(1)
  )
  cbind(data, new_col)
}
- # plyr and dplyr
- library(data.table)
- library(plyr)
- library(dplyr)
- library(magrittr)
#' Label every row by the bin of column `X8`, using dplyr and plyr verbs.
#'
#' `mutate()` adds the binned factor and `mapvalues()` then renames its levels
#' to "group1" to "group4".
#'
#' @param data A table with a numeric column named `X8`; the benchmark in this
#'   script calls it with both `X_df` and `X_dt`.
#' @return `data` with a factor column `new_col` added.
group_f4 <- function(data) {
  data %>%
    # right = FALSE gives left-closed bins, so values exactly on 0.25, 0.5 or
    # 0.75 land in the same group as the strict `<` tests in the loop-based
    # variants.
    mutate(
      new_col = cut(X8, c(-Inf, 0.25, 0.5, 0.75, Inf), right = FALSE)
    ) %>%
    transform(
      new_col = mapvalues(
        new_col,
        from = levels(new_col),
        to = paste0("group", 1:4)
      )
    )
}
- library(rbenchmark)
- ## data generation
# Ten columns of 200000 normal draws each; the column means are
# (1:10 - 5.5) * 4 / 20, i.e. -0.9, -0.7, ..., 0.9.
X <- sapply(
  (1:10 - 5.5) * 4 / 20,
  function(column_mean) rnorm(200000, mean = column_mean)
)
# The same data wrapped twice, so group_f4 can be timed on both table types.
X_df <- tbl_df(data.frame(X))
X_dt <- tbl_dt(data.table(X_df), FALSE)
# Time every variant 20 times and list the fastest first.
benchmark(
  group_f1(X_df),
  group_f2(X_df),
  group_f3(X_df),
  group_f4(X_df),
  group_f4(X_dt),
  replications = 20,
  order = "relative",
  columns = c("test", "replications", "elapsed", "relative", "user.self")
)
- # test replications elapsed relative user.self
- # 1 group_f1(X_df) 20 0.96 1.000 0.95
- # 4 group_f4(X_df) 20 0.98 1.021 0.97
- # 5 group_f4(X_dt) 20 1.23 1.281 1.20
- # 2 group_f2(X_df) 20 417.14 434.521 411.56
- # 3 group_f3(X_df) 20 418.21 435.635 412.50
Advertisement
Add Comment
Please, Sign In to add comment