Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Console transcript: builds a 15-row example data frame `df`.
# `letter` is sampled with replacement from "a".."d"; no seed is set in the
# lines shown, so re-running this will not necessarily reproduce the rows
# printed below. `time` is a fixed vector of "one"/"two" labels.
# stringsAsFactors = FALSE keeps both columns as character vectors.
- > df <- data.frame(letter = sample(letters[1:4], 15, replace=TRUE),
- + time = c("one", "one", "one", "two", "two", "one", "two", "two", "two", "one","one","one","two","one","two"),
- + stringsAsFactors = FALSE)
- > df
- letter time
- 1 d one
- 2 a one
- 3 a one
- 4 b two
- 5 c two
- 6 a one
- 7 d two
- 8 a two
- 9 b two
- 10 b one
- 11 d one
- 12 b one
- 13 c two
- 14 a one
- 15 a two
# Console transcript: per-letter count and share of all rows, stored in `x`.
# mutate() copies `letter` into `Value` and, because no grouping has been
# applied yet in this chain, n() records the total number of rows in `n`.
# After group_by(Value), length(Value) is the size of each letter group;
# dividing by `n` gives one identical value per row of the group, and first()
# collapses that to a single share per letter.
# NOTE(review): presumably dplyr is attached earlier in the session; the
# library() call is not part of this transcript.
- > x <- df %>%
- + mutate(Value = letter,
- + n = n()) %>%
- + group_by(Value) %>%
- + summarise(Quantity = length(Value),
- + Percentage = first(length(Value)/n))
- > x
- Value Quantity Percentage
- 1 a 6 0.4
- 2 b 4 0.267
- 3 c 2 0.133
- 4 d 3 0.2
- Value time_one Percentage time_two Percentage
- 1 a 5 0.5 1 0.2
- 2 b 2 0.2 2 0.4
- 3 c 1 0.1 1 0.2
- 4 d 2 0.2 1 0.2
# Count and within-time share of each letter, returned wide: one row per
# letter and one n_<time> / percentage_<time> column pair per time value.
# Expects `df` to have columns `letter` and `time`, with dplyr and tidyr
# attached.
df %>%
  # Rows per (time, letter) pair, stored as an integer column `n`.
  count(time, letter) %>%
  # Share of each letter within its own time group.
  group_by(time) %>%
  mutate(percentage = n / sum(n)) %>%
  ungroup() %>%
  # pivot_wider() replaces the superseded gather()/paste()/spread() round trip
  # and keeps the counts as integers instead of coercing them to double.
  # values_fill = 0 reports a letter that never occurs in a time group as 0
  # rather than NA; the printout that follows predates this change and still
  # shows NA and <dbl> counts.
  pivot_wider(
    names_from = time,
    values_from = c(n, percentage),
    values_fill = 0
  ) %>%
  # Deterministic row order regardless of which time group a letter first
  # appears in.
  arrange(letter)
- letter n_one n_two percentage_one percentage_two
- <chr> <dbl> <dbl> <dbl> <dbl>
- 1 a 3. 2. 0.375 0.286
- 2 b 2. NA 0.250 NA
- 3 c 2. 2. 0.250 0.286
- 4 d 1. 3. 0.125 0.429
# Example data: 15 rows. `letter` is drawn at random from "a".."d" (no seed is
# set, so the draw differs between runs); `time` is a fixed "one"/"two" label.
df <- data.frame(
  letter = sample(letters[1:4], 15, replace = TRUE),
  time = c(
    "one", "one", "one", "two", "two", "one", "two", "two", "two", "one",
    "one", "one", "two", "one", "two"
  ),
  stringsAsFactors = FALSE
)

# Make sure letter is a factor carrying all four levels, so that every time
# group is counted over the same levels and the columns line up in the cbind
# below even when a letter is missing from one group.
df$letter <- factor(df$letter, levels = letters[1:4])

# Get the counts: one row per letter level, one column per time value.
# tabulate() counts the factor's integer codes, and vapply() guarantees an
# integer matrix with exactly nlevels rows (sapply() would silently change
# shape on ragged or empty input).
n_levels <- nlevels(df$letter)
x <- vapply(
  split(df$letter, df$time),
  tabulate,
  FUN.VALUE = integer(n_levels),
  nbins = n_levels
)
rownames(x) <- levels(df$letter)

# Get the percentages (column-wise proportions) and cbind them to the counts.
x2 <- cbind(x, prop.table(x, margin = 2))

# Derive the column names from the time values actually present instead of
# hard-coding "one"/"two".
colnames(x2) <- c(
  paste0("time_", colnames(x)),
  paste0("percent_", colnames(x))
)
- time_one time_two percent_one percent_two
- a 0 1 0.0 0.1428571
- b 4 4 0.5 0.5714286
- c 0 1 0.0 0.1428571
- d 4 1 0.5 0.1428571
library(data.table)
library(magrittr)

# Convert `df` to a data.table (setDT() converts the existing object rather
# than returning a copy, so `df` itself is a data.table from here on).
setDT(df)

# Number of rows for every (letter, time) pair.
pair_counts <- df[, .N, by = .(letter, time)]

# Within each time value, the share of that time's rows held by each letter.
pair_shares <- pair_counts[
  ,
  .(N, percentage = N / sum(N), letter),
  by = time
]

# Reshape to one row per letter with N_<time> and percentage_<time> columns;
# fill = 0 reports letter/time pairs that never occur as 0.
pair_shares %>%
  dcast(letter ~ time, value.var = c("N", "percentage"), fill = 0)
- letter N_one N_two percentage_one percentage_two
- 1: a 4 2 0.50 0.2857143
- 2: b 2 2 0.25 0.2857143
- 3: c 0 2 0.00 0.2857143
- 4: d 2 1 0.25 0.1428571
# Fixed copy of the 15-row example data (same rows as the first printout of
# `df`), so the results can be reproduced without depending on sample().
# Both columns are character vectors; row names are the default 1..15.
df <- data.frame(
  letter = c(
    "d", "a", "a", "b", "c", "a", "d", "a", "b", "b",
    "d", "b", "c", "a", "a"
  ),
  time = c(
    "one", "one", "one", "two", "two", "one", "two", "two", "two", "one",
    "one", "one", "two", "one", "two"
  ),
  stringsAsFactors = FALSE
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement