Advertisement
Guest User

Untitled

a guest
Feb 21st, 2019
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.23 KB | None | 0 0
  1. > df <- data.frame(letter = sample(letters[1:4], 15, replace=TRUE),
  2. + time = c("one", "one", "one", "two", "two", "one", "two", "two", "two", "one","one","one","two","one","two"),
  3. + stringsAsFactors = FALSE)
  4. > df
  5. letter time
  6. 1 d one
  7. 2 a one
  8. 3 a one
  9. 4 b two
  10. 5 c two
  11. 6 a one
  12. 7 d two
  13. 8 a two
  14. 9 b two
  15. 10 b one
  16. 11 d one
  17. 12 b one
  18. 13 c two
  19. 14 a one
  20. 15 a two
  21.  
  22. > x <- df %>%
  23. + mutate(Value = letter,
  24. + n = n()) %>%
  25. + group_by(Value) %>%
  26. + summarise(Quantity = length(Value),
  27. + Percentage = first(length(Value)/n))
  28. > x
  29. Value Quantity Percentage
  30. 1 a 6 0.4
  31. 2 b 4 0.267
  32. 3 c 2 0.133
  33. 4 d 3 0.2
  34.  
  35. Value time_one Percentage time_two Percentage
  36. 1 a 5 0.5 1 0.2
  37. 2 b 2 0.2 2 0.4
  38. 3 c 1 0.1 1 0.2
  39. 4 d 2 0.2 1 0.2
  40.  
  41. df %>%
  42. group_by(time) %>%
  43. mutate(n_time = n()) %>%
  44. group_by(time, letter) %>%
  45. summarise(n = n(),
  46. percentage = first(n()/n_time)) %>%
  47. ungroup() %>%
  48. gather(var, val, -c(time, letter)) %>%
  49. mutate(var = paste(var, time, sep = "_")) %>%
  50. select(-time) %>%
  51. spread(var, val)
  52.  
  53. letter n_one n_two percentage_one percentage_two
  54. <chr> <dbl> <dbl> <dbl> <dbl>
  55. 1 a 3. 2. 0.375 0.286
  56. 2 b 2. NA 0.250 NA
  57. 3 c 2. 2. 0.250 0.286
  58. 4 d 1. 3. 0.125 0.429
  59.  
  60. df <- data.frame(letter = sample(letters[1:4], 15, replace=TRUE),
  61. time = c("one", "one", "one", "two", "two", "one", "two", "two", "two", "one","one","one","two","one","two"),
  62. stringsAsFactors = FALSE)
  63. # make sure letter is a factor with all levels; otherwise the subsequent cbind doesn't work
  64. df$letter = factor(df$letter, levels=letters[1:4])
  65.  
  66. # get the counts
  67. x = sapply(split(df$letter, df$time), table)
  68.  
  69. # get the percentages and cbind together
  70. x2 = cbind(x, apply(x, 2, function(x) x/sum(x)))
  71.  
  72. colnames(x2) = c("time_one", "time_two", "percent_one", "percent_two")
  73.  
  74.  
  75. time_one time_two percent_one percent_two
  76. a 0 1 0.0 0.1428571
  77. b 4 4 0.5 0.5714286
  78. c 0 1 0.0 0.1428571
  79. d 4 1 0.5 0.1428571
  80.  
  81. library(data.table)
  82. library(magrittr)
  83. setDT(df)
  84.  
  85. df[, .N, by = .(letter, time)
  86. ][, .(N, percentage = N/sum(N), letter), by = time] %>%
  87. dcast(letter ~ time, value.var = c("N", "percentage"), fill = 0)
  88.  
  89. letter N_one N_two percentage_one percentage_two
  90. 1: a 4 2 0.50 0.2857143
  91. 2: b 2 2 0.25 0.2857143
  92. 3: c 0 2 0.00 0.2857143
  93. 4: d 2 1 0.25 0.1428571
  94.  
  95. df <- structure(list(letter = c("d", "a", "a", "b", "c", "a", "d",
  96. "a", "b", "b", "d", "b", "c", "a", "a"), time = c("one", "one",
  97. "one", "two", "two", "one", "two", "two", "two", "one", "one",
  98. "one", "two", "one", "two")), row.names = c(NA, -15L), class = "data.frame")
Advertisement
Add Comment
Please Sign In to add a comment
Advertisement