Advertisement
Guest User

Untitled

a guest
Jan 16th, 2017
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.40 KB | None | 0 0
  1. Category Frequency
  2. First 10
  3. First 15
  4. First 5
  5. Second 2
  6. Third 14
  7. Third 20
  8. Second 3
  9.  
  10. Category Frequency
  11. First 30
  12. Second 5
  13. Third 34
  14.  
  15. x <- data.frame(Category = factor(c("First", "First", "First", "Second",
  16.   "Third", "Third", "Second")),
  17.   Frequency = c(10, 15, 5, 2, 14, 20, 3))  # example data: one Frequency per row
  18. aggregate(x$Frequency, by = list(Category = x$Category), FUN = sum)  # sum per Category; value column is named "x"
  19. #   Category  x
  20. # 1    First 30
  21. # 2   Second  5
  22. # 3    Third 34
  23.  
  24. aggregate(Frequency ~ Category, data = x, FUN = sum)  # formula interface: sum Frequency within each Category
  25.  
  26. aggregate(. ~ Category, data = x, FUN = sum)  # "." stands for every column other than Category
  27.  
  28. tapply(x$Frequency, x$Category, FUN = sum)  # per-Category sums, labelled by Category
  29. #  First Second  Third
  30. #     30      5     34
  31.  
  32. library(dplyr)
  33. x %>%
  34.   group_by(Category) %>%
  35.   summarise(Frequency = sum(Frequency))  # one row per Category holding its total
  36.  
  37. # Source: local data frame [3 x 2]
  38. #
  39. #   Category Frequency
  40. # 1    First        30
  41. # 2   Second         5
  42. # 3    Third        34
  43.  
  44. x %>%
  45.   group_by(Category) %>%
  46.   summarise(across(everything(), sum))  # sums every non-grouping column; replaces deprecated summarise_each(funs(sum))
  47.  
  48. mtcars %>%
  49.   group_by(cyl, gear) %>%  # several grouping columns at once
  50.   summarise(max_hp = max(hp), mean_mpg = mean(mpg))  # several summary columns at once
  51.  
  52. library(data.table)
  53. small_dt <- data.table(Category = c("First","First","First","Second","Third", "Third", "Second"),
  54.   Frequency = c(10, 15, 5, 2, 14, 20, 3))  # small example as a data.table
  55. small_dt[, sum(Frequency), by = Category]  # unnamed result column defaults to V1
  56. # Category V1
  57. # 1: First 30
  58. # 2: Second 5
  59. # 3: Third 34
  60. system.time(small_dt[, sum(Frequency), by = Category])
  61. # user system elapsed
  62. # 0.008 0.001 0.009
  63.  
  64. # Same rows as a plain data.frame for the base R timing; kept under its own name so small_dt stays a data.table.
  65. small_df <- as.data.frame(small_dt)
  66. system.time(aggregate(small_df$Frequency, by = list(Category = small_df$Category), FUN = sum))
  67. # user system elapsed
  68. # 0.008 0.000 0.015
  69.  
  70. small_dt[, list(Frequency = sum(Frequency)), by = Category]  # data.table syntax: must run on the data.table, not the data.frame
  71. # Category Frequency
  72. # 1: First 30
  73. # 2: Second 5
  74. # 3: Third 34
  75.  
  76. big_dt <- data.table(Category = rep(c("First", "Second", "Third"), 100000),
  77.   Frequency = rnorm(300000))  # 3 * 100000 rows; lengths match instead of relying on recycling
  78. system.time(big_dt[, sum(Frequency), by = Category])
  79. # user system elapsed
  80. # 0.055 0.004 0.059
  81. # Identical rows as a data.frame so both timings aggregate the same values.
  82. big_df <- as.data.frame(big_dt)
  83. system.time(aggregate(big_df$Frequency, by = list(Category = big_df$Category), FUN = sum))
  84. # user system elapsed
  85. # 0.287 0.010 0.296
  86.  
  87. small_dt[, lapply(.SD, sum), by = Category]  # .SD = all non-grouping columns; output below is for the small table
  88. # Category Frequency
  89. # 1: First 30
  90. # 2: Second 5
  91. # 3: Third 34
  92.  
  93. x2 <- by(data = x$Frequency, INDICES = x$Category, FUN = sum)  # per-Category sums as a "by" object
  94. do.call(what = rbind, args = as.list(x2))  # stack the per-Category sums row-wise into a one-column matrix
  95.  
  96. library(plyr)  # NOTE: attaching plyr after dplyr masks dplyr verbs such as summarise
  97. ddply(x, .(Category), summarise, sum = sum(Frequency))  # uses the example data frame x; `tbl` was never defined
  98.  
  99. library(doBy)  # library() errors if doBy is missing; require() would only return FALSE
  100. summaryBy(Frequency ~ Category, data = x, FUN = sum)  # x replaces the placeholder `yourdataframe`
  101.  
  102. library(reshape)  # library() errors if reshape is missing; require() would only return FALSE
  103. recast(x, Category ~ ., fun.aggregate = sum)  # melt x, then cast it back summing within each Category
  104.  
  105. xtabs(Frequency ~ Category, data = x)  # x is the example data; `df` was never defined and resolves to stats::df
  106. # Category
  107. #  First Second  Third
  108. #     30      5     34
  109.  
  110. as.data.frame(xtabs(Frequency ~ Category, data = x))  # x, not the undefined `df`; the sum column is named Freq
  111. #   Category Freq
  112. # 1    First   30
  113. # 2   Second    5
  114. # 3    Third   34
  115.  
  116. x <- data.frame(Category = factor(c("First", "First", "First", "Second",
  117.   "Third", "Third", "Second")),
  118.   Frequency = c(10, 15, 5, 2, 14, 20, 3))  # example data, redefined so this snippet stands alone
  119. library(sqldf)  # sqldf() is called below but the package was never attached
  120. sqldf("select
  121. Category
  122. ,sum(Frequency) as Frequency
  123. from x
  124. group by
  125. Category")
  126.  
  127. ## Category Frequency
  128. ## 1 First 30
  129. ## 2 Second 5
  130. ## 3 Third 34
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement