Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Category Frequency
- First 10
- First 15
- First 5
- Second 2
- Third 14
- Third 20
- Second 3
- Category Frequency
- First 30
- Second 5
- Third 34
- x <- data.frame(Category=factor(c("First", "First", "First", "Second",  # sample data: seven rows spread over three categories
- "Third", "Third", "Second")),
- Frequency=c(10,15,5,2,14,20,3))
- aggregate(x$Frequency, by=list(Category=x$Category), FUN=sum)  # base R: sum Frequency per Category; the summed column is returned under the name `x`
- Category x
- 1 First 30
- 2 Second 5
- 3 Third 34
- aggregate(Frequency ~ Category, x, sum)  # formula interface: sum Frequency within each Category, keeping the original column names
- aggregate(. ~ Category, x, sum)  # `.` stands for every column of x other than Category
- tapply(x$Frequency, x$Category, FUN=sum)  # returns the per-category sums as a named array rather than a data frame
- First Second Third
- 30 5 34
- library(dplyr)  # provides %>%, group_by() and summarise()
- x %>%
- group_by(Category) %>%  # one group per Category level
- summarise(Frequency = sum(Frequency))  # collapse each group to a single row holding its total
- #Source: local data frame [3 x 2]
- #
- # Category Frequency
- #1 First 30
- #2 Second 5
- #3 Third 34
- # Sum every non-grouping column per Category.
- # across() replaces the deprecated summarise_each()/funs() pair.
- x %>%
- group_by(Category) %>%
- summarise(across(everything(), sum))
- mtcars %>%
- group_by(cyl, gear) %>% # multiple group columns
- summarise(max_hp = max(hp), mean_mpg = mean(mpg)) %>% # multiple summary columns
- ungroup() # summarise() only drops the last grouping level (gear); remove the leftover cyl grouping too
- library(data.table)
- # Build the sample as a data.table so the grouped `DT[i, j, by]` syntax is available.
- data <- data.table(
-   Category = c("First", "First", "First", "Second", "Third", "Third", "Second"),
-   Frequency = c(10, 15, 5, 2, 14, 20, 3)
- )
- # An unnamed j expression yields the default result column name V1.
- data[, sum(Frequency), by = "Category"]
- # Category V1
- # 1: First 30
- # 2: Second 5
- # 3: Third 34
- system.time(data[, sum(Frequency), by = "Category"])
- # user system elapsed
- # 0.008 0.001 0.009
- # Same small sample as a plain data.frame, timed with base aggregate().
- data <- data.frame(
-   Category = c("First", "First", "First", "Second", "Third", "Third", "Second"),
-   Frequency = c(10, 15, 5, 2, 14, 20, 3)
- )
- system.time(aggregate(data$Frequency, by = list(Category = data$Category), FUN = sum))
- # user system elapsed
- # 0.008 0.000 0.015
- # `data` is a plain data.frame at this point, and `[.data.frame` has no `by`
- # argument, so convert before using data.table's grouped syntax.
- # Naming the j expression gives the result column the name Frequency instead of V1.
- as.data.table(data)[, list(Frequency = sum(Frequency)), by = Category]
- # Category Frequency
- # 1: First 30
- # 2: Second 5
- # 3: Third 34
- # Larger benchmark: 300000 category labels against 100000 random values.
- # NOTE(review): Frequency is shorter than Category and is presumably recycled
- # three times to fill the table; confirm this is intended.
- data <- data.table(
-   Category = rep(c("First", "Second", "Third"), 100000),
-   Frequency = rnorm(100000)
- )
- system.time(data[, sum(Frequency), by = Category])
- # user system elapsed
- # 0.055 0.004 0.059
- # Same shape as a plain data.frame, timed with base aggregate().
- data <- data.frame(
-   Category = rep(c("First", "Second", "Third"), 100000),
-   Frequency = rnorm(100000)
- )
- system.time(aggregate(data$Frequency, by = list(Category = data$Category), FUN = sum))
- # user system elapsed
- # 0.287 0.010 0.296
- # `data` is the large random data.frame by this point, so the data.table call
- # on it would fail and could not give the totals shown below. Run the .SD
- # example on the small sample `x` instead, converted to a data.table.
- as.data.table(x)[, lapply(.SD, sum), by = Category]
- # Category Frequency
- # 1: First 30
- # 2: Second 5
- # 3: Third 34
- x2 <- by(x$Frequency, x$Category, sum)  # by() applies sum to the Frequency values of each Category
- do.call(rbind,as.list(x2))  # row-bind the per-category sums into a single one-column matrix
- library(plyr) # NOTE: attaching plyr after dplyr masks dplyr verbs such as summarise()
- # `tbl` is never defined in this listing (with dplyr attached it resolves to a
- # function), so use the sample data frame x.
- ddply(x, .(Category), summarise, sum = sum(Frequency))
- library(doBy) # library() errors if doBy is missing, whereas require() only returns FALSE
- summaryBy(Frequency ~ Category, data = x, FUN = sum) # x is the sample data frame used throughout the listing
- library(reshape) # fail immediately if reshape is not installed; require() would only return FALSE
- recast(x, Category ~ ., fun.aggregate = sum) # melt x, then cast back with sum as the aggregator
- # With a left-hand side in the formula, xtabs() sums Frequency within each Category.
- # `df` is not defined in this listing (in base R the name resolves to the
- # F-density function), so use the sample data frame x.
- xtabs(Frequency ~ Category, x)
- # Category
- # First Second Third
- # 30 5 34
- as.data.frame(xtabs(Frequency ~ Category, x))
- # Category Freq
- # 1 First 30
- # 2 Second 5
- # 3 Third 34
- x <- data.frame(Category=factor(c("First", "First", "First", "Second",
- "Third", "Third", "Second")),
- Frequency=c(10,15,5,2,14,20,3))
- library(sqldf) # sqldf() is not attached anywhere in the visible listing
- # sqldf() runs the SQL below against the data frame x, referenced by name in the FROM clause.
- sqldf("select
- Category
- ,sum(Frequency) as Frequency
- from x
- group by
- Category")
- ## Category Frequency
- ## 1 First 30
- ## 2 Second 5
- ## 3 Third 34
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement