Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Build a 3x3 data frame holding a random permutation of 1..9.
- DF <- as.data.frame(matrix(sample(1:9, 9), nrow = 3, ncol = 3))
- > DF
- V1 V2 V3
- 1 2 7 9
- 2 8 3 6
- 3 1 5 4
- > DF2
- RE
- 1 V3
- 2 V1
- 3 V2
- # Toy data: the largest value sits in a different column on each row.
- DF <- data.frame(V1 = c(2, 8, 1), V2 = c(7, 3, 5), V3 = c(9, 6, 4))
- # Row-wise which.max() gives the position of each row's maximum.
- names(DF)[apply(DF, MARGIN = 1, FUN = which.max)]
- [1] "V3" "V1" "V2"
- # max.col() performs the same lookup in a single vectorized call.
- names(DF)[max.col(DF, ties.method = "first")]
- #[1] "V3" "V1" "V2"
- # Same data, except row 1 now has its maximum (7) in both V2 and V3.
- DF <- data.frame(V1 = c(2, 8, 1), V2 = c(7, 3, 5), V3 = c(7, 6, 4))
- # Return every column position that attains the row maximum, ties included.
- apply(DF, MARGIN = 1, FUN = function(row) which(row == max(row)))
- [[1]]
- V2 V3
- 2 3
- [[2]]
- V1
- 1
- [[3]]
- V2
- 2
- set.seed(45)
- DF <- data.frame(matrix(sample(10, 26746 * 18, TRUE), ncol = 18))
- # library() stops with an error if data.table is missing; require() would
- # only return FALSE and let the following lines fail obscurely.
- library(data.table)
- # Long format, one row per cell. Despite the names, `colid` holds the row
- # index of DF and `rowid` holds the DF column name the value came from.
- DT <- data.table(value = unlist(DF, use.names = FALSE),
- colid = seq_len(nrow(DF)), rowid = rep(names(DF), each = nrow(DF)))
- # Key by (row index, value) so each row's cells are sorted ascending by value.
- setkey(DT, colid, value)
- # Inner lookup: last keyed entry per row index = that row's maximum value.
- # Outer lookup: first cell matching (row index, maximum) = its column name.
- t1 <- DT[J(unique(colid), DT[J(unique(colid)), value, mult = "last"]), rowid, mult = "first"]
- # data.table solution: times building the long table, keying it and the lookup together
- system.time({
- DT <- data.table(value=unlist(DF, use.names=FALSE),
- colid = 1:nrow(DF), rowid = rep(names(DF), each=nrow(DF)))
- setkey(DT, colid, value) # sort by row index, then by value
- t1 <- DT[J(unique(colid), DT[J(unique(colid)), value, mult="last"]), rowid, mult="first"]
- })
- # user system elapsed
- # 0.174 0.029 0.227
- # apply solution from @thelatemail: row-wise which.max() over the same DF
- system.time(t2 <- colnames(DF)[apply(DF,1,which.max)])
- # user system elapsed
- # 2.322 0.036 2.602
- identical(t1, t2) # checks that both approaches pick the same column name for every row
- # [1] TRUE
- # Shorter variant: one keyed lookup that takes the last entry per row index.
- # NOTE(review): when a row's maximum is tied, this appears to return the last
- # tied column rather than the first one that which.max() picks -- confirm.
- DT <- data.table(value = unlist(DF, use.names = FALSE),
- colid = seq_len(nrow(DF)), rowid = rep(names(DF), each = nrow(DF)))
- # Key by (row index, value); the last keyed entry per row index is its maximum.
- setkey(DT, colid, value)
- t1 <- DT[J(unique(colid)), rowid, mult = "last"]
- library(data.table)
- set.seed(45)
- # One million rows by ten columns of integers sampled from 1..10.
- DT <- data.table(matrix(sample(10, size = 1e7, replace = TRUE), ncol = 10))
- # Add, by reference, the name of the first column holding each row's maximum.
- system.time(
- DT[, col_max := names(.SD)[max.col(.SD, ties.method = "first")]]
- )
- #> user system elapsed
- #> 0.15 0.06 0.21
- DT[]
- #> V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 col_max
- #> 1: 7 4 1 2 3 7 6 6 6 1 V1
- #> 2: 4 6 9 10 6 2 7 7 1 3 V4
- #> 3: 3 4 9 8 9 9 8 8 6 7 V3
- #> 4: 4 8 8 9 7 5 9 2 7 1 V4
- #> 5: 4 3 9 10 2 7 9 6 6 9 V4
- #> ---
- #> 999996: 4 6 10 5 4 7 3 8 2 8 V3
- #> 999997: 8 7 6 6 3 10 2 3 10 1 V6
- #> 999998: 2 3 2 7 4 7 5 2 7 3 V4
- #> 999999: 8 10 3 2 3 4 5 1 1 4 V2
- #> 1000000: 10 4 2 6 6 2 8 4 7 4 V1
- # Restrict the search to two chosen columns via .SDcols.
- DT[, MAX2 := colnames(.SD)[max.col(.SD, ties.method = "first")], .SDcols = c("V9", "V10")]
- # Column holding each row's minimum: negate the values and reuse max.col().
- # .SDcols limits .SD to the numeric V1..V10 columns; without it .SD would also
- # contain any character columns already added to DT (such as MAX2), and
- # unary minus fails on those.
- DT[, col_min := colnames(.SD)[max.col(-.SD, ties.method = "first")], .SDcols = paste0("V", 1:10)]
- library(tidyverse)
- # sample data frame with a tie (row 3: V2 and V3 are both 5)
- # tibble() replaces the deprecated data_frame() constructor.
- df <- tibble(V1 = c(2, 8, 1), V2 = c(7, 3, 5), V3 = c(9, 6, 5))
- # If you aren't worried about ties (which.max() yields one position, so one row is kept per id):
- df %>%
- rownames_to_column('id') %>% # creates an ID number
- gather(dept, cnt, V1:V3) %>% # long form: one row per (id, dept) pair with its cnt
- group_by(id) %>%
- slice(which.max(cnt)) # keep the row with the largest cnt within each id
- # A tibble: 3 x 3
- # Groups: id [3]
- id dept cnt
- <chr> <chr> <dbl>
- 1 1 V3 9.
- 2 2 V1 8.
- 3 3 V2 5.
- # If you're worried about keeping ties (a tied maximum yields several rows for that id):
- df %>%
- rownames_to_column('id') %>%
- gather(dept, cnt, V1:V3) %>% # long form: one row per (id, dept) pair with its cnt
- group_by(id) %>%
- filter(cnt == max(cnt)) %>% # top_n(cnt, n = 1) also works
- arrange(id) # order the result by id
- # A tibble: 4 x 3
- # Groups: id [3]
- id dept cnt
- <chr> <chr> <dbl>
- 1 1 V3 9.
- 2 2 V1 8.
- 3 3 V2 5.
- 4 3 V3 5.
- # If you're worried about ties but want exactly one department per id, use rank() with ties.method 'first' or 'last' to choose which tied row wins
- df %>%
- rownames_to_column('id') %>%
- gather(dept, cnt, V1:V3) %>% # long form: one row per (id, dept) pair with its cnt
- group_by(id) %>%
- mutate(dept_rank = rank(-cnt, ties.method = "first")) %>% # or 'last'; negating cnt makes rank 1 the largest
- filter(dept_rank == 1) %>%
- select(-dept_rank) # drop the helper column again
- # A tibble: 3 x 3
- # Groups: id [3]
- id dept cnt
- <chr> <chr> <dbl>
- 1 2 V1 8.
- 2 3 V2 5.
- 3 1 V3 9.
- # if you wanted to keep the original wide data frame (the per-id maximum is joined back on as max_dept / max_cnt)
- df %>%
- rownames_to_column('id') %>%
- left_join(
- df %>%
- rownames_to_column('id') %>%
- gather(max_dept, max_cnt, V1:V3) %>% # long form: one row per (id, max_dept) pair
- group_by(id) %>%
- slice(which.max(max_cnt)), # one row per id: the first largest max_cnt
- by = 'id'
- )
- # A tibble: 3 x 6
- id V1 V2 V3 max_dept max_cnt
- <chr> <dbl> <dbl> <dbl> <chr> <dbl>
- 1 1 2. 7. 9. V3 9.
- 2 2 8. 3. 6. V1 8.
- 3 3 1. 5. 5. V2 5.
- DF <- data.frame(V1 = c(2, 8, 1), V2 = c(7, 3, 5), V3 = c(9, 6, 4))
- # For each row, keep the column(s) holding the largest value.
- DF %>%
- rownames_to_column() %>%
- gather(column, value, -rowname) %>%
- group_by(rowname) %>%
- # ties.method = "min" gives every tied maximum rank 1; the default "average"
- # would give them a fractional rank (e.g. 1.5) and silently drop the row.
- filter(rank(-value, ties.method = "min") == 1)
- # A tibble: 3 x 3
- # Groups: rowname [3]
- rowname column value
- <chr> <chr> <dbl>
- 1 2 V1 8
- 2 3 V2 5
- 3 1 V3 9
- DF %>%
- rownames_to_column() %>%
- gather(column, value, -rowname) %>% # long form: one row per (rowname, column) pair
- group_by(rowname) %>%
- mutate(rk = rank(-value)) %>% # negating value makes rank 1 the largest within each rowname
- filter(rk <= 2) %>% # keep the two highest-ranked columns per rowname
- arrange(rowname, rk)
- # A tibble: 6 x 4
- # Groups: rowname [3]
- rowname column value rk
- <chr> <chr> <dbl> <dbl>
- 1 1 V3 9 1
- 2 1 V2 7 2
- 3 2 V1 8 1
- 4 2 V3 6 2
- 5 3 V2 5 1
- 6 3 V3 4 2
- > df <- data.frame(V1 = c(2, 8, 1), V2 = c(7, 3, 5), V3 = c(9, 6, 4))
- > df
- V1 V2 V3
- 1 2 7 9
- 2 8 3 6
- 3 1 5 4
- > # One name per row: the column holding that row's (first) maximum.
- > # vapply over seq_len() avoids growing df2 row by row and copes with a
- > # zero-row df, where 1:nrow(df) would iterate over c(1, 0).
- > df2 <- data.frame(
- +   V1 = vapply(seq_len(nrow(df)),
- +               function(i) colnames(df)[which.max(df[i, ])],
- +               character(1)),
- +   stringsAsFactors = FALSE
- + )
- > df2
- V1
- 1 V3
- 2 V1
- 3 V2
- # Position (1..4) of the first column among V1..V4 holding each row's maximum.
- j1 <- max.col(yourDF[, .(V1, V2, V3, V4)], ties.method = "first")
- # Translate each position into the matching column name.
- yourDF$newCol <- c("V1", "V2", "V3", "V4")[j1]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement