Advertisement
Guest User

Untitled

a guest
Jun 17th, 2019
57
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.43 KB | None | 0 0
  1. DF <- matrix(sample(1:9, 9), ncol = 3, nrow = 3)  # random 3x3 arrangement of the integers 1..9
  2. DF <- as.data.frame(DF)  # call the generic (it dispatches to the matrix method); columns become V1..V3
  3. > DF
  4. V1 V2 V3
  5. 1 2 7 9
  6. 2 8 3 6
  7. 3 1 5 4
  8.  
  9. > DF2
  10. RE
  11. 1 V3
  12. 2 V1
  13. 3 V2
  14.  
  15. DF <- data.frame(V1 = c(2, 8, 1), V2 = c(7, 3, 5), V3 = c(9, 6, 4))
  16.  
  17. names(DF)[apply(DF, MARGIN = 1, FUN = which.max)]  # per-row position of the largest value, mapped to its column name
  18. [1] "V3" "V1" "V2"
  19.  
  20. names(DF)[max.col(DF, ties.method = "first")]  # vectorised equivalent; a tie resolves to the leftmost column
  21. #[1] "V3" "V1" "V2"
  22.  
  23. DF <- data.frame(V1=c(2,8,1),V2=c(7,3,5),V3=c(7,6,4))  # row 1 has a tie: V2 and V3 are both 7
  24. apply(DF,1,function(x) which(x==max(x)))  # keep every column that attains the row maximum; results differ in length per row, hence the list printed below
  25.  
  26. [[1]]
  27. V2 V3
  28. 2 3
  29.  
  30. [[2]]
  31. V1
  32. 1
  33.  
  34. [[3]]
  35. V2
  36. 2
  37.  
  38. set.seed(45)  # fixed seed so the benchmark data are reproducible
  39. DF <- data.frame(matrix(sample(10, 26746*18, TRUE), ncol=18))  # 26746 rows x 18 columns of values 1..10
  40.  
  41. library(data.table)  # library() fails loudly if the package is missing; require() would only return FALSE
  42. DT <- data.table(value=unlist(DF, use.names=FALSE),
  43. colid = seq_len(nrow(DF)), rowid = rep(names(DF), each=nrow(DF)))  # NB: colid is DF's row index (recycled for every column); rowid is the source column name
  44. setkey(DT, colid, value)  # sort by row index, then value, so each row's maximum is the last entry of its group
  45. t1 <- DT[J(unique(colid), DT[J(unique(colid)), value, mult="last"]), rowid, mult="first"]  # join on (row index, row maximum) and take the first matching column name
  46.  
  47. # data.table solution
  48. system.time({
  49. DT <- data.table(value=unlist(DF, use.names=FALSE),
  50. colid = seq_len(nrow(DF)), rowid = rep(names(DF), each=nrow(DF)))
  51. setkey(DT, colid, value)
  52. t1 <- DT[J(unique(colid), DT[J(unique(colid)), value, mult="last"]), rowid, mult="first"]
  53. })
  54. # user system elapsed
  55. # 0.174 0.029 0.227
  56.  
  57. # apply solution from @thelatemail
  58. system.time(t2 <- colnames(DF)[apply(DF,1,which.max)])
  59. # user system elapsed
  60. # 2.322 0.036 2.602
  61.  
  62. identical(t1, t2)
  63. # [1] TRUE
  64.  
  65. DT <- data.table(value=unlist(DF, use.names=FALSE),
  66. colid = seq_len(nrow(DF)), rowid = rep(names(DF), each=nrow(DF)))
  67. setkey(DT, colid, value)
  68. t1 <- DT[J(unique(colid)), rowid, mult="last"]  # shorter variant: last keyed entry per row index, i.e. with ties the LAST column holding the maximum rather than the first
  69.  
  70. library(data.table)
  71.  
  72. set.seed(45)  # reproducible draw
  73. DT <- data.table(matrix(sample(10, size = 1e7, replace = TRUE), ncol = 10))  # 1e6 rows x 10 columns of values 1..10
  74.  
  75. system.time({
  76. DT[, col_max := names(.SD)[max.col(.SD, ties.method = "first")]]  # by reference: name of the first column holding each row's maximum
  77. })
  78. #> user system elapsed
  79. #> 0.15 0.06 0.21
  80. DT[]
  81. #> V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 col_max
  82. #> 1: 7 4 1 2 3 7 6 6 6 1 V1
  83. #> 2: 4 6 9 10 6 2 7 7 1 3 V4
  84. #> 3: 3 4 9 8 9 9 8 8 6 7 V3
  85. #> 4: 4 8 8 9 7 5 9 2 7 1 V4
  86. #> 5: 4 3 9 10 2 7 9 6 6 9 V4
  87. #> ---
  88. #> 999996: 4 6 10 5 4 7 3 8 2 8 V3
  89. #> 999997: 8 7 6 6 3 10 2 3 10 1 V6
  90. #> 999998: 2 3 2 7 4 7 5 2 7 3 V4
  91. #> 999999: 8 10 3 2 3 4 5 1 1 4 V2
  92. #> 1000000: 10 4 2 6 6 2 8 4 7 4 V1
  93.  
  94. DT[, MAX2 := colnames(.SD)[max.col(.SD, ties.method="first")], .SDcols = c("V9", "V10")]  # arg-max restricted to columns V9 and V10
  95. # .SD must exclude the character result columns (col_max, MAX2) already present on DT, because negating them raises an error.
  96. DT[, col_min := colnames(.SD)[max.col(-.SD, ties.method = "first")], .SDcols = paste0("V", 1:10)]  # arg-min: max.col() on the negated numeric columns
  97.  
  98. library(tidyverse)
  99.  
  100. # sample data frame with a tie (row 3: V2 and V3 are both 5)
  101. df <- tibble(V1 = c(2, 8, 1), V2 = c(7, 3, 5), V3 = c(9, 6, 5))  # tibble() replaces the deprecated data_frame()
  102.  
  103. # If you aren't worried about ties:
  104. df %>%
  105. rownames_to_column('id') %>% # creates an ID number
  106. gather(dept, cnt, V1:V3) %>% # reshape wide -> long: one row per (id, dept) pair with its cnt
  107. group_by(id) %>% # evaluate each original row separately
  108. slice(which.max(cnt)) # keep one row per id; which.max() returns the first maximum
  109.  
  110. # A tibble: 3 x 3
  111. # Groups: id [3]
  112. id dept cnt
  113. <chr> <chr> <dbl>
  114. 1 1 V3 9.
  115. 2 2 V1 8.
  116. 3 3 V2 5.
  117.  
  118.  
  119. # If you're worried about keeping ties:
  120. df %>%
  121. rownames_to_column('id') %>% # id = original row number, as a string
  122. gather(dept, cnt, V1:V3) %>% # reshape wide -> long: one row per (id, dept) pair with its cnt
  123. group_by(id) %>% # evaluate each original row separately
  124. filter(cnt == max(cnt)) %>% # top_n(cnt, n = 1) also works
  125. arrange(id) # order the result by original row
  126.  
  127. # A tibble: 4 x 3
  128. # Groups: id [3]
  129. id dept cnt
  130. <chr> <chr> <dbl>
  131. 1 1 V3 9.
  132. 2 2 V1 8.
  133. 3 3 V2 5.
  134. 4 3 V3 5.
  135.  
  136.  
  137. # If you're worried about ties, but only want a certain department, you could use rank() and choose 'first' or 'last'
  138. df %>%
  139. rownames_to_column('id') %>% # id = original row number, as a string
  140. gather(dept, cnt, V1:V3) %>% # reshape wide -> long: one row per (id, dept) pair with its cnt
  141. group_by(id) %>% # rank within each original row
  142. mutate(dept_rank = rank(-cnt, ties.method = "first")) %>% # or 'last'
  143. filter(dept_rank == 1) %>% # rank 1 on the negated count = largest cnt
  144. select(-dept_rank) # drop the helper column
  145.  
  146. # A tibble: 3 x 3
  147. # Groups: id [3]
  148. id dept cnt
  149. <chr> <chr> <dbl>
  150. 1 2 V1 8.
  151. 2 3 V2 5.
  152. 3 1 V3 9.
  153.  
  154. # if you wanted to keep the original wide data frame
  155. df %>%
  156. rownames_to_column('id') %>% # join key: original row number, as a string
  157. left_join( # attach the per-row winner to the wide table
  158. df %>%
  159. rownames_to_column('id') %>% # same key on the lookup side
  160. gather(max_dept, max_cnt, V1:V3) %>% # long form: one row per (id, column) pair
  161. group_by(id) %>% # evaluate each original row separately
  162. slice(which.max(max_cnt)), # one row per id: the first column holding the maximum
  163. by = 'id'
  164. )
  165.  
  166. # A tibble: 3 x 6
  167. id V1 V2 V3 max_dept max_cnt
  168. <chr> <dbl> <dbl> <dbl> <chr> <dbl>
  169. 1 1 2. 7. 9. V3 9.
  170. 2 2 8. 3. 6. V1 8.
  171. 3 3 1. 5. 5. V2 5.
  172.  
  173. DF <- data.frame(V1=c(2,8,1),V2=c(7,3,5),V3=c(9,6,4))
  174. DF %>%
  175. rownames_to_column() %>% # adds a 'rowname' column holding the original row number
  176. gather(column, value, -rowname) %>% # reshape wide -> long: one row per (rowname, column) pair
  177. group_by(rowname) %>% # rank within each original row
  178. filter(rank(-value, ties.method = "first") == 1) # explicit tie rule: the default "average" gives tied maxima rank 1.5, so '== 1' would silently drop that row
  179.  
  180. # A tibble: 3 x 3
  181. # Groups: rowname [3]
  182. rowname column value
  183. <chr> <chr> <dbl>
  184. 1 2 V1 8
  185. 2 3 V2 5
  186. 3 1 V3 9
  187.  
  188. DF %>%
  189. rownames_to_column() %>% # adds a 'rowname' column holding the original row number
  190. gather(column, value, -rowname) %>% # reshape wide -> long: one row per (rowname, column) pair
  191. group_by(rowname) %>% # rank within each original row
  192. mutate(rk = rank(-value)) %>% # rank 1 = largest value; rank()'s default averages the ranks of tied values
  193. filter(rk <= 2) %>% # keep the two highest-ranked columns per row
  194. arrange(rowname, rk) # order by row, best column first
  195.  
  196. # A tibble: 6 x 4
  197. # Groups: rowname [3]
  198. rowname column value rk
  199. <chr> <chr> <dbl> <dbl>
  200. 1 1 V3 9 1
  201. 2 1 V2 7 2
  202. 3 2 V1 8 1
  203. 4 2 V3 6 2
  204. 5 3 V2 5 1
  205. 6 3 V3 4 2
  206.  
  207. > df<-data.frame(V1=c(2,8,1),V2=c(7,3,5),V3=c(9,6,4))
  208. > df
  209. V1 V2 V3
  210. 1 2 7 9
  211. 2 8 3 6
  212. 3 1 5 4
  213. > df2<-data.frame()  # empty result frame, filled one row at a time below
  214. > for (i in seq_len(nrow(df))){  # seq_len() skips the loop when df has no rows; 1:nrow(df) would iterate over c(1, 0)
  215. + df2[i,1]<-colnames(df[which.max(df[i,])])  # name of the column holding row i's maximum
  216. + }
  217. > df2
  218. V1
  219. 1 V3
  220. 2 V1
  221. 3 V2
  222.  
  223. j1 <- max.col(yourDF[, .(V1, V2, V3, V4)], "first")  # per-row index (1..4) of the largest of V1..V4, ties to the first; .() is data.table syntax, so yourDF is presumably a data.table — confirm
  224. yourDF$newCol <- c("V1", "V2", "V3", "V4")[j1]  # translate that index into the matching column name
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement