Advertisement
Guest User

Untitled

a guest
Feb 20th, 2019
68
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.29 KB | None | 0 0
  1. > df
  2. kwd1 kwd2 sim
  3. 1 a b 1
  4. 2 b a 1
  5. 3 c a 2
  6. 4 a c 2
  7.  
  8. > df
  9. kwd1 kwd2 sim
  10. a b 1
  11. a c 2
  12. b c 0
  13.  
  # Sample input: one row per recorded keyword pair. `kwd1` and `kwd2` hold the
  # two keywords and `sim` holds their similarity score.
  df <- data.frame(
    kwd1 = c("a", "b", "c", "a"),
    kwd2 = c("b", "a", "a", "c"),
    sim = c(0.1, 0.1, 0.2, 0.2)
  )
  16. > dput(df)
  17. structure(list(kwd1 = structure(c(1L, 2L, 3L, 1L), .Label = c("a",
  18. "b", "c"), class = "factor"), kwd2 = structure(c(2L, 1L, 1L,
  19. 3L), .Label = c("a", "b", "c"), class = "factor"), sim = c(1,
  20. 1, 2, 2)), .Names = c("kwd1", "kwd2", "sim"), row.names = c(NA,
  21. -4L), class = "data.frame")
  22.  
  23. library(plyr)
  # Build one row per unordered pair of distinct keywords found in `df`,
  # carrying the known similarity (`sim`) or NA when the pair is not in `df`.
  # Base R only: keywords are compared as strings, so they are not restricted
  # to single lower-case letters.

  # Work on character vectors so the result does not depend on whether the
  # key columns of `df` are factors or strings.
  kwd_first <- as.character(df$kwd1)
  kwd_second <- as.character(df$kwd2)
  kwd_all <- sort(unique(c(kwd_first, kwd_second)))

  # Put every recorded pair into canonical order (kwd1 < kwd2) so that
  # "a b" and "b a" describe the same pair. which() skips NA comparisons.
  kwd_swap <- which(kwd_first > kwd_second)
  kwd_lower <- kwd_first
  kwd_upper <- kwd_second
  kwd_lower[kwd_swap] <- kwd_second[kwd_swap]
  kwd_upper[kwd_swap] <- kwd_first[kwd_swap]
  kwd_known <- data.frame(
    kwd1 = kwd_lower,
    kwd2 = kwd_upper,
    sim = df$sim,
    stringsAsFactors = FALSE
  )

  # Every unordered pair of distinct keywords appears exactly once here, so a
  # pair with a known similarity can never pick up an extra NA row.
  kwd_pairs <- expand.grid(
    kwd1 = kwd_all,
    kwd2 = kwd_all,
    stringsAsFactors = FALSE,
    KEEP.OUT.ATTRS = FALSE
  )
  kwd_pairs <- kwd_pairs[kwd_pairs$kwd1 < kwd_pairs$kwd2, , drop = FALSE]

  # `res` may still list a pair twice when `df` recorded it in both
  # orientations; `res1` is the de-duplicated result.
  res <- merge(kwd_pairs, kwd_known, by = c("kwd1", "kwd2"), all.x = TRUE)
  res <- res[order(res$kwd1, res$kwd2), , drop = FALSE]
  res1 <- res[!duplicated(res), ]
  38.  
  39. > res1
  40. kwd1 kwd2 sim
  41. 1 a b 0.1
  42. 2 a c 0.2
  43. 4 b c NA
  44.  
  #' List every unordered keyword pair with its similarity
  #'
  #' Collects all keywords that occur in `df$kwd1` or `df$kwd2`, forms every
  #' unordered pair of distinct keywords, and attaches the similarity recorded
  #' in `df` for that pair in either orientation. Pairs that are not recorded
  #' in `df` get `NA`. Self-pairs (`kwd1 == kwd2`) are dropped.
  #'
  #' Keywords are compared as strings, so they may be arbitrary text rather
  #' than single lower-case letters. A pair whose similarity is recorded in
  #' only one orientation yields a single row, with no extra `NA` row.
  #'
  #' @param df A data.frame with key columns `kwd1`, `kwd2` (character or
  #'   factor) and a value column `sim`.
  #' @return A data.frame with columns `kwd1`, `kwd2` (character, with
  #'   `kwd1 < kwd2` in each row) and `sim`, sorted by `kwd1` then `kwd2`.
  #'   A pair recorded with several distinct `sim` values keeps one row per
  #'   distinct value.
  convert_df <- function(df) {
    # Character vectors make the result independent of factor level sets.
    first <- as.character(df$kwd1)
    second <- as.character(df$kwd2)
    keywords <- sort(unique(c(first, second)))

    # Canonical orientation for the recorded pairs; which() ignores NA keys.
    swap <- which(first > second)
    lower <- first
    upper <- second
    lower[swap] <- second[swap]
    upper[swap] <- first[swap]
    known <- unique(data.frame(
      kwd1 = lower,
      kwd2 = upper,
      sim = df$sim,
      stringsAsFactors = FALSE
    ))

    # One row per unordered pair of distinct keywords.
    pairs <- expand.grid(
      kwd1 = keywords,
      kwd2 = keywords,
      stringsAsFactors = FALSE,
      KEEP.OUT.ATTRS = FALSE
    )
    pairs <- pairs[pairs$kwd1 < pairs$kwd2, , drop = FALSE]

    # Left join: unrecorded pairs keep NA in `sim`; recorded self-pairs have
    # no partner in `pairs` and therefore disappear.
    res <- merge(pairs, known, by = c("kwd1", "kwd2"), all.x = TRUE)
    res <- res[order(res$kwd1, res$kwd2), , drop = FALSE]
    rownames(res) <- NULL
    res
  }
  60. # Then simply run this to convert your actual data.frame
  61. convert_df(df)
  62.  
  # Collect every keyword from both columns, as plain strings, so that a pair
  # of keywords that only ever occur in the same column is not missed.
  known <- data.frame(
    kwd1 = as.character(df$kwd1),
    kwd2 = as.character(df$kwd2),
    sim = df$sim,
    stringsAsFactors = FALSE
  )
  keywords <- unique(c(known$kwd1, known$kwd2))
  # make a data.frame with all possible combinations of kwd1 and kwd2.
  # the ones that aren't in df are NA for sim.
  k <- merge(
    expand.grid(kwd1 = keywords, kwd2 = keywords, stringsAsFactors = FALSE),
    known,
    all = TRUE
  )
  # order the result to put the NA rows at the end, so that rows that are in df
  # have priority in the following step.
  k <- k[order(k$sim), ]
  # remove all rows where the kwd1-kwd2 combo appears earlier in the data.frame;
  # the combo is compared regardless of orientation ("a b" equals "b a").
  pair_lower <- ifelse(k$kwd1 <= k$kwd2, k$kwd1, k$kwd2)
  pair_upper <- ifelse(k$kwd1 <= k$kwd2, k$kwd2, k$kwd1)
  k <- k[!duplicated(data.frame(pair_lower, pair_upper)), ]
  # assuming you don't want the rows where kwd1 and kwd2 are the same, remove them.
  k <- k[k$kwd1 != k$kwd2, ]
  # set the NA similarities to 0 (only `sim` is touched, never the keywords)
  k$sim[is.na(k$sim)] <- 0
  75.  
  76. kwd1 kwd2 sim
  77. 5 a b 0.1
  78. 7 a c 0.2
  79. 12 b c 0.0
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement