Advertisement
Guest User

Untitled

a guest
Apr 19th, 2014
48
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.36 KB | None | 0 0
  1. N = 9  # number of rows in the example data frame
  2. set.seed(1234)  # fixed seed so the random fields below are reproducible
  3. df <- data.frame(id=c(1,1,1,2,2,2,3,3,3), date=c('2005','2006','2007'),  # the 3-element date vector is recycled across the 9 ids
  4. Field1 = ifelse(runif(N)>.5, runif(N, 5,30), NA),  # Field1..Field5: keep a uniform draw where a separate uniform draw exceeds .5, otherwise NA
  5. Field2 = ifelse(runif(N)>.5, runif(N, 4,22), NA),
  6. Field3 = ifelse(runif(N)>.5, runif(N, 7,18), NA),
  7. Field4 = ifelse(runif(N)>.5, runif(N, 9,25), NA),
  8. Field5 = ifelse(runif(N)>.5, runif(N, 3,30), NA) )
  9.  
  10. # > df
  11. # id date Field1 Field2 Field3 Field4 Field5
  12. # 1 1 2005 NA NA NA NA NA
  13. # 2 1 2006 22.33978 NA NA 12.824412 6.850614
  14. # 3 1 2007 18.62437 NA 12.334904 NA NA
  15. # 4 2 2005 12.06834 NA 9.683217 13.929516 8.296716
  16. # 5 2 2006 28.08584 NA 15.420058 NA NA
  17. # 6 2 2007 12.30790 NA 7.811579 9.826346 NA
  18. # 7 3 2005 NA NA NA 18.033117 NA
  19. # 8 3 2006 NA 7.259732 14.889989 NA 7.320774
  20. # 9 3 2007 11.67052 17.674071 NA NA 27.197018
  21.  
  22.  
  23. # Trying to summarize by the count of non-NAs in each row...!
  24. df %.% regroup(list(quote(id),quote(date))) %.%  # group by id and date
  25. summarize(nna_count = sum(!is.na(Field1) + !is.na(Field2) + !is.na(Field3) + !is.na(Field4) + !is.na(Field5)))
  26. # NOTE: unary `!` has lower precedence than `+`, so the expression above parses as !(is.na(Field1) + !(is.na(Field2) + ...)) and yields only 0 or 1 per row.
  27. # TOTALLY WRONG?! Fix: parenthesise each term, i.e. (!is.na(Field1)) + (!is.na(Field2)) + ..., or use sum(!is.na(c(Field1, ..., Field5))).
  28.  
  29. # Source: local data frame [9 x 3]
  30. # Groups: id
  31. #
  32. # id date nna_count
  33. # 1 1 2005 0
  34. # 2 1 2006 1
  35. # 3 1 2007 1
  36. # 4 2 2005 1
  37. # 5 2 2006 1
  38. # 6 2 2007 1
  39. # 7 3 2005 0
  40. # 8 3 2006 0
  41. # 9 3 2007 0
  42. # Fragment (presumably a step in a `df %.% ...` pipeline): weights 16/8/4/2/1 give each field its own bit, so for a single-row group the value encodes which fields are non-NA. Each !is.na() is parenthesised because unary `!` binds more loosely than `*` and `+`.
  43. mutate(na_count = sum(16*(!is.na(Field1)) + 8*(!is.na(Field2)) + 4*(!is.na(Field3)) + 2*(!is.na(Field4)) + (!is.na(Field5))))
  44.  
  45. df %.%
  46. group_by(id,date) %.%  # one group per (id, date) pair
  47. summarise(new=  # new = number of non-NA values among Field1..Field5
  48. (!is.na(Field1)) + (!is.na(Field2)) + (!is.na(Field3)) +  # each !is.na() is wrapped in parentheses so `!` applies to that term only
  49. (!is.na(Field4)) + (!is.na(Field5))
  50. ) %.%
  51. arrange(id,date)  # sort the summary by id, then date
  52.  
  53.  
  54. #Source: local data frame [9 x 3]
  55. #Groups: id
  56. #
  57. # id date new
  58. #1 1 2005 0
  59. #2 1 2006 3
  60. #3 1 2007 2
  61. #4 2 2005 4
  62. #5 2 2006 2
  63. #6 2 2007 3
  64. #7 3 2005 1
  65. #8 3 2006 3
  66. #9 3 2007 3
  67.  
  68. > df %.% regroup(list(quote(id),quote(date))) %.%
  69. + summarize(na_count = sum(!is.na(c(Field1,Field2,Field3,Field4,Field5))))
  70. Source: local data frame [9 x 3]
  71. Groups: id
  72.  
  73. id date na_count
  74. 1 1 2005 0
  75. 2 1 2006 3
  76. 3 1 2007 2
  77. 4 2 2005 4
  78. 5 2 2006 2
  79. 6 2 2007 3
  80. 7 3 2005 1
  81. 8 3 2006 3
  82. 9 3 2007 3
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement