Advertisement
Guest User

Untitled

a guest
Aug 30th, 2016
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.80 KB | None | 0 0
  1. ID Year Temp ph
  2. 1 P1 1996 11.3 6.80
  3. 2 P1 1996 9.7 6.90
  4. 3 P1 1997 9.8 7.10
  5. ...
  6. 2000 P2 1997 10.5 6.90
  7. 2001 P2 1997 9.9 7.00
  8. 2002 P2 1997 10.0 6.93
  9.  
  10. new_df<-df[df$ID %in% sample(unique(df$ID),500),]
  11.  
  12. library(plyr)
  13. ddply(df,.(ID),function(x) x[sample(nrow(x),500),])
  14.  
  15. set.seed(1)
  16. mydf <- data.frame(ID = rep(1:3, each = 5), matrix(rnorm(45), ncol = 3))
  17. mydf
  18. # ID X1 X2 X3
  19. # 1 1 -0.6264538 -0.04493361 1.35867955
  20. # 2 1 0.1836433 -0.01619026 -0.10278773
  21. # 3 1 -0.8356286 0.94383621 0.38767161
  22. # 4 1 1.5952808 0.82122120 -0.05380504
  23. # 5 1 0.3295078 0.59390132 -1.37705956
  24. # 6 2 -0.8204684 0.91897737 -0.41499456
  25. # 7 2 0.4874291 0.78213630 -0.39428995
  26. # 8 2 0.7383247 0.07456498 -0.05931340
  27. # 9 2 0.5757814 -1.98935170 1.10002537
  28. # 10 2 -0.3053884 0.61982575 0.76317575
  29. # 11 3 1.5117812 -0.05612874 -0.16452360
  30. # 12 3 0.3898432 -0.15579551 -0.25336168
  31. # 13 3 -0.6212406 -1.47075238 0.69696338
  32. # 14 3 -2.2146999 -0.47815006 0.55666320
  33. # 15 3 1.1249309 0.41794156 -0.68875569
  34.  
  35. do.call(rbind,
  36. lapply(split(mydf, mydf$ID),
  37. function(x) x[sample(nrow(x), 3), ]))
  38. # ID X1 X2 X3
  39. # 1.2 1 0.1836433 -0.01619026 -0.1027877
  40. # 1.1 1 -0.6264538 -0.04493361 1.3586796
  41. # 1.5 1 0.3295078 0.59390132 -1.3770596
  42. # 2.10 2 -0.3053884 0.61982575 0.7631757
  43. # 2.9 2 0.5757814 -1.98935170 1.1000254
  44. # 2.8 2 0.7383247 0.07456498 -0.0593134
  45. # 3.13 3 -0.6212406 -1.47075238 0.6969634
  46. # 3.12 3 0.3898432 -0.15579551 -0.2533617
  47. # 3.15 3 1.1249309 0.41794156 -0.6887557
  48.  
  49. # install.packages("sampling")
  50. library(sampling)
  51. set.seed(1)
  52. x <- strata(mydf, "ID", size = c(2, 3, 2), method = "srswor")
  53. getdata(mydf, x)
  54. # X1 X2 X3 ID ID_unit Prob Stratum
  55. # 2 0.1836433 -0.01619026 -0.1027877 1 2 0.4 1
  56. # 5 0.3295078 0.59390132 -1.3770596 1 5 0.4 1
  57. # 6 -0.8204684 0.91897737 -0.4149946 2 6 0.6 2
  58. # 8 0.7383247 0.07456498 -0.0593134 2 8 0.6 2
  59. # 9 0.5757814 -1.98935170 1.1000254 2 9 0.6 2
  60. # 14 -2.2146999 -0.47815006 0.5566632 3 14 0.4 3
  61. # 15 1.1249309 0.41794156 -0.6887557 3 15 0.4 3
  62.  
  63. n <- 8
  64. df <- mtcars
  65. df$ID <- df$cyl
  66.  
  67. FUN <- function(x, n) {
  68. if (length(x) <= n) return(x)
  69. x[x %in% sample(x, n)]
  70. }
  71.  
  72. df[unlist(lapply(split(1:nrow(df), df$ID), FUN, n = 8)), ]
  73.  
  74. # mydata1 is your original data (not tested)
  75.  
  76. mydata2<- split(mydata1,mydata1$ID)
  77. names(mydata2)<-paste0("mydata2",seq_along(mydata2))
  78. mysample<-Map(function(x) x[sample((1:nrow(x)),size=500,replace=FALSE),], mydata2)
  79.  
  80. library(plyr)# for rbinding the mysample
  81. ldply(mysample)
  82.  
  83. library(dplyr)
  84. new_df <- df %>% group_by(ID) %>% sample_n(500)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement