Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ID Year Temp ph
- 1 P1 1996 11.3 6.80
- 2 P1 1996 9.7 6.90
- 3 P1 1997 9.8 7.10
- ...
- 2000 P2 1997 10.5 6.90
- 2001 P2 1997 9.9 7.00
- 2002 P2 1997 10.0 6.93
- new_df<-df[df$ID %in% sample(unique(df$ID),500),]
- library(plyr)
- ddply(df,.(ID),function(x) x[sample(nrow(x),500),])
- set.seed(1)
- mydf <- data.frame(ID = rep(1:3, each = 5), matrix(rnorm(45), ncol = 3))
- mydf
- # ID X1 X2 X3
- # 1 1 -0.6264538 -0.04493361 1.35867955
- # 2 1 0.1836433 -0.01619026 -0.10278773
- # 3 1 -0.8356286 0.94383621 0.38767161
- # 4 1 1.5952808 0.82122120 -0.05380504
- # 5 1 0.3295078 0.59390132 -1.37705956
- # 6 2 -0.8204684 0.91897737 -0.41499456
- # 7 2 0.4874291 0.78213630 -0.39428995
- # 8 2 0.7383247 0.07456498 -0.05931340
- # 9 2 0.5757814 -1.98935170 1.10002537
- # 10 2 -0.3053884 0.61982575 0.76317575
- # 11 3 1.5117812 -0.05612874 -0.16452360
- # 12 3 0.3898432 -0.15579551 -0.25336168
- # 13 3 -0.6212406 -1.47075238 0.69696338
- # 14 3 -2.2146999 -0.47815006 0.55666320
- # 15 3 1.1249309 0.41794156 -0.68875569
- do.call(rbind,
- lapply(split(mydf, mydf$ID),
- function(x) x[sample(nrow(x), 3), ]))
- # ID X1 X2 X3
- # 1.2 1 0.1836433 -0.01619026 -0.1027877
- # 1.1 1 -0.6264538 -0.04493361 1.3586796
- # 1.5 1 0.3295078 0.59390132 -1.3770596
- # 2.10 2 -0.3053884 0.61982575 0.7631757
- # 2.9 2 0.5757814 -1.98935170 1.1000254
- # 2.8 2 0.7383247 0.07456498 -0.0593134
- # 3.13 3 -0.6212406 -1.47075238 0.6969634
- # 3.12 3 0.3898432 -0.15579551 -0.2533617
- # 3.15 3 1.1249309 0.41794156 -0.6887557
- # install.packages("sampling")
- library(sampling)
- set.seed(1)
- x <- strata(mydf, "ID", size = c(2, 3, 2), method = "srswor")
- getdata(mydf, x)
- # X1 X2 X3 ID ID_unit Prob Stratum
- # 2 0.1836433 -0.01619026 -0.1027877 1 2 0.4 1
- # 5 0.3295078 0.59390132 -1.3770596 1 5 0.4 1
- # 6 -0.8204684 0.91897737 -0.4149946 2 6 0.6 2
- # 8 0.7383247 0.07456498 -0.0593134 2 8 0.6 2
- # 9 0.5757814 -1.98935170 1.1000254 2 9 0.6 2
- # 14 -2.2146999 -0.47815006 0.5566632 3 14 0.4 3
- # 15 1.1249309 0.41794156 -0.6887557 3 15 0.4 3
- # Keep at most `n` rows per ID; groups with `n` or fewer rows are kept whole.
- n <- 8
- df <- mtcars
- df$ID <- df$cyl
- # FUN: `x` is the vector of row indices for one group, `n` the per-group cap.
- # Returns `x` unchanged when it has `n` or fewer elements; otherwise returns
- # the elements of `x` picked by sample(x, n), in their original order
- # (exactly `n` of them when the indices are distinct, as they are below).
- FUN <- function(x, n) {
-   if (length(x) <= n) return(x)
-   x[x %in% sample(x, n)]
- }
- # Split the row indices by ID, cap each group, and subset the data frame.
- # The cap comes from `n` defined above rather than a repeated literal.
- df[unlist(lapply(split(seq_len(nrow(df)), df$ID), FUN, n = n)), ]
- # mydata1 is your original data (not tested)
- # Split the data by ID into a list holding one data frame per group.
- mydata2 <- split(mydata1, mydata1$ID)
- # Label the pieces mydata21, mydata22, ...: one name per list element,
- # whether or not ID is a factor.
- names(mydata2) <- paste0("mydata2", seq_along(mydata2))
- # Draw 500 rows without replacement from every group; sample() raises an
- # error for any group that has fewer than 500 rows.
- mysample <- Map(function(x) x[sample(nrow(x), size = 500, replace = FALSE), ], mydata2)
- library(plyr) # for rbinding the mysample
- # Stack the per-group samples back into a single data frame.
- ldply(mysample)
- library(dplyr)
- new_df <- df %>% group_by(ID) %>% sample_n(500)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement