Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- strata.samp <- function(df, n)
- {
- # strata.samp as function name
- # df and n are passed arguments
- # df is the data frame having the k columns as strata and
- # n is the sample size
- # writing function for the stratified sampling
- # where we need to pass the strata as data frame and
- # sample size as arguments to
- # find the population mean, srs mean, stratified mean,
- # variance of srs mean,
- # variance of stratified sampling mean,
- # standard error of stratified sampling mean and
- # confidence interval of stratified sampling mean
- # finding the number of strata
- k <- ncol(df)
- st.size <- array() # making array of the strata sizes
- pop <- vector() # making a vector of all population
- for (i in 1:k)
- {
- # finding the length of ith stratum
- st.size[i] <- length(na.omit(df[, i]))
- pop <- c(pop, na.omit(df[, i])) # merging the strata
- }
- pop.size <- length(pop) # population size
- samp.st.size <- array() # making array for sample strata sizes
- samp.mean.st <- array() # array of sample means of the strata
- var.samp.st <- array() # array of sample variances of strata
- for (i in 1:k)
- {
- # finding sample size of ith stratum
- samp.st.size[i] <- round(n * st.size[i] / pop.size)
- # taking sample from ith stratum & storing them to temp
- set.seed(0000)
- temp = sample(na.omit(df[, i]), samp.st.size[i])
- samp.mean.st[i] <- mean(temp) # sample mean of ith stratum
- var.samp.st[i] <- var(temp) # sample variance of ith stratum
- }
- pop.mean <- mean(pop) # population mean
- set.seed(0000)
- srs <- sample(pop, n) # simple random sampling without replacement
- mean.srs <- mean(srs) # finding srs mean
- # finding the variance of srswor mean using formula
- var.srs.mean <- (1 - n / pop.size) * var(srs) / n
- # finding stratified mean
- final.samp.mean.st <- weighted.mean(x = samp.mean.st, w = samp.st.size)
- # finding the variance of stratified sampling mean
- # proportions of the sizes of strata and population size
- w <- st.size / pop.size
- # proportions of sample unit and population unit under strata
- f <- samp.st.size / st.size
- # initialization because we need to
- # increment the variable var.st.mean
- var.st.mean <- 0
- # loop for finding the variance of stratified
- # sampling mean to maintain the iterative sum
- # i is iterative variable
- for (i in 1:k)
- {
- # formula to find the variance of
- # stratified sampling mean
- # passing iterative integer variable i
- # within square brackets
- # which lies between 1 to k
- # to index the arrays
- var.st.mean <- var.st.mean +
- w[i] ^ 2 * (1 - f[i]) * var.samp.st[i] / samp.st.size[i]
- }
- se.st.mean <- sqrt(var.st.mean) # standard error
- margin.err.st.mean <- 1.96 * se.st.mean # margin of error
- # lower value at 95% confidence interval
- lower.confi <- final.samp.mean.st - margin.err.st.mean
- # higher value at 95% confidence interval
- higher.confi <- final.samp.mean.st + margin.err.st.mean
- # 95% confidence interval
- confi.interv <- list("at 95% Lower" = lower.confi,
- "at 95% Higher" = higher.confi)
- # making the list of all expected answers
- list.results <- list("Population Mean" = pop.mean,
- "SRS Mean" = mean.srs,
- "Variance of SRS Mean" = var.srs.mean,
- "Stratified Sampling Mean" = final.samp.mean.st,
- "Variance of Stratified Sampling Mean" =
- var.st.mean,
- "Standard Error of Stratified Sampling Mean" =
- se.st.mean,
- "Confidence Interval of Stratified Sampling Mean" =
- confi.interv)
- # returning the result as list
- return(list.results)
- }
- # reading the given csv data as data frame
- # dat <- read.csv(file.choose())
- dat <- read.csv(file = "hk_sir_stratified_data_25_03_2019.csv")
- # dat # printing the data frame
- # calling the function by it's name & passing the arguments
- strata.samp(dat, 10)
- # reference
- cat("Coded by\nSFT\n1711024137\nHappy Codding!\n")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement