Advertisement
Guest User

Untitled

a guest
Jul 22nd, 2019
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.17 KB | None | 0 0
  1. library(ggplot2)
  2. library(dplyr)
  3.  
  4. set.seed(420)
  5.  
  6. setwd("/if/fame/gcm/usr/george/math658")
  7.  
  8. ID <- 1:300
  9. IDschool <- rep(1:15, 20)
  10. question1 <- rbinom(n = 100, size = 1, prob = 0.17)
  11. question2 <- round(rnorm(n = 100, mean = 2.85, sd = 0.4), digits = 1)
  12. question3 <- round(rnorm(n = 100, mean = 3.7, sd = 0.23), digits = 1)
  13. question4 <- round(rnorm(n = 100, mean = 1.802, sd = 0.2), digits = 1)
  14. question5 <- sample(c(2.4, 2.25, 2.3, 2.45, 2.5), prob = c(0.1, 0.3, 0.4, 0.1, 0.1))
  15.  
  16. survey <- data.frame(ID = ID, IDschool = IDschool, Q1 = question1, Q2 = question2, Q3 = question3, Q4 = question4, Q5 = question5)
  17.  
  18. #uncomment if you want to create a new survey dataset
  19. #write_csv(survey, 'survey.csv')
  20.  
  21. #pilot survey
  22. surveydf = read.csv("survey.csv",header = TRUE)
  23. keep = c(1)
  24. gtdf = dplyr::filter(surveydf, IDschool %in% keep)
  25. pilotdf = head(gtdf,10)
  26. var(pilotdf$Q2)
  27.  
  28. #complete survey
  29. keep = c(2,3,4,5,6)
  30. #Lets say UMD - job market(2), Carnegie Mellon University - full list(3), Penn - full list(4), Johns Hopkins - full list(5), Temple - full list(6)
  31. sampledf = dplyr::filter(surveydf, IDschool %in% keep)
  32. #Mark UMD as job market candidate
  33. sampledf$listtype <- as.numeric(sampledf$IDschool == 2)
  34.  
  35. #Right now the story is we selected 10 students from each uni, 2 students did not respond, those students are from John Hopkins and Temple
  36. sampledf = head(sampledf,48)
  37.  
  38. #ybarhat 2 clust
  39. #N/n
  40. N=15
  41. n=5
  42. coef1 = N/n
  43. #M_i/m_i
  44. Mi = 20
  45. coef2_234 = Mi/10
  46. coef2_56 = Mi/9
  47. Mzero = 300
  48.  
  49. aggdf <- aggregate(sampledf[, 3:7], list(sampledf$IDschool), sum)
  50.  
  51. first_colsums <- colSums(aggdf[1:3,])
  52. second_colsums <- colSums(aggdf[4:5,])
  53.  
  54. first_colsums <- coef1*coef2_234*first_colsums
  55. second_colsums <- coef1*coef2_56*second_colsums
  56.  
  57. yclust <- first_colsums + second_colsums
  58. yclust <- yclust/Mzero
  59.  
  60. yclust <- yclust[c(2,3,4,5,6)]
  61.  
  62. #now estimate variance
  63. Mbar = 300/15
  64.  
  65. unimeansdf <- aggregate(sampledf[, 3:7], list(sampledf$IDschool), mean)
  66. names(unimeansdf) <- c("IDschool", "Q1mean","Q2mean","Q3mean","Q4mean","Q5mean")
  67. s_t_squared <- ((Mi*unimeansdf[c(2,3,4,5,6)] - Mbar*yclust)^2)/(n-1)
  68.  
  69. s_i_squared <- merge(sampledf, unimeansdf,by="IDschool")
  70.  
  71. colMeans(surveydf[,3:7])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement