Advertisement
Guest User

Untitled

a guest
Sep 19th, 2018
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
MatLab 4.34 KB | None | 0 0
  1. %% Here we collect  the data and we plot the histogram of both samples
  2. % 161 W (expected)
  3. % 174 M (expected)
  4.  
  5. W   = [160 184 180 158 173 160 176 168];
  6. M   = [175 162 186 182 174 185 180 179 178];
  7.  
  8. figure()
  9. hold on
  10. histogram(W,158:4:195)
  11. histogram(M,160:4:195)
  12.  
  13. %% Calculate the size and the mean of each sample
  14.  
  15. n_W = length(W);
  16. n_M = length(M);
  17.  
  18. mean_W  = mean(W);
  19. mean_M  = mean(M);
  20.  
  21. disp(['The mean of the heights of women in the class is ',...
  22.        num2str(mean_W,4),' cm']);
  23. disp(['The mean of the heights of men in the class is ',...
  24.        num2str(mean_M,4),' cm']);
  25.  
  26. %% Calculate the estimate of variance of the population
  27.  
  28. var_W   = var(W);
  29. var_M   = var(M);
  30.  
  31. %% Calculate the standard error of the mean
  32. % this is the estimate of the standard deviation of the estimate of the mean
  33.  
  34. sem_W   = sqrt(var_W/n_W);
  35. sem_M   = sqrt(var_M/n_M);
  36.  
  37. %% Calculate the confidence 95% confidence intervals assuming the CLT
  38.  
  39. CI_W    = [mean_W - 2*sem_W, mean_W + 2*sem_W];
  40. CI_M    = [mean_M - 2*sem_M, mean_M + 2*sem_M];
  41.  
  42. disp(['The 95% CI for the mean of the heights of women in the class is (',...
  43.        num2str(CI_W,4),') cm']);
  44. disp(['The 95% CI for the mean of the heights of men in the class is (',...
  45.        num2str(CI_M,4),') cm']);
  46.  
  47.  
  48. %% Now we calculate the 95% CIs with bootstrap
  49.  
  50. N_B = 10000; %number of bootstrap samples
  51.  
  52. all_means_W = zeros(1,N_B);
  53. all_means_M = zeros(1,N_B);
  54.  
  55. for k=1:N_B
  56.     W_B = randsample(W,n_W,1);
  57.     M_B = randsample(M,n_M,1);
  58.    
  59.     mean_W_B = mean(W_B);
  60.     mean_M_B = mean(M_B);
  61.    
  62.     all_means_W(k) = mean_W_B;
  63.     all_means_M(k) = mean_M_B;    
  64. end
  65.  
  66. % plot the histograms
  67. figure()
  68. histogram(all_means_W,150:2:190)
  69. hold on
  70. histogram(all_means_M,150:2:190)
  71.  
  72. % compute the CIs using the quantiles of the surrogate means
  73.  
  74. CI_B_W = [quantile(all_means_W,.025), quantile(all_means_W,.975)];
  75. CI_B_M = [quantile(all_means_M,.025), quantile(all_means_M,.975)];
  76.  
  77. disp(['The 95% CI for the mean of the heights of women in the class is (',...
  78.        num2str(CI_B_W,4),') cm']);
  79. disp(['The 95% CI for the mean of the heights of men in the class is (',...
  80.        num2str(CI_B_M,4),') cm']);
  81.  
  82.    
  83. %% Let's do now the hypothesis testing
  84. % Null hypothesis: there is no difference between the mean of the heights of men
  85. %                  and women
  86. % Alternative hypothesis: there is some difference between the mean of the
  87. %                         heights of mean and women (but we don't say which
  88. %                         direction)
  89. % alpha = 0.05
  90. % test statistic = difference of the mean of the two samples
  91.  
  92. difference = mean_M - mean_W;
  93. disp(['The observed difference between the means is ',...
  94.        num2str(difference,2),' cm']);
  95.  
  96. %% We first assume that we can apply CLT
  97. % this allows us to calculate directly the distribution of the statistic under
  98. % te null hypothesis
  99.  
  100. % we calculate the standard deviation of the null as the square root of the sum
  101. % of the standard error of the mean of the two samples
  102. std_CRd     = sqrt(sem_W^2 + sem_M^2);
  103.  
  104. % the right boundary of the Critical Region at alpha=0.05 corresponds to two
  105. % times the standard deviation (because of the 68-95-99.7 rule)
  106. CR_right_bound  = 2*std_CRd;
  107.  
  108. disp(['The boundaries of the Critical Region are (',...
  109.        num2str(-CR_right_bound,4),',',num2str(CR_right_bound,4),') cm']);
  110. disp('Therefore we fail to reject the null hypothesis at the 0.05 significance level')
  111.  
  112. %% Let's calculate the p-value
  113.  
  114. p_value = 2*(1 - normcdf(difference/std_CRd));
  115. disp(['the p-value is ',num2str(p_value,2),' (>0.05)'])
  116.  
  117. %% Now let's not assume that we can apply CLT, instead let's do a shuffle test
  118.  
  119. N_s = 10000; %number of permutations
  120.  
  121. all_diffs_means = zeros(1,N_s);
  122.  
  123. % we merge both samples
  124. Y = [W M];
  125.  
  126. for k=1:N_s
  127.     Y_s = randsample(Y,n_W+n_M,0); % permute the order of the heights
  128.     W_s = Y_s(1:n_W);              
  129.     M_s = Y_s(n_W+1:end);          % keep the size of the two original samples
  130.    
  131.     mean_W_s = mean(W_s);
  132.     mean_M_s = mean(M_s);
  133.    
  134.     all_diffs_means(k) = mean_M_s - mean_W_s;
  135.    
  136. end
  137.  
  138. % plot the histogram
  139. figure()
  140. histogram(all_diffs_means,-15:15)
  141.  
  142. % calculate the boundaries of the Critical Region
  143. CR_interval = [quantile(all_diffs_means,.025), quantile(all_diffs_means,.975)];
  144.  
  145. disp(['The boundaries of the Critical Region are (',...
  146.        num2str(CR_interval,2),') cm']);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement