Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- %% Here we collect the data and we plot the histogram of both samples
- % 161 W (expected)
- % 174 M (expected)
- W = [160 184 180 158 173 160 176 168];
- M = [175 162 186 182 174 185 180 179 178];
- figure()
- hold on
- histogram(W,158:4:195)
- histogram(M,160:4:195)
- %% Calculate the size and the mean of each sample
- n_W = length(W);
- n_M = length(M);
- mean_W = mean(W);
- mean_M = mean(M);
- disp(['The mean of the heights of women in the class is ',...
- num2str(mean_W,4),' cm']);
- disp(['The mean of the heights of men in the class is ',...
- num2str(mean_M,4),' cm']);
- %% Calculate the estimate of variance of the population
- var_W = var(W);
- var_M = var(M);
- %% Calculate the standard error of the mean
- % this is the estimate of the standard deviation of the estimate of the mean
- sem_W = sqrt(var_W/n_W);
- sem_M = sqrt(var_M/n_M);
- %% Calculate the confidence 95% confidence intervals assuming the CLT
- CI_W = [mean_W - 2*sem_W, mean_W + 2*sem_W];
- CI_M = [mean_M - 2*sem_M, mean_M + 2*sem_M];
- disp(['The 95% CI for the mean of the heights of women in the class is (',...
- num2str(CI_W,4),') cm']);
- disp(['The 95% CI for the mean of the heights of men in the class is (',...
- num2str(CI_M,4),') cm']);
- %% Now we calculate the 95% CIs with bootstrap
- N_B = 10000; %number of bootstrap samples
- all_means_W = zeros(1,N_B);
- all_means_M = zeros(1,N_B);
- for k=1:N_B
- W_B = randsample(W,n_W,1);
- M_B = randsample(M,n_M,1);
- mean_W_B = mean(W_B);
- mean_M_B = mean(M_B);
- all_means_W(k) = mean_W_B;
- all_means_M(k) = mean_M_B;
- end
- % plot the histograms
- figure()
- histogram(all_means_W,150:2:190)
- hold on
- histogram(all_means_M,150:2:190)
- % compute the CIs using the quantiles of the surrogate means
- CI_B_W = [quantile(all_means_W,.025), quantile(all_means_W,.975)];
- CI_B_M = [quantile(all_means_M,.025), quantile(all_means_M,.975)];
- disp(['The 95% CI for the mean of the heights of women in the class is (',...
- num2str(CI_B_W,4),') cm']);
- disp(['The 95% CI for the mean of the heights of men in the class is (',...
- num2str(CI_B_M,4),') cm']);
- %% Let's do now the hypothesis testing
- % Null hypothesis: there is no difference between the mean of the heights of men
- % and women
- % Alternative hypothesis: there is some difference between the mean of the
- % heights of mean and women (but we don't say which
- % direction)
- % alpha = 0.05
- % test statistic = difference of the mean of the two samples
- difference = mean_M - mean_W;
- disp(['The observed difference between the means is ',...
- num2str(difference,2),' cm']);
- %% We first assume that we can apply CLT
- % this allows us to calculate directly the distribution of the statistic under
- % te null hypothesis
- % we calculate the standard deviation of the null as the square root of the sum
- % of the standard error of the mean of the two samples
- std_CRd = sqrt(sem_W^2 + sem_M^2);
- % the right boundary of the Critical Region at alpha=0.05 corresponds to two
- % times the standard deviation (because of the 68-95-99.7 rule)
- CR_right_bound = 2*std_CRd;
- disp(['The boundaries of the Critical Region are (',...
- num2str(-CR_right_bound,4),',',num2str(CR_right_bound,4),') cm']);
- disp('Therefore we fail to reject the null hypothesis at the 0.05 significance level')
- %% Let's calculate the p-value
- p_value = 2*(1 - normcdf(difference/std_CRd));
- disp(['the p-value is ',num2str(p_value,2),' (>0.05)'])
- %% Now let's not assume that we can apply CLT, instead let's do a shuffle test
- N_s = 10000; %number of permutations
- all_diffs_means = zeros(1,N_s);
- % we merge both samples
- Y = [W M];
- for k=1:N_s
- Y_s = randsample(Y,n_W+n_M,0); % permute the order of the heights
- W_s = Y_s(1:n_W);
- M_s = Y_s(n_W+1:end); % keep the size of the two original samples
- mean_W_s = mean(W_s);
- mean_M_s = mean(M_s);
- all_diffs_means(k) = mean_M_s - mean_W_s;
- end
- % plot the histogram
- figure()
- histogram(all_diffs_means,-15:15)
- % calculate the boundaries of the Critical Region
- CR_interval = [quantile(all_diffs_means,.025), quantile(all_diffs_means,.975)];
- disp(['The boundaries of the Critical Region are (',...
- num2str(CR_interval,2),') cm']);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement