# code

Nov 6th, 2021
1,166
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. Lab Problem: 01
2. Question:
3. Find the point estimate and the interval estimate of the population mean, where the quiz test marks of 30
4. students are
5. (2,4,3,23,25,27,28,13,15,16,20,14,35,33,32,21,35,40,42,22,33,13,17,20,25,29,27,40,38,31), total marks 50.
6. Here the population size is N=30 and the sample size is n=10.
7. Also illustrate the sample size determination and the sampling distribution of the mean, and check the unbiasedness of the
8. sample mean as an estimator of the population mean.
9. R source code:
# Problem 01: point estimation and interval estimation.
# The quiz marks (out of 50) of 30 students form the population (N = 30).
# A sample of size n = 10 is drawn to estimate the population mean.
# Numeric results quoted in comments are those reported by the original
# author; they depend on the RNG seed and the R version.
data <- c(
  2, 4, 3, 23, 25, 27, 28, 13, 15, 16, 20, 14, 35, 33, 32,
  21, 35, 40, 42, 22, 33, 13, 17, 20, 25, 29, 27, 40, 38, 31
)
length(data)

# Check normality of the marks
qqnorm(data)
qqline(data)

# Draw the sample. Sampling is done with replacement, so the standard
# error sigma / sqrt(n) needs no finite-population correction.
sample_size <- 10
set.seed(125)
x <- sample(data, sample_size, replace = TRUE)
x
y <- mean(x)
y # point estimate of the mean (reported: 21.8)
sigma <- sd(data) # sd of all 30 marks, used as the known sigma
sigma

# Interval estimate
qnorm(0.025, 0, 1) # -1.96
z <- 1.96 # two-sided 95% critical value, rounded as in the original
margin <- (z * sigma) / sqrt(sample_size)
# Lower confidence limit
l <- y - margin
l
# Upper confidence limit
u <- y + margin
u
# 95% confidence interval for the population mean: (14.97, 28.63) reported

# Sample size determination
# There are two ways to obtain sigma:
# 1) The range is roughly four times the standard deviation (sigma),
#    so sigma is approximately range / 4.
# 2) Use the standard deviation computed from the data (done here).
# n = (2 * z * sigma / width)^2 for a confidence interval of total width 2,
# i.e. a margin of error of 1 mark.
width <- 2
n <- ((2 * z * sigma) / width)^2
n # probable sample size: 465.8536 ~ 466
pnorm(-1.96, 0, 1) # 0.025

# Sampling distribution of the mean
choose(30, 10) # 30045015 possible samples without replacement
set.seed(125)
n_rep <- 3004
a <- numeric(n_rep)
for (j in seq_len(n_rep)) {
  a[j] <- mean(sample(data, sample_size, replace = TRUE))
}
mean(a) # expected value E(x bar) (reported: 24.12693)
mean(data) # population mean
bias <- mean(a) - mean(data)
# Bias (reported: 0.02693076) is almost zero, so the sample mean (x bar)
# is an unbiased estimator of the population mean mu.
bias
hist(a)
qqnorm(a)
qqline(a)
# Comment: the sample mean is an unbiased estimator of the population mean.

#############################################################################
###### Extra part that is not needed for this problem                 ######
#############################################################################
curve(dnorm(x), xlim = c(-3.5, 3.5), ylab = "density",
      main = "Standard Normal Distribution")
dnorm(x = 0)
curve(pnorm(x), xlim = c(-3.5, 3.5), ylab = "probability",
      main = "Standard Normal Cumulative Distribution")

# Hand-written standard normal density; equals dnorm(x).
ami <- function(x) {
  exp(-0.5 * x^2) / sqrt(2 * pi)
}
ami(-1.96) # dnorm(-1.96) = ami(-1.96)
integrate(ami, lower = -Inf, upper = 3)

################ Chi-square distribution #########
curve(dchisq(x, df = 3), xlim = c(0, 10), ylim = c(0, 1), col = "red",
      main = "PDF of chi-square distribution")

###### t distribution #####
curve(dt(x, df = 3), xlim = c(-4, 4), col = "red",
      main = "PDF of t distribution")
b <- rt(100, 3)
b
hist(b)
76. Lab problem:02
77. Question:
78. Two dice rolled, S is the sum of both faces, Find the expectation of S, E(s) and variance of S, V(s). Plot the
79. distribution of S and dice D.
80. R source code:
# Problem 02: two dice are rolled and S is the sum of both faces.
# Find E(S) and V(S), and plot the distributions of S and of one die D.

# Vector of outcomes
s <- 2:12
# Vector of probabilities: 1/36, 2/36, ..., 6/36, ..., 1/36
ps <- c(1:6, 5:1) / 36
names(ps) <- s # label the bars with the outcome values
# Expectation of S
es <- sum(s * ps)
es
# Variance of S: E(S^2) - E(S)^2
esq <- sum((s^2) * ps)
vs <- esq - es^2
vs

# Divide the plotting area into one row with two columns; the previous
# graphics settings are restored once both plots are drawn.
old_par <- par(mfrow = c(1, 2))
# Plot the distribution of S
barplot(ps,
        ylim = c(0, 0.2),
        xlab = "S",
        ylab = "Probabilites",
        col = "red",
        space = 0,
        main = "Sum of two dice rolls")
# Plot the distribution of D
probability <- rep(1 / 6, 6)
names(probability) <- 1:6
barplot(probability,
        ylim = c(0, 0.2),
        xlab = "D",
        ylab = "Probabilites",
        col = "green",
        space = 0,
        main = "Outcomes of a sngle dice rolls")
par(old_par)

# Two dice rolled, S is the sum of both faces given that the sum is
# greater than 2, i.e. sum of both faces > 2.
# Find E(S) and V(S).
# Vector of outcomes for (sum of both faces) > 2
s1 <- 3:12
s1
# Vector of probabilities. Dropping the outcome S = 2 leaves a total mass
# of 35/36, so the weights are renormalized to form a proper (conditional)
# distribution before taking moments.
ps1 <- c(2:6, 5:1) / 36
ps1 <- ps1 / sum(ps1)
names(ps1) <- s1
ps1
sum(ps1) # total probability, now 1
# Expectation of S given S > 2
es1 <- sum(ps1 * s1)
es1 # 7.142857
# Variance of S given S > 2
esq1 <- sum((s1^2) * ps1)
vs1 <- esq1 - es1^2
vs1 # 5.265306

# Divide the plotting area into one row with two columns
old_par <- par(mfrow = c(1, 2))
# Plot the distribution of S given S > 2
barplot(ps1,
        ylim = c(0, 0.2),
        xlab = "S",
        ylab = "Probabilites",
        col = "yellow",
        space = 0,
        main = "Sum of two dice rolls")
# Plot the distribution of D
probability <- rep(1 / 6, 6)
names(probability) <- 1:6
barplot(probability,
        ylim = c(0, 0.2),
        xlab = "D",
        ylab = "Probabilites",
        col = "green",
        space = 0,
        main = "Outcomes of a sngle dice rolls")
par(old_par)
151. Lab problem:03
152. Question:
153. A herd of 1500 steer was fed a special high-protein grain for a month. A random sample of 29 was weighed
154. and had gained an average of 6.7 pounds. If the sd of weight gain for the entire herd is 7.1, test the
155. hypothesis at the 5% level of significance that the average weight gain per steer for the month was more than 5
156. pounds. Also comment on the test using the p-value. Create the confidence interval.
157. R source code:
## Problem 03: a herd of 1500 steer was fed a special high-protein grain
# for a month. A random sample of 29 was weighed and had gained an average
# of 6.7 pounds. The sd of weight gain for the entire herd is 7.1.
# Test at the 5% level of significance that the average weight gain per
# steer for the month was more than 5 pounds, and comment using the p-value.
# Ho: mue equal 5
# H1: mue greater than 5
x_ber <- 6.7 # sample mean
mue <- 5 # hypothesized population mean
sigma <- 7.1 # known population sd (named sigma so stats::sd is not masked)
n <- 29
alpha <- 0.05
std_err <- sigma / sqrt(n)

# Upper-tail critical value
z_tab <- qnorm(alpha, lower.tail = FALSE)
z_tab # 1.644
z_cal <- (x_ber - mue) / std_err
z_cal # 1.289
# Comment: Ho is not rejected, since z_cal < z_tab.

#### Using p-value ####
p_value <- pnorm(z_cal, lower.tail = FALSE)
p_value # 0.098
# Comment: since p-value = 0.098 > 0.05, Ho is not rejected.
# The p-value also shows that Ho would not be rejected at any significance
# level up to about 9.8%, but would be rejected at a level above 9.8%.

#### If it was a two-tailed test ####
# i.e. Ho: mue equal 5
# and  H1: mue not equal 5
z_tab1 <- qnorm(alpha / 2)
z_tab1 # -1.96
z_tab2 <- qnorm(1 - alpha / 2)
z_tab2 # 1.96
# Ho is again not rejected, since z_cal = 1.289 falls between
# -1.96 and 1.96.

## Using p-value ##
# abs() keeps the two-sided p-value valid whatever the sign of z_cal.
p_value2 <- 2 * pnorm(abs(z_cal), lower.tail = FALSE)
p_value2 # 0.19
# Here p_value2 = 0.19 > 0.05, so Ho is not rejected.

####### Confidence interval #####
# 95% confidence interval for mue (population mean)
CI <- x_ber + c(z_tab1, z_tab2) * std_err
CI # 4.1159 to 9.2840
# We are 95% confident that the average weight gain under the high-protein
# feed is between 4.1159 and 9.2840 pounds.
202. Lab problem:04
203. Question:
204. In order to find out whether children with chronic diarrhea have the same average hemoglobin level (Hb) that
205. is normally seen in healthy children in the same area, a random sample of 10 children with chronic diarrhea
206. is selected, and their Hb levels (g/dl) are obtained as follows: 12.3, 11.4, 14.2, 15.3, 14.8, 13.8,
207. 11.1, 15.1, 15.8, 13.2
208. Do the data provide sufficient evidence to indicate that the mean Hb level for children with chronic diarrhea
209. is less than the normal value of 14.6 (g/dl)? Test at the 0.01 level of significance. Draw a boxplot and a normal
210. Q-Q plot for this data and comment on them.
211. R source code:
### Problem 04: to find out whether children with chronic diarrhea have
# the same average hemoglobin level (Hb) that is normally seen in healthy
# children in the same area, a random sample of 10 children with chronic
# diarrhea is selected and their Hb levels (g/dl) are:
# 12.3, 11.4, 14.2, 15.3, 14.8, 13.8, 11.1, 15.1, 15.8, 13.2
# Do the data provide sufficient evidence that the mean Hb level for
# children with chronic diarrhea is less than the normal value of
# 14.6 (g/dl)? Test at the 0.01 level of significance.
# Draw a boxplot and a normal Q-Q plot for this data and comment.
# Ho: mue equal 14.6
# H1: mue less than 14.6
data <- c(12.3, 11.4, 14.2, 15.3, 14.8, 13.8, 11.1, 15.1, 15.8, 13.2)
n <- length(data)
n
x_ber <- mean(data)
x_ber
sample_sd <- sd(data)
sample_sd
mue <- 14.6
alpha <- 0.01
df <- n - 1

# Lower-tail critical value (the alternative is "less than")
t_tab <- qt(alpha, df)
t_tab # -2.821
t_cal <- (x_ber - mue) / (sample_sd / sqrt(n))
t_cal # -1.71
# Comment: since t_cal > t_tab, Ho is not rejected.

# Using p-value ########
p_value <- pt(t_cal, df)
p_value # 0.059
# Comment: since p_value = 0.059 > 0.01, Ho is not rejected.

### Using the built-in function #####
t.test(data, mu = mue, conf.level = 1 - alpha, alternative = "less")

boxplot(data, ylab = "Hb lebel", col = "red")
qqnorm(data, main = "Normal Q-Q plot of Hb lebel")
qqline(data)
247. Lab problem:05
248. Question:
249. In order to find out whether children with chronic diarrhea have the same average hemoglobin level (Hb) that
250. is normally seen in healthy children in the same area, a random sample of 10 children with chronic diarrhea
251. is selected, and their Hb levels (g/dl) are obtained as follows: 12.3, 11.4, 14.2, 15.3, 14.8, 13.8,
252. 11.1, 15.1, 15.8, 13.2
253. The Hb levels of another random sample of 12 children with chronic diarrhea are 11.1, 17.2, 13.4, 15.2, 14.1, 13.0, 12.5, 11.5,
254. 12.7, 14.5, 15.3, 14.0.
255. Is there any difference in the mean Hb level between the two groups of children?
256. R source code:
### Problem 05: to find out whether children with chronic diarrhea have
# the same average hemoglobin level (Hb) that is normally seen in healthy
# children in the same area, a random sample of 10 children with chronic
# diarrhea is selected and their Hb levels (g/dl) are:
# 12.3, 11.4, 14.2, 15.3, 14.8, 13.8, 11.1, 15.1, 15.8, 13.2
# Another random sample of 12 children gives:
# 11.1, 17.2, 13.4, 15.2, 14.1, 13.0, 12.5, 11.5, 12.7, 14.5, 15.3, 14.0
# Is there any difference in mean Hb level between the two groups?
############## Solution ##############
# Ho: mue1 = mue2
# H1: mue1 not equal mue2
alpha <- 0.05 # significance level; each tail gets alpha / 2 = 0.025
data1 <- c(12.3, 11.4, 14.2, 15.3, 14.8, 13.8, 11.1, 15.1, 15.8, 13.2)
n1 <- length(data1)
n1
s1 <- sd(data1)
s1
x_ber1 <- mean(data1)
x_ber1
data2 <- c(11.1, 17.2, 13.4, 15.2, 14.1, 13.0, 12.5, 11.5, 12.7, 14.5,
           15.3, 14.0)
n2 <- length(data2)
n2
s2 <- sd(data2)
s2
x_ber2 <- mean(data2)
x_ber2
df <- n1 + n2 - 2
t_tab1 <- qt(alpha / 2, df)
t_tab1 # -2.085
t_tab2 <- qt(1 - alpha / 2, df)
t_tab2 # 2.085

####### Checking whether the variances are equal ##########
boxplot(list(sample_1 = data1, sample_2 = data2), col = "red")
# Boxes of similar height (similar spread) suggest equal variances; the
# position of the median line says nothing about the variance.
ratio_sd <- s1 / s2
ratio_sd # 0.961985 is close to 1, so equal variances are assumed.

# Pooled standard deviation and standard error of the mean difference
sp <- sqrt((((n1 - 1) * s1^2) + ((n2 - 1) * s2^2)) / df)
sp
std_err <- sp * sqrt((1 / n1) + (1 / n2))
t_cal <- (x_ber1 - x_ber2) / std_err
t_cal # -0.01150547
# Comment: Ho is not rejected, since t_cal = -0.0115 falls between the
# critical values -2.085 and 2.085.

###### Using p-value ###
# abs() keeps the two-sided p-value valid whatever the sign of t_cal.
p_value <- 2 * pt(abs(t_cal), df, lower.tail = FALSE)
p_value # 0.9909
# Comment: since p_value = 0.9909 > 0.05, Ho is not rejected.

######### 95% confidence interval #########
CI <- (x_ber1 - x_ber2) + c(t_tab1, t_tab2) * std_err
CI # -1.5192  1.5025
# -1.5192 is the lower 95% confidence limit
#  1.5025 is the upper 95% confidence limit

######### Using the t.test function ##########
# var.equal = TRUE requests the pooled test computed by hand above
# (the default is the Welch test with unequal variances).
t.test(data1, data2, alternative = "two.sided", mu = 0, paired = FALSE,
       var.equal = TRUE, conf.level = 0.95)

############## For another alternative hypothesis #############
# Ho: mue1 = mue2
# H1: mue1 > mue2
t_cal # -0.0115
t_tabx <- qt(alpha, df, lower.tail = FALSE)
t_tabx # 1.724718
# Comment: Ho is not rejected, since t_cal < t_tabx.

########### Using p-value ##########
# For H1: mue1 > mue2 the p-value is the upper-tail probability.
p_valuex <- pt(t_cal, df, lower.tail = FALSE)
p_valuex # 0.5045
# Comment: Ho is not rejected, since p_valuex = 0.5045 > 0.05.
324. Lab problem:06
325. Test the hypothesis that the mean systolic blood pressure of healthy subjects (status-0) and subjects with
326. hypertension (status-1) are equal, i.e. the hypothesized difference is d0 = 0. The dataset contains n1 = 25 subjects with status-0 and n2 =
327. 30 with status-1.
328. Status-0: (120, 115, 94, 118, 111, 102, 102, 131, 104, 107, 115, 139, 115, 113, 114, 105, 115, 134, 109, 109,
329. 93, 118, 109, 106, 125).
330. Status-1: (150, 142, 119, 127, 141, 149, 144, 142, 149, 161, 143, 140 , 148, 149, 141, 146, 159, 152, 135,
331. 134, 161, 130, 125, 141, 148 ,153, 145, 137, 147, 169).
332. R source code:
# Problem 6: test the hypothesis that the mean systolic blood pressure of
# healthy subjects (status-0) and subjects with hypertension (status-1)
# are equal, i.e. the hypothesized difference is d0 = 0. The dataset
# contains n1 = 25 subjects with status-0 and n2 = 30 with status-1.
# Status-0: 120, 115, 94, 118, 111, 102, 102, 131, 104, 107, 115, 139,
#   115, 113, 114, 105, 115, 134, 109, 109, 93, 118, 109, 106, 125
# Status-1: 150, 142, 119, 127, 141, 149, 144, 142, 149, 161, 143, 140,
#   148, 149, 141, 146, 159, 152, 135, 134, 161, 130, 125, 141, 148, 153,
#   145, 137, 147, 169
############## Solution #############
# Ho: mue1 = mue2
# H1: mue1 not equal mue2
data1 <- c(120, 115, 94, 118, 111, 102, 102, 131, 104, 107, 115, 139, 115,
           113, 114, 105, 115, 134, 109, 109, 93, 118, 109, 106, 125)
n1 <- length(data1)
n1
s1 <- sd(data1)
s1
x_ber1 <- mean(data1)
x_ber1
data2 <- c(150, 142, 119, 127, 141, 149, 144, 142, 149, 161, 143, 140,
           148, 149, 141, 146, 159, 152, 135, 134, 161, 130, 125, 141,
           148, 153, 145, 137, 147, 169)
n2 <- length(data2)
n2
s2 <- sd(data2)
s2
x_ber2 <- mean(data2)
x_ber2

########## Checking normality ########
old_par <- par(mfrow = c(1, 2))
qqnorm(data1)
qqline(data1)
qqnorm(data2)
qqline(data2)
par(old_par) # restore the layout so the boxplot gets the full device

########### Checking whether the variances are equal ###########
boxplot(list(status_0 = data1, status_1 = data2), col = "red")
ratio_sd <- s1 / s2
ratio_sd # 1.018009 is close to 1, so equal variances are assumed.

alpha <- 0.05
df <- n1 + n2 - 2
t_tab1 <- qt(alpha / 2, df)
t_tab1 # -2.005746
t_tab2 <- qt(1 - alpha / 2, df)
t_tab2 # 2.005746

# Pooled sd: each sample variance is weighted by its OWN degrees of
# freedom, (n1 - 1) for s1^2 and (n2 - 1) for s2^2.
sp <- sqrt(((n1 - 1) * s1^2 + (n2 - 1) * s2^2) / df)
sp
t_cal <- (x_ber1 - x_ber2) / sqrt(sp^2 * ((1 / n1) + (1 / n2)))
t_cal # about -10.468
# Comment: t_cal does not fall between the critical values -2.005746 and
# 2.005746, so the null hypothesis (Ho) is rejected.

########### Using p-value ##############
# abs() keeps the two-sided p-value valid whatever the sign of t_cal.
p_value <- 2 * pt(abs(t_cal), df, lower.tail = FALSE)
p_value # far below 0.05, so Ho is rejected.

########## Using the t.test function ############
# var.equal = TRUE requests the pooled test computed by hand above
# (the default is the Welch test with unequal variances).
t.test(data1, data2, alternative = "two.sided", mu = 0, paired = FALSE,
       var.equal = TRUE, conf.level = 0.95)
388. Lab problem:07
389. Question:
390. Among 126 people, some smoke and some do not. The data are tabulated
391. below:
392. Is there any association between smoking and heart disease in the given data?
393. Disease
394. Smoking
395. (Smoking) | Heart disease | No heart disease | Total
396. YES | 55 | 16 | 71
397. NO | 23 | 32 | 55
398. Total | 78 | 48 | N=126
399. R source code:
# Problem 7: see the folder
# E:\2'2\Sampling and hypothisis testing\Lab\Lab with R\Lab 07.docx
################## Solution ##############
# Ho: There is no association between smoking and heart disease.
# H1: There is an association between smoking and heart disease.
# Rows: smoking (yes / no); columns: heart disease / no heart disease.
m <- matrix(c(55, 16, 23, 32), ncol = 2, byrow = TRUE,
            dimnames = list(c("yes", "no"), c("disses", "not_disses")))
m

# Marginal totals
c1 <- sum(m[, 1])
c1
c2 <- sum(m[, 2])
c2
r1 <- sum(m[1, ])
r1
r2 <- sum(m[2, ])
r2
n <- sum(m)
n

# Expected counts under independence: (row total * column total) / n
E11 <- (c1 * r1) / n
E11
E21 <- (c1 * r2) / n
E21
E12 <- (c2 * r1) / n
E12
E22 <- (c2 * r2) / n
E22

# Pearson chi-square statistic. m[1], ..., m[4] index the matrix in
# column-major order, matching the order E11, E21, E12, E22.
expected <- c(E11, E21, E12, E22)
chi_cal <- sum((as.vector(m) - expected)^2 / expected)
chi_cal # 16.69906

# df = (r - 1)(c - 1) is the formula for the degrees of freedom.
chi_tab <- qchisq(0.05, df = 1, lower.tail = FALSE)
chi_tab # 3.841459
# Comment: chi_cal = 16.69906 > chi_tab = 3.841459, so Ho is rejected,
# i.e. smoking is associated with heart disease.

###### P-value ##########
p_value <- pchisq(chi_cal, df = 1, lower.tail = FALSE)
p_value # 4.38026e-05
# Comment: p-value < 0.05, so Ho is rejected.

################# Using the chisq.test function #############
# correct = FALSE reproduces the hand-computed statistic above.
chisq.test(m, correct = FALSE)
# The default call applies Yates' continuity correction to 2 x 2 tables.
chisq.test(m)
# Here p-value = 9.56e-05 < 0.05, so Ho is rejected,
# i.e. smoking is associated with heart disease.
440. Lab problem:08
441. Question:
442. There are two COVID-19 testing booths. Some people were tested and the recorded results are given below, where the
443. number of people at booth-1 is 11 and the number of people at booth-2 is 10:
444. Booth-1: positive, positive, negative, positive, negative, negative, positive, positive, positive, negative,
445. positive.
446. Booth-2: negative, negative, negative, positive, positive, negative, positive, negative, negative, negative.
447. Is there any relation between the two booths?
448. R source code:
# Problem 08: there are two COVID-19 testing booths. Some people were
# tested and the recorded results are below; booth-1 tested 11 people and
# booth-2 tested 10 people.
# Booth-1: positive, positive, negative, positive, negative, negative,
#   positive, positive, positive, negative, positive
# Booth-2: negative, negative, negative, positive, positive, negative,
#   positive, negative, negative, negative
# Is there any relation between the two booths?
################### Solution #############
# Ho: There is no relation between booth-1 and booth-2.
# H1: There is a relation between booth-1 and booth-2.
booth_1 <- c("positive", "positive", "negative", "positive", "negative",
             "negative", "positive", "positive", "positive", "negative",
             "positive")
booth_2 <- c("negative", "negative", "negative", "positive", "positive",
             "negative", "positive", "negative", "negative", "negative")
x_table1 <- table(booth_1)
x_table1
x_table2 <- table(booth_2)
x_table2

# Build the 2 x 2 contingency table from the data instead of typing the
# counts by hand; fixed factor levels keep the column order stable.
outcomes <- c("negative", "positive")
m <- rbind(
  "Booth-1" = table(factor(booth_1, levels = outcomes)),
  "Booth-2" = table(factor(booth_2, levels = outcomes))
)
m

# Marginal totals
c1 <- sum(m[, 1])
c1
c2 <- sum(m[, 2])
c2
r1 <- sum(m[1, ])
r1
r2 <- sum(m[2, ])
r2
n <- sum(m)
n

# Expected counts under independence: (row total * column total) / n
E11 <- (c1 * r1) / n
E11
E21 <- (c1 * r2) / n
E21
E12 <- (c2 * r1) / n
E12
E22 <- (c2 * r2) / n
E22 # 4.761905 < 5, so Yates' continuity correction is applied.

# Yates-corrected statistic over all four cells. The values are taken in
# column-major order to match E11, E21, E12, E22.
expected <- c(E11, E21, E12, E22)
chi_yates <- sum((abs(as.vector(m) - expected) - 0.5)^2 / expected)
chi_yates # 1.218781

# df = (r - 1)(c - 1) is the formula for the degrees of freedom.
chi_tab <- qchisq(0.05, df = 1, lower.tail = FALSE)
chi_tab # 3.841459
# Comment: chi_yates = 1.218781 < chi_tab = 3.841459, so Ho is not
# rejected, i.e. there is no evidence of a relation between the booths.

###### P-value ##########
p_value <- pchisq(chi_yates, df = 1, lower.tail = FALSE)
p_value # 0.2696
# Comment: p-value = 0.2696 > 0.05, so Ho is not rejected.

################# Using the chisq.test function #############
# chisq.test applies Yates' correction to 2 x 2 tables by default. It may
# warn that the approximation is unreliable because an expected count
# is below 5.
chisq.test(m)
# Here p-value = 0.2696 > 0.05, so Ho is not rejected,
# i.e. there is no evidence of a relation between booth-1 and booth-2.