Advertisement
_Mizanur

code

Nov 6th, 2021
1,166
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 18.79 KB
  1. Lab Problem: 01
  2. Question:
  3. Find the point estimate and the interval estimate of the population mean, where the quiz marks of 30
  4. students are
  5. (2,4,3,23,25,27,28,13,15,16,20,14,35,33,32,21,35,40,42,22,33,13,17,20,25,29,27,40,38,31), out of a total of 50 marks.
  6. Here the population size is N=30 and the sample size is n=10.
  7. Also illustrate sample size determination and the sampling distribution of the mean, and check the unbiasedness of the
  8. sample mean as an estimator of the population mean.
  9. R source code:
  # Problem 01: point and interval estimation of a population mean.
  # The population is the quiz marks (out of 50) of N = 30 students; a sample
  # of n = 10 marks is drawn from it.
  data <- c(
    2, 4, 3, 23, 25, 27, 28, 13, 15, 16, 20, 14, 35, 33, 32,
    21, 35, 40, 42, 22, 33, 13, 17, 20, 25, 29, 27, 40, 38, 31
  )
  length(data)

  # Check normality of the marks.
  qqnorm(data)
  qqline(data)

  # Draw the sample. Sampling is with replacement, so the draws are
  # independent and the standard error sigma / sqrt(n) used below needs no
  # finite population correction.
  sample_size <- 10
  set.seed(125)
  x <- sample(data, sample_size, replace = TRUE)
  x

  # Point estimate of the population mean.
  y <- mean(x)
  y

  # Standard deviation of the marks, treated as the known sigma.
  sigma <- sd(data)
  sigma

  # Interval estimate ----
  # The critical value is taken from qnorm() rather than the rounded 1.96.
  conf_level <- 0.95
  z_crit <- qnorm(1 - (1 - conf_level) / 2)
  z_crit # about 1.96
  pnorm(-z_crit) # lower tail area, 0.025

  margin <- z_crit * sigma / sqrt(length(x))
  # Lower confidence limit
  l <- y - margin
  l
  # Upper confidence limit
  u <- y + margin
  u

  # Sample size determination ----
  # n = (z * sigma / E)^2, where E is the desired margin of error (half the
  # width of the confidence interval). When sigma is unknown it can be
  # approximated by range / 4.
  margin_of_error <- 1
  n <- (z_crit * sigma / margin_of_error)^2
  n
  ceiling(n) # sample size rounded up to a whole number

  # Sampling distribution of the mean ----
  # Number of distinct samples of size 10 from 30 units without replacement.
  choose(length(data), sample_size)

  # Simulate the sampling distribution with repeated samples.
  n_sims <- 3004
  set.seed(125)
  a <- numeric(n_sims)
  for (j in seq_len(n_sims)) {
    a[j] <- mean(sample(data, sample_size, replace = TRUE))
  }
  mean(a) # simulated expected value of the sample mean
  mean(data) # population mean

  # A bias close to zero illustrates that the sample mean is an unbiased
  # estimator of the population mean.
  bias <- mean(a) - mean(data)
  bias

  hist(a)
  qqnorm(a)
  qqline(a)
  # Supplementary material (not required for Problem 01) ----
  # Plots of some standard distributions and a hand-written normal density.

  # Standard normal density and cumulative distribution.
  curve(dnorm(x), xlim = c(-3.5, 3.5), ylab = "density",
        main = "Standard Normal Distribution")
  dnorm(x = 0)
  curve(pnorm(x), xlim = c(-3.5, 3.5), ylab = "probability",
        main = "Standard Normal Cumulative Distribution")

  # Standard normal density written out explicitly.
  # x: numeric vector of evaluation points.
  # Returns exp(-x^2 / 2) / sqrt(2 * pi) for each element of x.
  ami <- function(x) {
    exp(-0.5 * x^2) / sqrt(2 * pi)
  }
  ami(-1.96) # same value as dnorm(-1.96)
  # Area under the density up to 3.
  integrate(ami, lower = -Inf, upper = 3)

  # Chi-square distribution ----
  curve(dchisq(x, df = 3), xlim = c(0, 10), ylim = c(0, 1), col = "red",
        main = "PDF of chi-square distribution")

  # t distribution ----
  curve(dt(x, df = 3), xlim = c(-4, 4), col = "red",
        main = "PDF of t distribution")
  # 100 random draws from a t distribution with 3 degrees of freedom.
  b <- rt(100, 3)
  b
  hist(b)
  76. Lab problem:02
  77. Question:
  78. Two dice are rolled and S is the sum of both faces. Find the expectation E(S) and the variance V(S). Plot the
  79. distribution of S and the distribution of a single die D.
  80. R source code:
  # Problem 02: two dice are rolled and S is the sum of both faces.
  # Find E(S) and V(S), then plot the distributions of S and of a single die.

  # Possible values of S and their probabilities (1, 2, ..., 6, ..., 1 ways
  # out of 36 equally likely outcomes).
  s <- 2:12
  ps <- c(1:6, 5:1) / 36

  # Expectation of S
  es <- sum(s * ps)
  es

  # Variance of S: E(S^2) - E(S)^2
  esq <- sum(s^2 * ps)
  vs <- esq - es^2
  vs

  # Split the plotting area into one row with two columns; the previous
  # layout is restored afterwards so later plots are not affected.
  old_par <- par(mfrow = c(1, 2))

  # Distribution of S (bars labelled with the values of S).
  barplot(ps,
          names.arg = s,
          ylim = c(0, 0.2),
          xlab = "S",
          ylab = "Probabilites",
          col = "red",
          space = 0,
          main = "Sum of two dice rolls")

  # Distribution of a single die D.
  probability <- rep(1 / 6, 6)
  names(probability) <- 1:6
  barplot(probability,
          ylim = c(0, 0.2),
          xlab = "D",
          ylab = "Probabilites",
          col = "green",
          space = 0,
          main = "Outcomes of a sngle dice rolls")

  par(old_par)
  # Home task: two dice are rolled and S is the sum of both faces, given
  # that the sum is greater than 2. Find E(S) and V(S) for this case.

  # Possible values of S when S > 2.
  s1 <- 3:12
  s1

  # Conditional probabilities. Excluding the outcome (1, 1) leaves 35 equally
  # likely outcomes, so the counts are divided by 35 (not 36); otherwise the
  # probabilities would not sum to 1 and E(S), V(S) would be wrong.
  ps1 <- c(2:6, 5:1) / 35
  ps1
  sum(ps1) # total probability, must be 1

  # Expectation of S
  es1 <- sum(ps1 * s1)
  es1

  # Variance of S: E(S^2) - E(S)^2
  esq1 <- sum(s1^2 * ps1)
  vs1 <- esq1 - es1^2
  vs1

  # Split the plotting area into one row with two columns; the previous
  # layout is restored afterwards so later plots are not affected.
  old_par <- par(mfrow = c(1, 2))

  # Distribution of S (bars labelled with the values of S).
  barplot(ps1,
          names.arg = s1,
          ylim = c(0, 0.2),
          xlab = "S",
          ylab = "Probabilites",
          col = "yellow",
          space = 0,
          main = "Sum of two dice rolls")

  # Distribution of a single die D.
  probability <- rep(1 / 6, 6)
  names(probability) <- 1:6
  barplot(probability,
          ylim = c(0, 0.2),
          xlab = "D",
          ylab = "Probabilites",
          col = "green",
          space = 0,
          main = "Outcomes of a sngle dice rolls")

  par(old_par)
  151. Lab problem:03
  152. Question:
  153. A herd of 1500 steers was fed a special high-protein grain for a month. A random sample of 29 was weighed
  154. and had gained an average of 6.7 pounds. The standard deviation of weight gain for the entire herd is 7.1. Test the
  155. hypothesis at the 5% level of significance that the average weight gain per steer for the month was more than 5
  156. pounds. Also comment on the test using the p-value. Create the confidence interval.
  157. R source code:
  # Problem 03: one-sample z test with known population standard deviation.
  # A herd of 1500 steers was fed a special high-protein grain for a month.
  # A random sample of 29 was weighed and had gained an average of 6.7 pounds.
  # The sd of weight gain for the entire herd is 7.1. Test at the 5% level
  # whether the average weight gain per steer was more than 5 pounds, and
  # comment on the test using the p-value.
  # H0: mu = 5
  # H1: mu > 5
  x_ber <- 6.7
  mue <- 5
  sigma <- 7.1 # known population sd (named sigma so sd() is not shadowed)
  n <- 29
  alpha <- 0.05
  std_error <- sigma / sqrt(n)

  # Critical value of the upper-tailed test.
  z_tab <- qnorm(alpha, lower.tail = FALSE)
  z_tab # about 1.645

  # Test statistic.
  z_cal <- (x_ber - mue) / std_error
  z_cal # about 1.289
  # Comment: z_cal < z_tab, so H0 is not rejected.

  # Using the p-value ----
  p_value <- pnorm(z_cal, lower.tail = FALSE)
  p_value # about 0.099
  # Comment: p-value > 0.05, so H0 is not rejected. H0 would only be rejected
  # at significance levels above the p-value (about 9.9%).

  # If it were a two-tailed test ----
  # H0: mu = 5
  # H1: mu != 5
  z_tab1 <- qnorm(alpha / 2)
  z_tab1 # about -1.96
  z_tab2 <- qnorm(1 - alpha / 2)
  z_tab2 # about 1.96
  # H0 is again not rejected, since z_cal falls between -1.96 and 1.96.

  # Two-sided p-value; abs() keeps it valid for a negative statistic too.
  p_value2 <- 2 * pnorm(abs(z_cal), lower.tail = FALSE)
  p_value2 # about 0.197
  # Comment: p_value2 > 0.05, so H0 is not rejected.

  # 95% confidence interval for the population mean ----
  CI <- c(x_ber + z_tab1 * std_error, x_ber + z_tab2 * std_error)
  CI # about 4.116 to 9.284
  # With 95% confidence, the average weight gain lies between about 4.12 and
  # 9.28 pounds.
  202. Lab problem:04
  203. Question:
  204. In order to find out whether children with chronic diarrhea have the same average hemoglobin level (Hb) that
  205. is normally seen in healthy children in the same area, a random sample of 10 children with chronic diarrhea
  206. is selected, and their Hb levels (g/dl) are obtained as follows: 12.3, 11.4, 14.2, 15.3, 14.8, 13.8,
  207. 11.1, 15.1, 15.8, 13.2.
  208. Do the data provide sufficient evidence to indicate that the mean Hb level for children with chronic diarrhea
  209. is less than the normal value of 14.6 (g/dl)? Test at the 0.01 level of significance. Draw a boxplot and a normal
  210. Q-Q plot for these data and comment.
  211. R source code:
  # Problem 04: one-sample, lower-tailed t test.
  # A random sample of 10 children with chronic diarrhea has Hb levels (g/dl)
  # 12.3, 11.4, 14.2, 15.3, 14.8, 13.8, 11.1, 15.1, 15.8, 13.2.
  # Do the data provide sufficient evidence that the mean Hb level for
  # children with chronic diarrhea is less than the normal value of
  # 14.6 g/dl? Test at the 0.01 level of significance, and draw a boxplot and
  # a normal Q-Q plot of the data.
  # H0: mu = 14.6
  # H1: mu < 14.6
  data <- c(12.3, 11.4, 14.2, 15.3, 14.8, 13.8, 11.1, 15.1, 15.8, 13.2)
  n <- length(data)
  n
  x_ber <- mean(data)
  x_ber
  sample_sd <- sd(data)
  sample_sd
  mue <- 14.6
  alpha <- 0.01

  # Critical value of the lower-tailed test with n - 1 degrees of freedom.
  t_tab <- qt(alpha, n - 1)
  t_tab # about -2.821

  # Test statistic.
  t_cal <- (x_ber - mue) / (sample_sd / sqrt(n))
  t_cal # about -1.72
  # Comment: t_cal > t_tab, so H0 is not rejected.

  # Using the p-value ----
  p_value <- pt(t_cal, n - 1)
  p_value # about 0.06
  # Comment: p_value > 0.01, so H0 is not rejected.

  # Using the built-in function ----
  # The statistic and p-value reported here match t_cal and p_value above.
  t_result <- t.test(data, mu = mue, conf.level = 1 - alpha,
                     alternative = "less")
  t_result

  boxplot(data, ylab = "Hb lebel", col = "red")
  qqnorm(data, main = "Normal Q-Q plot of Hb lebel")
  qqline(data)
  247. Lab problem:05
  248. Question:
  249. In order to find out whether children with chronic diarrhea have the same average hemoglobin level (Hb) that
  250. is normally seen in healthy children in the same area, a random sample of 10 children with chronic diarrhea
  251. is selected, and their Hb levels (g/dl) are obtained as follows: 12.3, 11.4, 14.2, 15.3, 14.8, 13.8,
  252. 11.1, 15.1, 15.8, 13.2.
  253. Another random sample of 12 children with chronic diarrhea gives 11.1, 17.2, 13.4, 15.2, 14.1, 13.0, 12.5, 11.5,
  254. 12.7, 14.5, 15.3, 14.0.
  255. Is there any difference in the mean Hb level between the two groups of children?
  256. R source code:
  # Problem 05: two-sample t test with pooled variance.
  # Hb levels (g/dl) of a random sample of 10 children with chronic diarrhea:
  #   12.3, 11.4, 14.2, 15.3, 14.8, 13.8, 11.1, 15.1, 15.8, 13.2
  # and of another random sample of 12 children with chronic diarrhea:
  #   11.1, 17.2, 13.4, 15.2, 14.1, 13.0, 12.5, 11.5, 12.7, 14.5, 15.3, 14.0
  # Is there any difference in the mean Hb level between the two groups?

  # Solution ----
  # H0: mu1 = mu2
  # H1: mu1 != mu2
  level <- 0.05
  alpha <- level / 2 # tail area on each side of the two-sided test
  alpha # 0.025

  data1 <- c(12.3, 11.4, 14.2, 15.3, 14.8, 13.8, 11.1, 15.1, 15.8, 13.2)
  n1 <- length(data1)
  n1
  s1 <- sd(data1)
  s1
  x_ber1 <- mean(data1)
  x_ber1

  data2 <- c(11.1, 17.2, 13.4, 15.2, 14.1, 13.0, 12.5, 11.5, 12.7, 14.5,
             15.3, 14.0)
  n2 <- length(data2)
  n2
  s2 <- sd(data2)
  s2
  x_ber2 <- mean(data2)
  x_ber2

  dof <- n1 + n2 - 2
  t_tab1 <- qt(alpha, dof)
  t_tab1 # about -2.086
  t_tab2 <- qt(1 - alpha, dof)
  t_tab2 # about 2.086

  # Checking whether the variances are equal ----
  boxplot(list(sample_1 = data1, sample_2 = data2), col = "red")
  # Boxes of similar height (similar spread) suggest equal variances; the
  # middle lines compare the medians, not the variances.
  ratio_sd <- s1 / s2
  ratio_sd # about 0.96, close to 1, so equal variances are assumed

  # Pooled standard deviation and standard error of the difference in means.
  sp <- sqrt(((n1 - 1) * s1^2 + (n2 - 1) * s2^2) / dof)
  sp
  se_diff <- sp * sqrt(1 / n1 + 1 / n2)

  t_cal <- (x_ber1 - x_ber2) / se_diff
  t_cal # about -0.0115
  # Comment: H0 is not rejected, since t_cal falls between t_tab1 and t_tab2.

  # Using the p-value ----
  # -abs() makes the two-sided p-value correct for either sign of t_cal.
  p_value <- 2 * pt(-abs(t_cal), dof)
  p_value # about 0.991
  # Comment: p_value > 0.05, so H0 is not rejected.

  # 95% confidence interval for mu1 - mu2 ----
  CI <- c((x_ber1 - x_ber2) + t_tab1 * se_diff,
          (x_ber1 - x_ber2) + t_tab2 * se_diff)
  CI # about -1.519 to 1.503

  # Using the t.test function ----
  # var.equal = TRUE requests the pooled-variance test computed by hand
  # above; the default (FALSE) would run Welch's test instead.
  t_result <- t.test(data1, data2, alternative = "two.sided", mu = 0,
                     paired = FALSE, var.equal = TRUE, conf.level = 0.95)
  t_result

  # Another alternative hypothesis ----
  # H0: mu1 = mu2
  # H1: mu1 > mu2
  t_cal
  t_tabx <- qt(level, dof, lower.tail = FALSE)
  t_tabx # about 1.725
  # Comment: H0 is not rejected, since t_cal < t_tabx.

  # Using the p-value ----
  # For H1: mu1 > mu2 the p-value is the upper-tail area beyond t_cal.
  p_valuex <- pt(t_cal, dof, lower.tail = FALSE)
  p_valuex # about 0.505
  # Comment: H0 is not rejected, since p_valuex > 0.05.
  324. Lab problem:06
  325. Test the hypothesis that the mean systolic blood pressure of healthy subjects (status-0) and of subjects with
  326. hypertension (status-1) are equal, i.e. the hypothesized difference is d0 = 0. The dataset contains n1 = 25 subjects with status-0 and n2 =
  327. 30 with status-1.
  328. Status-0: (120, 115, 94, 118, 111, 102, 102, 131, 104, 107, 115, 139, 115, 113, 114, 105, 115, 134, 109, 109,
  329. 93, 118, 109, 106, 125).
  330. Status-1: (150, 142, 119, 127, 141, 149, 144, 142, 149, 161, 143, 140 , 148, 149, 141, 146, 159, 152, 135,
  331. 134, 161, 130, 125, 141, 148 ,153, 145, 137, 147, 169).
  332. R source code:
  # Problem 06: test the hypothesis that the mean systolic blood pressure of
  # healthy subjects (status-0) and of subjects with hypertension (status-1)
  # are equal (hypothesized difference d0 = 0). The dataset contains n1 = 25
  # subjects with status-0 and n2 = 30 with status-1.

  # Solution ----
  # H0: mu1 = mu2
  # H1: mu1 != mu2
  data1 <- c(120, 115, 94, 118, 111, 102, 102, 131, 104, 107, 115, 139, 115,
             113, 114, 105, 115, 134, 109, 109, 93, 118, 109, 106, 125)
  n1 <- length(data1)
  n1
  s1 <- sd(data1)
  s1
  x_ber1 <- mean(data1)
  x_ber1

  data2 <- c(150, 142, 119, 127, 141, 149, 144, 142, 149, 161, 143, 140,
             148, 149, 141, 146, 159, 152, 135, 134, 161, 130, 125, 141, 148,
             153, 145, 137, 147, 169)
  n2 <- length(data2)
  n2
  s2 <- sd(data2)
  s2
  x_ber2 <- mean(data2)
  x_ber2

  # Checking normality ----
  # Two Q-Q plots side by side; the previous layout is restored afterwards
  # so the boxplot below gets a full plotting area.
  old_par <- par(mfrow = c(1, 2))
  qqnorm(data1)
  qqline(data1)
  qqnorm(data2)
  qqline(data2)
  par(old_par)

  # Checking whether the variances are equal ----
  boxplot(list(status_0 = data1, status_1 = data2), col = "red")
  ratio_sd <- s1 / s2
  ratio_sd # about 1.02, close to 1, so equal variances are assumed

  alpha <- 0.05
  dof <- n1 + n2 - 2
  t_tab1 <- qt(alpha / 2, dof)
  t_tab1 # about -2.006
  t_tab2 <- qt(1 - alpha / 2, dof)
  t_tab2 # about 2.006

  # Pooled standard deviation: each sample variance is weighted by its own
  # degrees of freedom, (n1 - 1) for s1^2 and (n2 - 1) for s2^2.
  sp <- sqrt(((n1 - 1) * s1^2 + (n2 - 1) * s2^2) / dof)
  sp

  t_cal <- (x_ber1 - x_ber2) / (sp * sqrt(1 / n1 + 1 / n2))
  t_cal
  # Comment: t_cal is far outside the critical values t_tab1 and t_tab2, so
  # the null hypothesis H0 is rejected.

  # Using the p-value ----
  # -abs() makes the two-sided p-value correct for either sign of t_cal.
  p_value <- 2 * pt(-abs(t_cal), dof)
  p_value # far below 0.05, so H0 is rejected

  # Using the t.test function ----
  # var.equal = TRUE requests the pooled-variance test computed by hand
  # above; the default (FALSE) would run Welch's test instead.
  t_result <- t.test(data1, data2, alternative = "two.sided", mu = 0,
                     paired = FALSE, var.equal = TRUE, conf.level = 0.95)
  t_result
  388. Lab problem:07
  389. Question:
  390. Of 126 people, some smoke and some do not. Data on smoking and heart disease for these people are tabulated
  391. below:
  392. Is there any association between smoking and heart disease in the given data?
  393. Disease
  394. Smoking
  395. Heart disease No heart disease Total
  396. YES 55 16 71
  397. NO 23 32 55
  398. Total 78 48 N=126
  399. R source code:
  # Problem 07: chi-square test of association between smoking and heart
  # disease for 126 people (2 x 2 table given in the question).
  # Original reference:
  # E:\2'2\Sampling and hypothisis testing\Lab\Lab with R\Lab 07.docx

  # Solution ----
  # H0: there is no association between smoking and heart disease.
  # H1: there is an association between smoking and heart disease.
  m <- matrix(c(55, 16, 23, 32), ncol = 2, byrow = TRUE,
              dimnames = list(c("yes", "no"), c("disses", "not_disses")))
  m

  # Marginal totals.
  c1 <- sum(m[, 1])
  c1
  c2 <- sum(m[, 2])
  c2
  r1 <- sum(m[1, ])
  r1
  r2 <- sum(m[2, ])
  r2
  n <- sum(m)
  n

  # Expected counts under H0: (row total * column total) / n for each cell.
  expected <- outer(c(r1, r2), c(c1, c2)) / n
  dimnames(expected) <- dimnames(m)
  expected
  E11 <- expected[1, 1]
  E21 <- expected[2, 1]
  E12 <- expected[1, 2]
  E22 <- expected[2, 2]

  # Pearson chi-square statistic, summed over the four cells.
  chi_cal <- sum((m - expected)^2 / expected)
  chi_cal # about 16.699

  # Degrees of freedom: (rows - 1) * (columns - 1).
  dof <- (nrow(m) - 1) * (ncol(m) - 1)
  chi_tab <- qchisq(0.05, df = dof, lower.tail = FALSE)
  chi_tab # about 3.841
  # Comment: chi_cal > chi_tab, so H0 is rejected,
  # i.e. smoking is associated with heart disease.

  # P-value ----
  p_value <- pchisq(chi_cal, df = dof, lower.tail = FALSE)
  p_value # about 4.38e-05
  # Comment: p-value < 0.05, so H0 is rejected.

  # Using the chisq.test function ----
  # correct = FALSE reproduces the uncorrected statistic computed by hand.
  chisq.test(m, correct = FALSE)
  # The default applies Yates' continuity correction for 2 x 2 tables, which
  # gives a slightly larger p-value (about 9.56e-05); H0 is still rejected,
  # i.e. smoking is associated with heart disease.
  chisq.test(m)
  440. Lab problem:08
  441. Question:
  442. There are two COVID-19 testing booths. Some people were tested at each booth and the results are recorded below;
  443. 11 people were tested at booth-1 and 10 people at booth-2:
  444. Booth-1: positive, positive, negative, positive, negative, negative, positive, positive, positive, negative,
  445. positive.
  446. Booth-2: negative, negative, negative, positive, positive, negative, positive, negative, negative, negative.
  447. Is there any relation between the booth and the test result?
  448. R source code:
  # Problem 08: there are two COVID-19 testing booths; 11 people were tested
  # at booth-1 and 10 people at booth-2, with the results listed below.
  # Is there any relation between the booth and the test result?

  # Solution ----
  # H0: there is no relation between booth-1 and booth-2.
  # H1: there is a relation between booth-1 and booth-2.
  booth_1 <- c("positive", "positive", "negative", "positive", "negative",
               "negative", "positive", "positive", "positive", "negative",
               "positive")
  booth_2 <- c("negative", "negative", "negative", "positive", "positive",
               "negative", "positive", "negative", "negative", "negative")

  # Counts per booth; fixing the factor levels keeps both tables in the same
  # column order even if one result were absent from a booth.
  result_levels <- c("negative", "positive")
  x_table1 <- table(factor(booth_1, levels = result_levels))
  x_table1
  x_table2 <- table(factor(booth_2, levels = result_levels))
  x_table2

  # 2 x 2 contingency table built from the counts instead of typed by hand.
  m <- matrix(c(as.vector(x_table1), as.vector(x_table2)),
              ncol = 2, byrow = TRUE,
              dimnames = list(c("Booth-1", "Booth-2"), result_levels))
  m

  # Marginal totals.
  c1 <- sum(m[, 1])
  c1
  c2 <- sum(m[, 2])
  c2
  r1 <- sum(m[1, ])
  r1
  r2 <- sum(m[2, ])
  r2
  n <- sum(m)
  n

  # Expected counts under H0: (row total * column total) / n for each cell.
  expected <- outer(c(r1, r2), c(c1, c2)) / n
  dimnames(expected) <- dimnames(m)
  expected
  E11 <- expected[1, 1]
  E21 <- expected[2, 1]
  E12 <- expected[1, 2]
  E22 <- expected[2, 2]
  E22 # about 4.76 < 5, so Yates' continuity correction is applied

  # Yates-corrected chi-square statistic, summed over all four cells.
  chi_yates <- sum((abs(m - expected) - 0.5)^2 / expected)
  chi_yates # about 1.2188

  # Degrees of freedom: (rows - 1) * (columns - 1).
  dof <- (nrow(m) - 1) * (ncol(m) - 1)
  chi_tab <- qchisq(0.05, df = dof, lower.tail = FALSE)
  chi_tab # about 3.841
  # Comment: chi_yates < chi_tab, so H0 is not rejected,
  # i.e. there is no evidence of a relation between booth-1 and booth-2.

  # P-value ----
  p_value <- pchisq(chi_yates, df = dof, lower.tail = FALSE)
  p_value # about 0.2696
  # Comment: p-value > 0.05, so H0 is not rejected.

  # Using the chisq.test function ----
  # chisq.test applies Yates' correction to 2 x 2 tables by default, so it
  # reproduces chi_yates and p_value.
  chisq.test(m)
  # Here p-value is about 0.2696 > 0.05, so H0 is not rejected,
  # i.e. there is no evidence of a relation between booth-1 and booth-2.
Advertisement
RAW Paste Data Copied
Advertisement