## Set A
## 1) Model sampling from the Cauchy and Laplace distributions ----

# a) X follows a Cauchy distribution with location theta = 10 and scale
#    lambda = 1. The sample is drawn with the inverse-CDF transformation.
n <- 11       # size of the random sample to be drawn from the Cauchy distribution
lambda <- 1   # scale parameter
theta <- 10   # location parameter
# The CDF of a continuous variate is itself standard uniform, so uniforms
# pushed through the quantile function give a sample from the target law.
y <- runif(n) # random sample of size 11 from U(0, 1)
# Cauchy quantile function. The transcript used `v` here, which is never
# defined in this exercise; the uniforms are stored in `y`.
x <- lambda * tan(pi * (y - 0.5)) + theta
print(x)

# b) Median of the Cauchy sample obtained above
print(median(x))

## Note kept from the original (there labelled "Cauchy", but the expression is
## the Laplace density): f(x) = (L / 2) * exp(-L * |x - u|)

## 2) Log-normal fit (cancer data) ----
# Three-parameter log-normal density, reconstructed from the original note:
#   f(x) = 1 / ((x - a) * sigma * sqrt(2 * pi)) *
#          exp(-(log(x - a) - u)^2 / (2 * sigma^2))

# Fitting the log-normal distribution by the method of moments
LL <- seq(0, 60, 10)                 # lower class limits of the given data
print(LL)
UL <- seq(10, 70, 10)                # upper class limits of the given data
print(UL)
Freq <- c(20, 90, 52, 11, 6, 4, 1)   # observed class frequencies
print(Freq)
N <- sum(Freq)
x <- (UL + LL) / 2                   # class mid-points
d <- data.frame(
  "lower limit" = LL, "Upper limit" = UL, "mid point" = x, "frequency" = Freq
)
print(d)

# First and second sample raw moments (M12 had an unbalanced parenthesis).
M12 <- sum(Freq * x) / sum(Freq)
print(M12)
M13 <- sum(Freq * x^2) / sum(Freq)
print(M13)

# Moment estimators obtained from E[X] = exp(mu + s2 / 2) and
# E[X^2] = exp(2 * mu + 2 * s2). `sigma_est` therefore holds the estimate of
# the log-scale VARIANCE; its square root is what plnorm() needs below.
sigma_est <- log(M13) - 2 * log(M12)
print(sigma_est)
mu_est <- 2 * log(M12) - log(M13) / 2
print(mu_est)

a <- plnorm(UL, mu_est, sqrt(sigma_est))   # cumulative probabilities at upper limits
print(a)
# Class probabilities: first class is P(X <= UL[1]); the rest are successive
# differences of the cumulative probabilities.
px <- c(a[1], diff(a))
ExFreq <- round(N * px, 2)                 # expected frequencies
print(ExFreq)
print(sum(ExFreq))
D1 <- data.frame(d, "expected" = ExFreq)   # transcript had the typo `Exfreq`

## 3) Literates in two years: ratio and regression methods ----
# Ratio and regression methods of estimation, compared with SRSWOR
x <- c(109, 101, 125, 254, 559, 359, 427, 481)
print(x)
y <- c(99, 112, 111, 278, 634, 355, 399, 489)
print(y)
N <- 170          # number of units in the population
n <- length(y)    # sample size
# NOTE(review): Xt / N is about 125224 while mean(x) is about 302; confirm the
# magnitude/units of Xt against the exercise statement. Value kept as given.
Xt <- 21288000    # population total of the auxiliary variable X
X_bar_N <- Xt / N # population mean of X
print(X_bar_N)
y_bar_n <- mean(y)
x_bar_n <- mean(x)

# Ratio method
Rn <- y_bar_n / x_bar_n   # ratio of the sample means of y and x
print(Rn)
Y_bar_N <- Rn * X_bar_N   # ratio estimate of the population mean of Y
print(Y_bar_N)
sy_sq <- var(y)           # sample mean square of y
print(sy_sq)
sx_sq <- var(x)           # sample mean square of x
print(sx_sq)
sxy_sq <- var(x, y)       # sample covariance of x and y
print(sxy_sq)
# Estimated SE of the ratio estimate of the population total of Y
SE_Y_totalhat <- N * sqrt(
  ((1 / n) - (1 / N)) * (sy_sq + Rn^2 * sx_sq - 2 * Rn * sxy_sq)
)
print(SE_Y_totalhat)

# Regression method
byx <- sxy_sq / sx_sq                          # regression coefficient of Y on X
print(byx)
Y_hat <- y_bar_n + byx * (X_bar_N - x_bar_n)   # estimate of the population mean of Y
print(Y_hat)
Y_hat_total <- N * Y_hat                       # estimate of the population total of Y
print(Y_hat_total)
# Estimated SE of the regression estimate of the total. The transcript was
# missing an opening parenthesis; the square root must cover the finite
# population factor AND the residual variance term.
SE_Y_total1 <- N * sqrt(
  ((N - n) / (N * n)) * (sy_sq + byx^2 * sx_sq - 2 * byx * sxy_sq)
)
print(SE_Y_total1)

# Comparison with simple random sampling without replacement (SRSWOR)
var_y_bar <- ((1 / n) - (1 / N)) * sy_sq   # variance of the sample mean under SRSWOR
print(var_y_bar)
SE_Y_hat <- N * sqrt(var_y_bar)            # SE of the estimated population total under SRSWOR
print(SE_Y_hat)

# Conclusion (as recorded by the author):
#   SE(Y_hat) SRSWOR > SE(Y_hat) RATIO > SE(Y_hat) REGRESSION
# i.e. the regression estimator has the smallest variation, and using the
# supplementary information increases the precision.

## SET B ## 1.
## Set B, Q1.B ("que no 4"): sample from N(5, 4) by the Box-Muller method ----
mu <- 5
sigma <- sqrt(4)
set.seed(8)
u <- runif(8, 0, 1)   # u and v are independent U(0, 1) samples
print(u)
v <- runif(8, 0, 1)
print(v)
z <- sqrt(-2 * log(x = u)) * cos(2 * pi * v)   # random sample from N(0, 1)
print(z)
# Location-scale transform. The transcript read `mu + (2 * sigma)`, which is
# the constant 9; the standard-normal draws `z` are what must be scaled.
x <- mu + (z * sigma)                          # random sample from N(5, 4)
print(x)
print(mean(x))
print(var(x))   # added: the conclusion below refers to the sample variance
# Conclusion (author's): the sample mean is close to the population mean; the
# sample variance shows some deviation, but it approaches the population
# variance as the sample size tends to infinity.

## 1.C Weibull densities for several (alpha = scale, beta = shape) pairs ----
# The original heading read "alpha=15 and beta=10"; the curves actually drawn
# use the parameter values assigned below (they match the legend labels).
alpha3 <- 3   # parameter values of the Weibull distribution
beta3 <- 1
x <- seq(-0.01, 15, 0.1)
print(x)
px4 <- dweibull(x, shape = beta3, scale = alpha3)
print(px4)
# type must be the letter "l" (lines); the transcript had the digit "1".
plot(
  x, px4,
  col = "pink", xlab = "variable", ylab = "probability",
  lwd = 4, type = "l", ylim = c(0, 0.92)
)

# a)
alpha <- 1
beta <- 2     # parameter values of the Weibull distribution
x <- seq(-0.01, 15, 0.1)
px <- dweibull(x, shape = beta, scale = alpha)
print(px)
lines(x = x, y = px, lty = 3, col = "yellow", lwd = 3)

# b)
alpha1 <- 5   # parameter values of the Weibull distribution
beta1 <- 1
px1 <- dweibull(x, shape = beta1, scale = alpha1)
print(px1)
lines(x = x, y = px1, col = "blue", lwd = 4, lty = 2)

# c)
alpha1 <- 1   # parameter values of the Weibull distribution
beta1 <- 1
px2 <- dweibull(x, shape = beta1, scale = alpha1)
print(px2)
lines(x = x, y = px2, col = "green", lwd = 3, lty = 2)

# d)
alpha2 <- 10  # parameter values of the Weibull distribution
beta2 <- 5
px3 <- dweibull(x, shape = beta2, scale = alpha2)
print(px3)
lines(x = x, y = px3, col = "violet", lwd = 4, lty = 2)

# locator() needs an interactive graphics device; fall back to a fixed corner
# so the script also runs under Rscript / when rendering to a file.
legend_pos <- if (dev.interactive()) locator(1) else "topright"
legend(
  legend_pos,
  legend = c(
    "W(alpha=3,beta=1)", "W(alpha=1,beta=2)", "W(alpha=5,beta=1)",
    "W(alpha=1,beta=1)", "W(alpha=10, beta=5)"
  ),
  fill = c("pink", "yellow", "blue", "green", "violet")
)
# Conclusion: as the shape parameter increases, the density moves from
# strongly right-skewed towards a more symmetric form. (The original note said
# "negatively skewed"; the shape = 1 curves have their long tail to the right.)
### Proportional and Neyman allocation ----
# NOTE(review): the transcript set N5 = 100 but listed 120 as the fifth stratum
# size in Ni. The sizes are now taken from Ni only; 120 gives N = 400 and
# whole-number proportional allocations for n = 40. Confirm against the data.
Ni <- c(80, 60, 40, 100, 120)   # population size of each stratum
print(Ni)
N1 <- Ni[1]
N2 <- Ni[2]
N3 <- Ni[3]
N4 <- Ni[4]
N5 <- Ni[5]
N <- sum(Ni)                    # total population size
print(N)
Y_bar_i <- c(82.3, 161.5, 139.2, 239.1, 200)   # population mean of each stratum
y_bar_i <- Y_bar_i   # lower-case alias: the transcript assigned this spelling
print(Y_bar_i)
Si <- c(21.3, 17.7, 14.7, 15.3, 18.9)   # per-stratum S_i (squared below as S_i^2)
print(Si)
n <- 40                                 # total sample size

# Size of the sub-sample from each stratum
# -- proportional allocation: n_i = n * N_i / N
ni <- (n / N) * Ni
print(ni)
n1 <- ni[1]
n2 <- ni[2]
n3 <- ni[3]
n4 <- ni[4]
n5 <- ni[5]
print(c(n1, n2, n3, n4, n5))

# -- Neyman allocation: n_i = n * N_i * S_i / sum(N_i * S_i), rounded
ni <- (n * Ni * Si) / sum(Ni * Si)
n1 <- round(ni[1], 0)
n2 <- round(ni[2], 0)
n3 <- round(ni[3], 0)
n4 <- round(ni[4], 0)
n5 <- round(ni[5], 0)
print(c(n1, n2, n3, n4, n5))

# S.E. of the estimates of the population mean and the population total.
# Stratum weights W_i = N_i / N. The transcript stored these in `pi`, which
# masks R's built-in constant for every later statement; renamed to `Wi`.
Wi <- Ni / N
print(Wi)

# Under proportional allocation: V = (1/n - 1/N) * sum(W_i * S_i^2)
S_E_est_Ybar1 <- sqrt(((1 / n) - (1 / N)) * sum(Wi * Si^2))
print(S_E_est_Ybar1)
# The SE of the total is N times the SE of the mean (transcript had sqrt(N * SE)).
S.E_est_Yt1 <- N * S_E_est_Ybar1
print(S.E_est_Yt1)

# Under Neyman allocation: V = (1/n) * (sum(W_i * S_i))^2 - (1/N) * sum(W_i * S_i^2)
# (the transcript had `*2` where the square `^2` is required).
S.E_est_Ybar2 <- sqrt((1 / n) * sum(Wi * Si)^2 - (1 / N) * sum(Wi * Si^2))
print(S.E_est_Ybar2)
S.E_est_Yt2 <- N * S.E_est_Ybar2
print(S.E_est_Yt2)

# Under SRSWOR: V = ((N - n) / (N * n)) * S^2, where the population mean
# square is rebuilt from the within- and between-stratum sums of squares:
#   (N - 1) * S^2 = sum((N_i - 1) * S_i^2) + sum(N_i * (Ybar_i - Ybar)^2)
Y_bar_N <- sum(Y_bar_i * Ni) / N
print(Y_bar_N)
S.E_est_Ybar3 <- sqrt(
  ((N - n) / (N * n)) * (1 / (N - 1)) *
    (sum((Ni - 1) * Si^2) + sum((Y_bar_i - Y_bar_N)^2 * Ni))
)
print(S.E_est_Ybar3)
S.E_est_Yt3 <- N * S.E_est_Ybar3
print(S.E_est_Yt3)
# Conclusion: the variance of the estimated population mean under SRSWOR is
# larger than the variance under proportional and Neyman allocation.

## SET C Q1. A(i): distribution-free confidence interval for the median ----
x <- sort(c(24, 38, 61, 22, 16, 57, 31, 29, 35))
print(x)
print(length(x))
LL <- seq(1, 9, 1)                          # rank of the lower order statistic
print(LL)
UL <- sort(seq(1, 9, 1), decreasing = TRUE) # rank of the matching upper order statistic
print(UL)
d <- data.frame(LL, UL)
print(d)
# Keep only proper intervals (lower rank below upper rank). This statement was
# swallowed by the text extraction and is reconstructed from the use of d1.
d1 <- subset(d, LL < UL)
print(d1)
d2 <- data.frame(d1, LB = x[d1$LL], UB = x[d1$UL])   # interval end points
print(d2)
# Confidence coefficient of (x_(r), x_(s)): P(r <= B <= s - 1), B ~ Bin(9, 0.5)
con_Fin <- pbinom(d2$UL - 1, 9, 0.5) - pbinom(d2$LL - 1, 9, 0.5)
print(data.frame(d2, con_Fin))
# The median lies within (24, 38) with confidence coefficient of about 82%.

## Q1.
## C -- Q2: marks (out of 50) of 6 students; all SRSWOR samples of size 2 ----
L <- c(42, 18, 25, 35, 20, 30)
print(L)
N <- length(L)   # total number of sampling units in the population
print(N)
n <- 2           # sample size
print(n)
ob1 <- 1:6
ob2 <- 1:6
m <- merge(ob1, ob2)   # every ordered pair of positions (columns x and y)
print(m)
sub <- subset(m, m$x > m$y)   # positions of the observations in each SRSWOR sample
print(sub)
d <- data.frame(ob1 = L[sub[, 1]], ob2 = L[sub[, 2]])
print(d)
# sm: sample mean of each pair; sv: sample variance rescaled by (n - 1) / n.
# NOTE(review): for n = 2 this rescaling equals s^2 / n; confirm which of the
# two the author intended before reusing `sv` for another sample size.
d1 <- data.frame(
  d,
  sm = rowMeans(d),
  sv = apply(as.matrix(d), 1, var) * ((n - 1) / n)
)
print(d1)
n1 <- length(d1$sm)   # number of possible samples

# Verify E(y_bar) = Y_bar
est_y_bar <- mean(d1$sm)
print(est_y_bar)
ybar <- mean(L)
print(ybar)
# Conclusion: the sample mean is an unbiased estimator of the population mean.

# Verify Var(y_bar) = ((N - n) / (N * n)) * S^2
# The transcript reassigned `n` to the number of samples at this point, which
# corrupts the right-hand side below; the count is kept in its own name.
n2 <- length(d1$sv)
print(n2)
var_y_bar <- var(d1$sm) * (n1 - 1) / n1   # variance over all n1 possible samples
print(var_y_bar)
RHS <- var(L) * ((N - n) / (N * n))
print(RHS)

# Confidence interval for the population mean (and total) from each sample
alpha <- 0.05
z_alpha_2 <- qnorm(1 - (alpha / 2))
print(z_alpha_2)
sigma <- sqrt(d1$sv)
print(sigma)
# Limits are appended to d1 (the transcript referred to a stale d2 here).
d2 <- data.frame(
  d1,
  L_cl = d1$sm - (z_alpha_2 * sigma),
  U_CL = d1$sm + (z_alpha_2 * sigma)
)
print(d2)
d3 <- data.frame(
  d2,
  T_L_cl = N * (d1$sm - (z_alpha_2 * sigma)),
  T_U_CL = N * (d1$sm + (z_alpha_2 * sigma))
)
print(d3)

## List of 3100 names, 41 wrong in a sample of 200: find the SE and CI ----
N <- 3100   # population size
print(N)
n <- 200    # size of the random sample drawn from the population
print(n)
a <- 41     # names/addresses in the sample that are wrong and need correcting
p <- a / n  # sample proportion, an unbiased estimator of the population proportion
print(p)
q <- 1 - p
print(q)
# Estimated standard error of the proportion:
#   sqrt(((N - n) / (n * N)) * (n / (n - 1)) * p * q)
# (the transcript closed the square root after the first factor).
est_S.E_est_P <- sqrt(((N - n) / (n * N)) * (n / (n - 1)) * p * q)
print(est_S.E_est_P)
# 95% confidence interval for the population proportion (n > 30, normal approx.)
alpha <- 0.05
Z_alpha <- qnorm(1 - (alpha / 2), 0, 1)
print(Z_alpha)
L_CI <- p - (Z_alpha * est_S.E_est_P)
print(L_CI)
U_CI <- p + (Z_alpha * est_S.E_est_P)
print(U_CI)
# 95% confidence interval for the population total (reuses the same names)
L_CI <- N * (p - (Z_alpha * est_S.E_est_P))
print(L_CI)
U_CI <- N * (p + (Z_alpha * est_S.E_est_P))
print(U_CI)

## Sales last year / this year: ratio and regression estimation ----
Sale_last_year <- c(50, 35, 12, 10, 15, 30, 9, 25, 100, 250, 50, 50, 150, 100, 40)
print(Sale_last_year)
Sale_this_year <- c(56, 48, 22, 14, 18, 26, 11, 30, 165, 409, 73, 70, 95, 55, 83)
print(Sale_this_year)
# NOTE(review): the transcript never bound `x` and `y` in this exercise, so it
# silently used whatever was left in the workspace. They are bound here to the
# two sales vectors (auxiliary = last year, study variable = this year).
# The console values recorded in the transcript (quoted below as "recorded")
# do not correspond to these vectors and will not be reproduced -- confirm
# which data set this exercise was meant to use.
x <- Sale_last_year
y <- Sale_this_year
N <- 300          # number of units in the population
n <- length(y)    # sample size
Xt <- 21300       # population total of the auxiliary variable X
X_bar_N <- Xt / N # population mean of X (recorded: 71)
print(X_bar_N)
y_bar_n <- mean(y)   # sample mean of y
x_bar_n <- mean(x)   # sample mean of x

# Ratio method
Rn <- y_bar_n / x_bar_n   # ratio of the sample means (recorded: 0.9622642)
print(Rn)
Y_bar_N <- Rn * X_bar_N   # estimate of the population mean of Y (recorded: 68.32075)
print(Y_bar_N)
sy_sq <- var(y)           # sample mean square of y (recorded: 88.1)
print(sy_sq)
sx_sq <- var(x)           # sample mean square of x (recorded: 100.6778)
print(sx_sq)
sxy_sq <- var(x, y)       # sample covariance of x and y (recorded: 79.63333)
print(sxy_sq)
# Estimated SE of the ratio estimate of the population total (recorded: 494.1418)
SE_Y_totalhat <- N * sqrt(
  ((1 / n) - (1 / N)) * (sy_sq + Rn^2 * sx_sq - 2 * Rn * sxy_sq)
)
print(SE_Y_totalhat)

# Regression method
byx <- sxy_sq / sx_sq     # regression coefficient of Y on X (recorded: 0.7909723)
print(byx)
Y_hat <- y_bar_n + byx * (X_bar_N - x_bar_n)   # population mean of Y (recorded: 66.14535)
print(Y_hat)
Y_hat_total <- N * Y_hat                       # population total of Y (recorded: 19843.6)
print(Y_hat_total)
# Estimated SE of the regression estimate of the total; the square root must
# cover both factors (the transcript had an unbalanced parenthesis and the
# typos `sy_sa` / `sx_sa`).
SE_Y_total1 <- N * sqrt(
  ((N - n) / (N * n)) * (sy_sq + byx^2 * sx_sq - 2 * byx * sxy_sq)
)
print(SE_Y_total1)
# Recorded in the transcript: the standard errors of the estimators from the
# ratio and regression methods are 494.1418 and 467.4147 respectively.