StatComp_2020Q1Q2

#2020-Q1------------------------------------------------------------------------------------
temp <- sum(1/c(3:12))
c <- 1/temp  # c = 0.6237483
p <- c/c(3:12)
theta <- -sum(p*log(p)) # theta = 2.200209

sim_n <- 10000; n1 <- 20; n2 <- 50

set.seed(1234)
N20 <- rmultinom(n = sim_n, size = n1, prob = p)
# row-i means Xi & column-j means j-th simulation with n = 20


g <- function(x, n){-(x/n)*log(x/n)}
theta1_n1 <- apply(N20, 2, function(x) sum(g(x, n1), na.rm = TRUE) )
# theta11<-rep(0,sim_n)
# for (j in 1:sim_n) {
#   for(i in 1:10){
#     if(N20[i,j] != 0){
#       theta11[j]<-[j] +g(N20[i,j], n1)
#     }
#   }
# }
# all.equal(theta11, theta1_n1)
# # Note: all.equal vs identical
# # the former one can tolerate small difference,
# # the latter one must be exactly identical, including datatype and numerical error.
theta2_n1 <- theta1_n1 + 9/(2*n1)

theta1_bar_n1 <- mean(theta1_n1)
theta2_bar_n1 <- mean(theta2_n1)
bias1_n1 <- theta1_bar_n1 - theta
bias2_n1 <- theta2_bar_n1 - theta
SE1_n1<-sqrt(sum((theta1_n1 - theta1_bar_n1)^2)/sim_n)
SE2_n1<-sqrt(sum((theta2_n1 - theta2_bar_n1)^2)/sim_n)
R_mse1_n1<-sqrt(sum((theta1_n1 - theta)^2)/sim_n)
R_mse2_n1<-sqrt(sum((theta2_n1 - theta)^2)/sim_n)

N50 <- rmultinom(n = sim_n,size = n2,prob = p)
theta1_n2 <- apply(N50, 2, function(x) sum(g(x, n2), na.rm = TRUE) )
# row-i means Xi & column-j means jth simulation with n = 50

# theta12<-rep(0,sim_n)
# for (j in 1:sim_n) {
#   for(i in 1:10){
#     if(N50[i,j] != 0){
#       theta12[j]<-theta12[j] +g(N50[i,j], n2)
#     }
#   }
# }
theta2_n2 <- theta1_n2 + 9/(2*n2)
theta1_bar_n2 <- mean(theta1_n2)
theta2_bar_n2 <- mean(theta2_n2)
bias1_n2 <- theta1_bar_n2 - theta
bias2_n2 <- theta2_bar_n2 - theta
SE1_n2<-sqrt(sum((theta1_n2 - theta1_bar_n2)^2)/sim_n)
SE2_n2<-sqrt(sum((theta2_n2 - theta2_bar_n2)^2)/sim_n)
R_mse1_n2<-sqrt(sum((theta1_n2 - theta)^2)/sim_n)
R_mse2_n2<-sqrt(sum((theta2_n2 - theta)^2)/sim_n)

result<-{data.frame("n" = c(n1," "," ",n2," "," "),
                    "Mean estimate" = c(" ",theta1_bar_n1,theta2_bar_n1," ",theta1_bar_n2,theta2_bar_n2),
                    "Bias" = c(" ",bias1_n1,bias2_n1," ",bias1_n2,bias2_n2),
                    "Sample SE" = c(" ",SE1_n1,SE2_n1," ",SE1_n2,SE2_n2),
                    "Root MSE" = c(" ",R_mse1_n1,R_mse2_n1," ",R_mse1_n2,R_mse2_n2),
                    row.names = c(" ","theta1_hat","theta2_hat","  ","theta1-hat","theta2-hat"))
}
result
# write.csv(result,"C:/Users/**Username**/Desktop/result.csv")

#2020-Q2------------------------------------------------------------------------------------
replicate <- 5000
n <- 30
set.seed(1234)
X <- matrix(rnorm(replicate*n), ncol=n, nrow=replicate)
# Theoretically, row and col should switch due to the matrix generating order in r.
# But this way(nrow is replicate) is more easily to understand.
Y <- X^2
a <- rep(0, 6)
# Q2-(1) #######
temp <- rowSums(X[,19:20]) # alternative: apply(X[,19:20],1,sum)
a[1]<- length(temp[temp < 11/29])/replicate # 0.616000
# Q2-(2) #######
temp <- apply(Y[,1:11],1, function(x) sum(x))
temp <- sqrt(temp)
a[2]<-quantile(temp, prob= 0.9) # 4.14688
# Q2-(3) #######
temp1 <- apply(Y[,1:11], 1, function(x) sum(x))
temp2 <- apply(Y[,1:29], 1, function(x) sum(x))
temp <- temp1/temp2
a[3]<-median(temp) # 0.3645801
# Q2-(4) #######
qnorm(0.75) # 0.6744898
temp <- apply(X, 1, function(x) quantile(x, prob= 0.75))
a[4]<- mean(temp) # 0.6439604
# Q2-(5) #######
qf(0.95,1,1) # 161.4476
qbeta(0.95, shape1 = 1/2, shape2 =1/2)
temp <- (Y[,1])/(Y[,2])
a[5]<- quantile(temp, prob = .95) # 177.0137
# Q2-(6)) #######
temp1 <- X[,1]
temp2 <- apply(X[,2:17], 1, function(x) sqrt(sum(x^2)))
temp <- temp1<temp2
a[6]<- length(temp)/replicate # 1


data.frame(Q = c(1:6), Ans = a)