Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- require(data.table)
- require(ggplot2)
- require(MASS)
- require(fBasics)
- require(moments)
- alpha = 0.05
- #Question 1
- # 1. Consider the daily stock returns of American Express (AXP), Caterpillar (CAT), and Starbucks
- # (SBUX) from January 1999 to December 2008. The data are simple returns given in the file d-3stocks9908.txt (date, axp, cat, sbux).
- # (a) Compute the sample mean, standard deviation, skewness, excess kurtosis, minimum, and maximum of each simple return series. (Hint: use the R command basicStats of fBasics)
- # (b) Transform the simple returns to log returns. Compute the sample mean, standard deviation, skewness, excess kurtosis, minimum, and maximum of each log return series.
- # (c) Test the null hypothesis that the mean of the log returns of AXP is zero. (Hint: use the R command t.test)
- # (d) Obtain the histogram (with nclass=40) and sample density plot of the daily log returns of AXP stock.
- # (e) Test if log return of AXP follows normal distribution by using at least two methods. (Need to give explanation)
- # Import Data
- q1_data = fread("d-3stocks9908.txt",header=T)
- # Part a
- q1_data_stats = basicStats(q1_data)
- sprintf("Getting Simple Returns")
- for (stock in colnames(q1_data_stats)[-1]){
- print(sprintf("%s (Simple Returns): Mean = %s , Stdev = %s, Skewness = %s , Kurtosis = %s , Minimum = %s , Maximum = %s",
- stock , q1_data_stats["Mean",stock] , q1_data_stats["Stdev",stock] , q1_data_stats["Skewness",stock] , q1_data_stats["Kurtosis",stock] , q1_data_stats["Minimum",stock] , q1_data_stats["Maximum",stock] ))
- }
- # Part B
- q1_logR = log(q1_data+1)
- q1_logR_stats = basicStats(q1_logR)
- for (stock in colnames(q1_logR_stats)[-1]){
- print(sprintf("%s (Log Returns): Mean = %s , Stdev = %s, Skewness = %s , Kurtosis = %s , Minimum = %s , Maximum = %s",
- stock , q1_logR_stats["Mean",stock] , q1_logR_stats["Stdev",stock] , q1_logR_stats["Skewness",stock] , q1_logR_stats["Kurtosis",stock] , q1_logR_stats["Minimum",stock] , q1_logR_stats["Maximum",stock] ))
- }
- # Part c
- axp_pvalue = t.test(q1_logR$axp)$p.value
- sprintf("AXP: The p-value is %s, hence we %s the null hypothesis that the mean is 0", axp_pvalue , if (axp_pvalue <= alpha) "Reject" else "Do not reject")
- # Part D
- ggplot(data = q1_logR, aes(q1_logR$axp)) +
- geom_histogram(bins = 40, col="grey", fill="green", alpha = .2) +
- labs(title="Histogram of Daily Log Returns") +
- labs(x="Daily Log Returns", y="Count")
- plot(density(q1_logR$axp))
- # Part E
- axp_jb_test = normalTest(q1_logR$axp,method="jb")
- axp_jb_pvalue = axp_jb_test@test$p.value
- sprintf("Jarque Bera Test returns the values for the 'Chi-squared' statistic with 2 degrees of freedom, and the asymptotic p-value.")
- sprintf("P Value is %s, hence %s null hypothesis of normality", axp_jb_pvalue ,if (axp_jb_pvalue <= alpha) "Reject" else "Do not reject" )
- axp_sw_test = normalTest(q1_logR$axp,method="sw")
- axp_sw_pvalue = axp_sw_test@test$p.value
- sprintf("Shapiro Test returns the values for the 'W' statistic and the p-value")
- sprintf("P Value is %s, hence %s null hypothesis of normality", axp_sw_pvalue ,if (axp_sw_pvalue <= alpha) "Reject" else "Do not reject" )
- #Question 2
- # 2. Answer the same questions as Problem 1 but using monthly returns for General Motors (GM),
- # CRSP value-weighted index (VW), CRSP equal-weighted index (EW) and S&P composite index
- # from January 1975 to December 2008. The returns of the indexes include dividend distributions.
- # Data file is m-gm3dx7508.txt (date, gm, vw, ew, sp).
- # Import Data
- q2_data <- fread("m-gm3dx7508.txt",header=T)
- # Part a
- q2_data_stats = basicStats(q2_data)
- for (stock in colnames(q2_data_stats)[-1]){
- print(sprintf("%s (Simple Returns): Mean = %s , Stdev = %s, Skewness = %s , Kurtosis = %s , Minimum = %s , Maximum = %s",
- stock , q2_data_stats["Mean",stock] , q2_data_stats["Stdev",stock] , q2_data_stats["Skewness",stock] , q2_data_stats["Kurtosis",stock] , q2_data_stats["Minimum",stock] , q2_data_stats["Maximum",stock] ))
- }
- # Part B
- q2_logR = log(q2_data+1)
- q2_logR_stats = basicStats(q2_logR)
- for (stock in colnames(q2_logR_stats)[-1]){
- print(sprintf("%s (Log Returns): Mean = %s , Stdev = %s, Skewness = %s , Kurtosis = %s , Minimum = %s , Maximum = %s",
- stock , q2_logR_stats["Mean",stock] , q2_logR_stats["Stdev",stock] , q2_logR_stats["Skewness",stock] , q2_logR_stats["Kurtosis",stock] , q2_logR_stats["Minimum",stock] , q2_logR_stats["Maximum",stock] ))
- }
- for (stock in colnames(q2_logR)[-1]){
- # Part c
- pvalue = t.test(q2_logR_stats[,stock])$p.value
- print(sprintf("%s: The p-value is %s, hence we %s the null hypothesis that the mean is 0", stock, pvalue , if (pvalue <= alpha) "Reject" else "Do not reject"))
- # Part D
- print( ggplot(data = q2_logR, aes(q2_logR[[stock]])) +
- geom_histogram(bins = 40, col="grey", fill="green", alpha = .2) +
- labs(title= cat("Histogram of Daily Log Returns")) +
- labs(x=paste0("Daily Log Returns for ", stock), y="Count"))
- plot(density(q2_logR[[stock]]))
- # Part E
- jb_test = normalTest(q2_logR[[stock]],method="jb")
- jb_pvalue = jb_test@test$p.value
- print(sprintf("Jarque Bera Test returns the values for the 'Chi-squared' statistic with 2 degrees of freedom, and the asymptotic p-value."))
- print(sprintf("P Value is %s, hence %s null hypothesis of normality", jb_pvalue ,if (jb_pvalue <= alpha) "Reject" else "Do not reject" ))
- sw_test = normalTest(q2_logR[[stock]],method="sw")
- sw_pvalue = sw_test@test$p.value
- print(sprintf("Shapiro Test returns the values for the 'W' statistic and the p-value"))
- print(sprintf("P Value is %s, hence %s null hypothesis of normality", sw_pvalue ,if (sw_pvalue <= alpha) "Reject" else "Do not reject" ))
- }
- # Question 3
- # 3. Consider the monthly stock returns of value-weighted index (VW) from January 1975 to December 2008 in Problem 2.
- # Perform the tests and draw conclusions using the 5% significance level.
- # (a) Test H0: u = 0 versus Ha : μ != 0,where μ denotes the mean return.
- # (b) Test H0: m3 = 0 versus Ha : m3 != 0, where m3 denotes the skewness.
- # (c) Test H0: K = 3 versus Ha : K != 3, where K denotes the kurtosis.
- # Part a
- stock = "vw"
- pvalue = t.test(q2_logR[[stock]] ) $p.value
- sprintf("%s: The p-value is %s, hence we %s the null hypothesis that the mean is 0", stock, pvalue , if (pvalue <= alpha) "Reject" else "Do not reject")
- # Part b
- pvalue = agostino.test(q2_logR[[stock]] )$p.value
- sprintf("%s:: The p-value is %s, hence we %s the null hypothesis that skewness is 0", stock, pvalue , if (pvalue <= alpha) "Reject" else "Do not reject")
- # Part c
- pvalue = anscombe.test(q2_logR[[stock]] )$p.value
- sprintf("%s:: The p-value is %s, hence we %s the null hypothesis that the kurtosis is 3", stock, pvalue , if (pvalue <= alpha) "Reject" else "Do not reject")
- # Question 4
- # 4. Consider the daily log returns of AXP stock from January 1999 to December 2008 as in
- # Problem 1. Perform the following tests:
- # (a) Test the null hypothesis that the skewness measure of the returns is zero;
- # (b) Test the null hypothesis that the excess kurtosis of the returns is zero.
- stock = "axp"
- pvalue = agostino.test(q1_logR[[stock]] )$p.value
- sprintf("%s:: The p-value is %s, hence we %s the null hypothesis that skewness is 0", stock, pvalue , if (pvalue <= alpha) "Reject" else "Do not reject")
- # Part c
- pvalue = anscombe.test(q1_logR[[stock]] )$p.value
- sprintf("%s:: The p-value is %s, hence we %s the null hypothesis that the kurtosis is 3", stock, pvalue , if (pvalue <= alpha) "Reject" else "Do not reject")
- #Question 5
- # 5. Daily foreign exchange rates (spot rates) can be obtained from the Federal Reserve Bank in St
- # Louis (FRED). The data are the noon buying rates in New York City certified by the Federal
- # Reserve Bank of New York. Consider the exchange rates between the U.S. dollar and the Euro
- # from January 4, 1999 to March 8, 2013. See the file d-exuseu.txt.
- #
- # (a) Compute the daily log return of the exchange rate.
- # (b) Compute the sample mean, standard deviation, skewness, excess kurtosis, minimum, and
- # maximum of the log returns of the exchange rate.
- # (c) Obtain a density plot of the daily long returns of Dollar-Euro exchange rate.
- # (d) Test H0: u = 0 versus Ha : u != 0, where μ denotes the mean of the daily log return of Dollar-Euro exchange rate.
- # Par A
- q5_data = fread("d-exuseu.txt",header=T)
- q5_logR = diff(log(q5_data$VALUE))
- #5b
- q5_stats = basicStats(q5_logR)
- sprintf("FRED (Log Returns): Mean = %s , Stdev = %s, Skewness = %s , Kurtosis = %s , Minimum = %s , Maximum = %s",
- q5_stats["Mean",] , q5_stats["Stdev",] , q5_stats["Skewness",] , q5_stats["Kurtosis",] , q5_stats["Minimum",] , q5_stats["Maximum",] )
- #5c
- plot(density(q5_logR))
- #5d
- pvalue = t.test(q5_logR)$p.value
- sprintf("%s: The p-value is %s, hence we %s the null hypothesis that the mean is 0", "Dollar Euro Exchange Return", pvalue , if (pvalue <= alpha) "Reject" else "Do not reject")
- #Question 6
- # 6. Consider SP500 and IBM data (in file ‘SP.csv’ and ‘IBM.csv’ respectively):
- # (a) Find the log return of SP and IBM by using ‘Adjusted Price’
- # (b) Merge the above two return series into a new dataset (hint: using ‘data.frame’) and plot them (y is IBM, x is SP)
- # (c) Find the best linear model by using AIC as criterion (need to show all your models)
- sp_data = fread("SP.csv",header=T)
- ibm_data = fread("IBM.csv",header=T)
- # Part A
- sp_logR = diff(log(sp_data[["SP.Adjusted"]]))
- ibm_logR = diff(log(ibm_data[["IBM.Adjusted"]]))
- # Part B
- new_table = data.table(y = ibm_logR , x = sp_logR)
- qplot(new_table$y, new_table$x)
- # Part C
- aic_list = list()
- # MODEL 1: Simple linear regression with intercept
- m1 <- lm(y~x, new_table)
- summary(m1)
- aic_list = c(aic_list ,AIC(m1))
- # MODEL 2: Simple linear regression with no intercept
- m2 <- lm(y ~ -1 + x, new_table)
- summary(m2)
- aic_list = c(aic_list ,AIC(m2))
- # Get index of all elements whose returns are non positive
- idx = which(new_table$x <= 0)
- # List with non positive returns, otherwise 0
- nsp <- rep(0,length(new_table$x))
- nsp[idx] = new_table$x[idx]
- # List with 1 when non positive returns, otherwise 0
- c1 <- rep(0,length(new_table$x))
- c1[idx] = 1
- new_table2 <- data.frame(y = new_table$y, x = new_table$x, c1, nsp) # Show the resulting variables
- # Model 3: Different intercepts for positive, and non positive returns
- m3 <- lm(y ~ x + c1 , new_table2) #with different intercepts (alpha) for positive and negative SP log returns
- summary(m3)
- aic_list = c(aic_list ,AIC(m3))
- # Model 4: Different coefficients for positive and non positive returns
- m4 <- lm(y ~ x + nsp, new_table2) #with different coefficients (beta) for positive and negative SP log returns
- summary(m4)
- aic_list = c(aic_list ,AIC(m4))
- # Model 5: Different coefficients and intercept for positive and non positive returns
- m5 <- lm(y ~ x + c1 + nsp, new_table2)
- summary(m5)
- aic_list = c(aic_list ,AIC(m5))
- idx = which.min(aic_list)
- sprintf("The best model is model %s, with an AIC of %s" , idx , aic_list[idx])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement