Guest User

Untitled

a guest
Jul 18th, 2018
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.07 KB | None | 0 0
  1. #Read data set and check structure and names
  2. a3 = read.csv("C:/Users/Ricky/Desktop/STA302/Assignment 3/PPRSA_data_2014.1.1-2014.12.31.csv",header=T) dim(a3)
  3.  
  4. str(a3)
  5.  
  6. class(a3)
  7.  
  8. names(a3)
  9.  
  10. #Define diagonal and lower panel of the scatter plot matrix
  11. panel.hist <- function(x, ...)
  12. {
  13. usr <- par("usr"); on.exit(par(usr))
  14. par(usr = c(usr[1:2], 0, 1.5) )
  15. h <- hist(x, plot = FALSE)
  16. breaks <- h$breaks; nB <- length(breaks)
  17. y <- h$counts; y <- y/max(y)
  18. rect(breaks[-nB], 0, breaks[-1], y, col = "cyan", ...)
  19. }
  20. panel.cor <- function(x, y, digits = 2, cex.cor, ...)
  21. {
  22. usr <- par("usr"); on.exit(par(usr)) par(usr = c(0, 1, 0, 1))
  23. r <- cor(x, y)
  24. txt <- format(c(r, 0.123456789), digits = digits)[1]
  25. txt <- paste("r= ", txt, sep = "") text(0.5, 0.6, txt)
  26. p <- cor.test(x, y)$p.value
  27. txt2 <- format(c(p, 0.123456789), digits = digits)[1] txt2 <- paste("p= ", txt2, sep = "")
  28. if(p<0.01) txt2 <- paste("p= ", "<0.01", sep = "") text(0.5, 0.4, txt2)
  29. }
  30.  
  31. #Buil Model 0 with only y
  32. m0 <- lm(pm2.5 ~ 1, data = a3) summary(m0)
  33.  
  34. anova(m1)
  35.  
  36. pairs(pm2.5 ~ month + hour + DEWP + TEMP + PRES + Iws + Is + Ir, main = "Beijing PM2.5", data = a3)
  37.  
  38. #Select the best model according to AIC
  39. mboth = step ( m0 ,scope = list ( lower = formula ( m0 ),upper = formula ( m1 )),direction = "both")
  40.  
  41. #Build Model 2 with log(y) transformation
  42. m2 <- lm(log(pm2.5) ~ month + hour + DEWP + TEMP + PRES + Iws + Is + Ir, data = a3) summary(m2)
  43.  
  44. anova(m2)
  45.  
  46. #Select the best model according to AIC
  47. Beijing PM2.5
  48. mboth = step ( m00 ,scope = list ( lower = formula ( m00 ),upper = formula ( m2 )), direction = "both")
  49.  
  50. formula ( mboth )
  51.  
  52. summary ( mboth )
  53.  
  54. #Build Model 3 with sqrt(y)
  55. m3 <- lm(sqrt(pm2.5) ~ month + hour + DEWP + TEMP + PRES + Iws + Is + Ir, data = a3) summary(m3)
  56.  
  57. #Plot Residual plot, Q-Q plot and Scale-location plot for Model 1, 2 & 3
  58. par(mfrow=c(3,3))
  59. plot(m1,main="Raw data")
  60. plot(m2,which=1,main="Log(pm2.5)")
  61. plot(m3,which=1,main="Sqrt(pm2.5)")
  62. plot(m1,which=2,main="Raw data")
  63. plot(m2,which=2,main="Log(pm2.5)")
  64. plot(m3,which=2,main="Sqrt(pm2.5)")
  65. plot(m1,which=3,main="Raw data")
  66. plot(m2,which=3,main="Log(pm2.5)")
  67. plot(m3,which=3,main="Sqrt(pm2.5)")
  68.  
  69. #Use box-cox method to find optimal lumbda
  70. library(MASS) bc=boxcox(m1,lambda=seq(-2,2,by=0.01))
  71. bc$x[which.max(bc$y)]
  72.  
  73. #Perform Log transformation on Iws
  74. m4 <- lm(log(pm2.5) ~ month + hour + DEWP + TEMP + PRES + log(Iws) + Is + Ir, data = a3) summary(m4)
  75. anova(m4)
  76.  
  77. #Model 5 with Log(Iws), excluding TEMP and PRES
  78. m5 <- lm(log(pm2.5) ~ month + hour + DEWP + log(Iws) + Is + Ir, data = a3) summary(m5)
  79.  
  80. #compare AICs of all models
  81. c(AIC(m1), AIC(m2), AIC(m3), AIC(m4), AIC(m5))
  82.  
  83. #Calculating Confidence interval for all models
  84. confint(m1, level = 0.95)
  85. confint(m2, level = 0.95)
  86. confint(m3 ,level = 0.95)
  87. confint(m4, level = 0.95)
  88. confint(m5, level = 0.95)
  89.  
  90. #Taking prediction interval for all models at new x's
  91. newdata = data.frame(month = 6, hour = 18, DEWP = -10, TEMP = 14, PRES = 1015, Iws = 50, I predict(m1, newdata, interval="predict")
  92. predict(m2, newdata, interval="predict")
  93. predict(m3, newdata, interval="predict")
  94. predict(m4, newdata, interval="predict")
  95. predict(m5, newdata, interval="predict")
Add Comment
Please, Sign In to add comment