Advertisement
Guest User

Untitled

a guest
Sep 17th, 2019
114
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.88 KB | None | 0 0
  1. ---
  2. title: "Assignment 1"
  3. author: "Nikola Danevski"
  4. date: "9/14/2019"
  5. output: pdf_document
  6. ---
  7.  
  8. ```{r setup, include=FALSE}
  9. knitr::opts_chunk$set(echo = TRUE) # chunk default: show each chunk's source code in the knitted document
  10. ```
  11. The histograms can be found below.
  12.  
  13. b) The first histogram is unimodal and roughly symmetric, with most of the data concentrated around its center.
  14.  
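  15. c) Sturges' formula, ceiling(log2(n)) + 1, suggests 13 bins because n = 2304 (log2(2304) is about 11.17, so 12 + 1 = 13).
  16. However, hist() actually uses 10 bins, because it treats this number only as a suggestion and rounds the breakpoints to "pretty" values.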
  17. j) The histogram shows that the data is skewed to the right. This is confirmed later, when
  18. the skewness is calculated and turns out to be positive.
  19. m) The new histogram is almost perfectly symmetric: it is unimodal and the data is spread evenly around the center. A further sign of this is that the mean, median and trimmed-mean lines nearly overlap.
  20. This suggests that the transformed distribution is very close to symmetric.
  21. o) The histogram of the square-root-transformed data is almost bell-shaped: it is unimodal and symmetric, with much of the data concentrated around the center.
  22.  
  23. ```{r}
  24. waves <- read.csv("waves.csv") # load the wave data (path is relative to the working directory)
  25. summary(waves) # per-column summary of the whole data set
  26. waves_height_max <- waves$Hmax # the Hmax column; every later step analyses this vector
  27. hmax_hist <- hist(waves_height_max, main = "Histogram of Hmax", xlab = "Maximum height", ylab = "Number of waves") # draw the histogram and keep the object hist() returns
  28. mean_waves_height <- mean(waves_height_max) # store the mean and the median for reuse
  29. median_waves_height <- median(waves_height_max)
  30. mean_waves_height # print the values of the mean and the median
  31. median_waves_height
  32. mean_trimmed_20 <- mean(waves_height_max, trim=0.2) # mean after trimming 20% of the observations from each end
  33. abline(v = mean_waves_height, col="red") # vertical reference lines on the histogram: mean (red), median (blue), trimmed mean (green)
  34. abline(v = median_waves_height, col="blue")
  35. abline(v = mean_trimmed_20, col="green")
  36. text(mean_waves_height + 0.28, 600,substitute(paste(bar(x),"=",m), list(m=round(mean_waves_height,2))), col="red") # label each line with its rounded value; the x offsets and y heights are hand-picked constants
  37. text(median_waves_height - 0.28, 600,substitute(paste(tilde(x),"=",m), list(m=round(median_waves_height,2))), col="blue")
  38. text(mean_trimmed_20 - 0.34, 500,substitute(paste(bar(x)[20],"=",m), list(m=round(mean_trimmed_20,2))), col="green")
  39. ```
  40.  
  41. ```{r}
  42. hmax_hist # print the stored histogram object (e.g. its breaks and counts) so the bins used by hist() can be inspected
  43. ```
  44.  
  45.  
  46. ```{r}
  47. q1 <- quantile(waves_height_max, 0.25) # first quartile
  48. q3 <- quantile(waves_height_max, 0.75) # third quartile
  49. q1
  50. q3
  51. IQR_height_max <- IQR(waves_height_max) # interquartile range
  52. IQR_height_max
  53. lower_fence <- q1 - 1.5 * IQR_height_max # the lower fence
  54. upper_fence <- q3 + 1.5 * IQR_height_max # the upper fence
  55. lower_fence
  56. upper_fence
  57. # List the outliers: rows of waves whose Hmax lies on or beyond a fence
  58. waves[waves_height_max >= upper_fence, ] # upper outliers
  59. waves[waves_height_max <= lower_fence, ] # lower outliers
  60. # Now calculating variance, standard deviation and coefficient of variation of the maximum wave height.
  61. variance <- var(waves_height_max)
  62. stand_dev <- sd(waves_height_max)
  63. cv <- stand_dev / mean_waves_height # coefficient of variation = standard deviation / mean
  64. variance
  65. stand_dev
  66. cv
  67. # Calculating skewness (skewness() is provided by the moments package)
  68. library(moments)
  69. skewness(waves_height_max)
  70. # Box-Cox transformation (TODO: confirm what part k) asks for)
  71. # the recommended one is the middle line (page 4)
  72. # boxcox() evaluates the profile log-likelihood over a grid of lambda values (returned as bc$x, bc$y);
  73. # the lambda with the highest log-likelihood is then used to transform Hmax, and the result is plotted like the original.
  74. library(MASS)
  75.  
  76. bc <- boxcox(waves_height_max ~ 1) # original plot, we want to see the range on the right
  77. lambda <- bc$x[which.max(bc$y)] # best lambda; which.max() always yields exactly one index and avoids comparing doubles with ==
  78. new_data_box_cox <- (waves_height_max^lambda - 1) / lambda # according to the Box-Cox formula
  79. hist(new_data_box_cox, main = "After applying Box-Cox", xlab = "New data with the transformation")
  80.  
  81. mean_new_data <- mean(new_data_box_cox)
  82. median_new_data <- median(new_data_box_cox)
  83. mean_new_data_trimmed_20 <- mean(new_data_box_cox, trim = 0.2) # mean after trimming 20% of the observations from each end
  84. abline(v = mean_new_data, col = "red") # reference lines: mean (red), median (blue), trimmed mean (green)
  85. abline(v = median_new_data, col = "blue")
  86. abline(v = mean_new_data_trimmed_20, col = "green")
  87. text(mean_new_data + 0.28, 400, substitute(paste(bar(x), "=", m), list(m = round(mean_new_data, 2))), col = "red") # labels; offsets and heights are hand-picked constants
  88. text(median_new_data - 0.28, 350, substitute(paste(tilde(x), "=", m), list(m = round(median_new_data, 2))), col = "blue")
  89. text(mean_new_data_trimmed_20 - 0.34, 400, substitute(paste(bar(x)[20], "=", m), list(m = round(mean_new_data_trimmed_20, 2))), col = "green")
  90.  
  91. new_data_sqrt <- (sqrt(waves_height_max)) # square-root transformation of Hmax
  92. hist(new_data_sqrt, main = "After applying sqrt transformation", xlab = "New data with sqrt trans")
  93. mean_sqrt <- mean(new_data_sqrt)
  94. median_sqrt <- median(new_data_sqrt)
  95. mean_sqrt_trimmed_20 <- mean(new_data_sqrt, trim = 0.2) # mean after trimming 20% of the observations from each end
  96. abline(v = mean_sqrt, col="red") # reference lines: mean (red), median (blue), trimmed mean (green)
  97. abline(v = median_sqrt, col="blue")
  98. abline(v = mean_sqrt_trimmed_20, col="green")
  99. text(mean_sqrt + 0.18, 300,substitute(paste(bar(x),"=",m), list(m=round(mean_sqrt,2))), col="red") # labels; offsets and heights are hand-picked constants
  100. text(median_sqrt - 0.18, 350,substitute(paste(tilde(x),"=",m), list(m=round(median_sqrt,2))), col="blue")
  101. text(mean_sqrt_trimmed_20 - 0.14, 300,substitute(paste(bar(x)[20],"=",m), list(m=round(mean_sqrt_trimmed_20,2))), col="green")
  102. summary(new_data_sqrt) # five-number summary plus mean of the transformed data
  103.  
  104.  
  105. # Normal Q-Q plots for the original, Box-Cox and square-root data; qqline() adds the reference line to each plot
  106. qqnorm(waves_height_max, main = "Normal data", xlab = "", ylab = "") # the untransformed Hmax values
  107. qqline(waves_height_max)
  108.  
  109. qqnorm(new_data_box_cox, main = "Box cox transformation data", xlab = "", ylab = "")
  110. qqline(new_data_box_cox)
  111.  
  112. qqnorm(new_data_sqrt, main = "Square root data", xlab = "", ylab = "")
  113. qqline(new_data_sqrt)
  114.  
  115. # TODO: confirm exactly what question p) asks for.
  116. # Empirical-rule table: for k = 1, 2, 3 and each data set, the interval mean -/+ k*sd, the theoretical coverage (68/95/99.7) and the actual % of observations inside it.
  117. mat <- matrix(NA, nrow = 9, ncol = 5)
  118. rownames(mat) <- c("Original", "", "", "Box-Cox", "", "", "Square Root", "", "")
  119. colnames(mat) <- c("k", "xbar-k*s", "xbar+k*s", "Theoretical %", "Actual %") # TODO fix the bar thing
  120. ### Fill in known quantities
  121. mat[, 1] <- c(1, 2, 3) # k; the three values are recycled over the three data sets
  122. mat[, 4] <- c(68, 95, 99.7) # theoretical coverage, recycled the same way
  123. # Interval bounds for the original data (rows 1-3)
  124. mat[1, 2] <- mean(waves_height_max) - 1 * sd(waves_height_max)
  125. mat[2, 2] <- mean(waves_height_max) - 2 * sd(waves_height_max)
  126. mat[3, 2] <- mean(waves_height_max) - 3 * sd(waves_height_max)
  127. mat[1, 3] <- mean(waves_height_max) + 1 * sd(waves_height_max)
  128. mat[2, 3] <- mean(waves_height_max) + 2 * sd(waves_height_max)
  129. mat[3, 3] <- mean(waves_height_max) + 3 * sd(waves_height_max)
  130. # Interval bounds for the Box-Cox data (rows 4-6)
  131. mat[4, 2] <- mean(new_data_box_cox) - 1 * sd(new_data_box_cox)
  132. mat[5, 2] <- mean(new_data_box_cox) - 2 * sd(new_data_box_cox)
  133. mat[6, 2] <- mean(new_data_box_cox) - 3 * sd(new_data_box_cox)
  134. mat[4, 3] <- mean(new_data_box_cox) + 1 * sd(new_data_box_cox)
  135. mat[5, 3] <- mean(new_data_box_cox) + 2 * sd(new_data_box_cox)
  136. mat[6, 3] <- mean(new_data_box_cox) + 3 * sd(new_data_box_cox)
  137. # Interval bounds for the square-root data (rows 7-9); these must come from new_data_sqrt, not the Box-Cox vector
  138. mat[7, 2] <- mean(new_data_sqrt) - 1 * sd(new_data_sqrt)
  139. mat[8, 2] <- mean(new_data_sqrt) - 2 * sd(new_data_sqrt)
  140. mat[9, 2] <- mean(new_data_sqrt) - 3 * sd(new_data_sqrt)
  141. mat[7, 3] <- mean(new_data_sqrt) + 1 * sd(new_data_sqrt)
  142. mat[8, 3] <- mean(new_data_sqrt) + 2 * sd(new_data_sqrt)
  143. mat[9, 3] <- mean(new_data_sqrt) + 3 * sd(new_data_sqrt)
  144. # Actual % of observations strictly inside each interval, original data
  145. mat[1, 5] <- sum(waves_height_max > mean(waves_height_max) - 1 * sd(waves_height_max) & waves_height_max < mean(waves_height_max) + 1 * sd(waves_height_max)) / length(waves_height_max) * 100
  146. mat[2, 5] <- sum(waves_height_max > mean(waves_height_max) - 2 * sd(waves_height_max) & waves_height_max < mean(waves_height_max) + 2 * sd(waves_height_max)) / length(waves_height_max) * 100
  147. mat[3, 5] <- sum(waves_height_max > mean(waves_height_max) - 3 * sd(waves_height_max) & waves_height_max < mean(waves_height_max) + 3 * sd(waves_height_max)) / length(waves_height_max) * 100
  148. # Actual %, Box-Cox data (row 6 is the k = 3 row, so its multiplier is 3)
  149. mat[4, 5] <- sum(new_data_box_cox > mean(new_data_box_cox) - 1 * sd(new_data_box_cox) & new_data_box_cox < mean(new_data_box_cox) + 1 * sd(new_data_box_cox)) / length(new_data_box_cox) * 100
  150. mat[5, 5] <- sum(new_data_box_cox > mean(new_data_box_cox) - 2 * sd(new_data_box_cox) & new_data_box_cox < mean(new_data_box_cox) + 2 * sd(new_data_box_cox)) / length(new_data_box_cox) * 100
  151. mat[6, 5] <- sum(new_data_box_cox > mean(new_data_box_cox) - 3 * sd(new_data_box_cox) & new_data_box_cox < mean(new_data_box_cox) + 3 * sd(new_data_box_cox)) / length(new_data_box_cox) * 100
  152. # Actual %, square-root data
  153. mat[7, 5] <- sum(new_data_sqrt > mean(new_data_sqrt) - 1 * sd(new_data_sqrt) & new_data_sqrt < mean(new_data_sqrt) + 1 * sd(new_data_sqrt)) / length(new_data_sqrt) * 100
  154. mat[8, 5] <- sum(new_data_sqrt > mean(new_data_sqrt) - 2 * sd(new_data_sqrt) & new_data_sqrt < mean(new_data_sqrt) + 2 * sd(new_data_sqrt)) / length(new_data_sqrt) * 100
  155. mat[9, 5] <- sum(new_data_sqrt > mean(new_data_sqrt) - 3 * sd(new_data_sqrt) & new_data_sqrt < mean(new_data_sqrt) + 3 * sd(new_data_sqrt)) / length(new_data_sqrt) * 100
  156.  
  157. library(knitr)
  158. kable(x = mat, digits = 2, row.names = TRUE, format = "markdown") # render the matrix as a markdown table, rounded to 2 digits
  159.  
  160. ```
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement