Advertisement
icatalin

PS lab 14 (razvan)

Jan 15th, 2019
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 2.07 KB | None | 0 0
  # Exploratory look at the built-in `cars` data set (speed vs. stopping
  # distance) before fitting a regression model.
  # Source tutorial: http://r-statistics.co/Linear-Regression.html

  # Build a single-string subtitle listing the outlier values of `x`.
  #
  # `boxplot.stats(x)$out` holds the observations lying beyond the whiskers
  # (the values themselves, not row indices, despite the label text).
  # Collapsing them first guarantees exactly one string even when there are
  # zero or several outliers; a bare paste() would return one string per
  # outlier and hand `sub=` a multi-element vector.
  outlier_subtitle <- function(x) {
    outliers <- boxplot.stats(x)$out
    paste("Outlier rows: ", paste(outliers, collapse = ", "))
  }

  head(cars)
  scatter.smooth(x = cars$speed, y = cars$dist, main = "Dist ~ Speed")

  # Two panels side by side: one box plot per variable.
  par(mfrow = c(1, 2))
  boxplot(cars$speed, main = "Speed", sub = outlier_subtitle(cars$speed))
  boxplot(cars$dist, main = "Distance", sub = outlier_subtitle(cars$dist))

  # Correlation between speed and distance; `cars$speed` / `cars$dist`
  # access the columns of the `cars` data set.
  cor(cars$speed, cars$dist)
  12.  
  # Fit a simple linear regression of stopping distance on speed using the
  # full `cars` data set, plot the fitted line, and recompute by hand the
  # statistics that summary() reports.
  linearMod <- lm(dist ~ speed, data = cars)
  print(linearMod)  # in a formula, `~` is read as "against": dist against speed

  # Back to a single plotting panel.
  par(mfrow = c(1, 1))

  # Scatter plot with the fitted regression line. The line is drawn from the
  # model itself rather than from hand-copied, rounded coefficients, so it
  # stays correct if the data or the formula change.
  plot(cars$speed, cars$dist)
  abline(reg = linearMod, col = "magenta")

  # summary() (also used earlier for descriptive statistics) is computed once
  # and reused; the bare name on the next line auto-prints it.
  modelSummary <- summary(linearMod)
  modelSummary
  modelCoeffs <- modelSummary$coefficients  # coefficient table (matrix)
  beta.estimate <- modelCoeffs["speed", "Estimate"]  # slope estimate
  std.error <- modelCoeffs["speed", "Std. Error"]  # slope standard error
  t_value <- beta.estimate / std.error  # t statistic of the slope

  # Two-sided p-value of the slope. The residual degrees of freedom come from
  # the model (n - number of coefficients) instead of nrow - ncol of the data,
  # which only matched by coincidence.
  p_value <- 2 * pt(-abs(t_value), df = df.residual(linearMod))

  # The F statistic is stored on the summary object, not on the lm object
  # (`linearMod$fstatistic` is NULL). `f` holds value, numdf and dendf.
  f <- modelSummary$fstatistic
  f_statistic <- f[1]
  model_p <- pf(f[1], f[2], f[3], lower.tail = FALSE)  # overall model p-value
  33.  
  34.  
  # Hold-out validation: fit the model on a random 80% of `cars` and predict
  # the stopping distance for the remaining 20%.

  # Fixed seed so the random split is reproducible.
  set.seed(100)

  # Row indices of the training sample. seq_len() is safe for zero-row data
  # (1:0 would yield c(1, 0)), and floor() makes the integer sample size
  # explicit instead of relying on sample() truncating a fractional size.
  trainingRowIndex <- sample(seq_len(nrow(cars)), size = floor(0.8 * nrow(cars)))
  trainingData <- cars[trainingRowIndex, ]  # rows used to fit the model
  testData <- cars[-trainingRowIndex, ]  # held-out rows

  # Fit on the training rows, then predict distance for the test rows.
  lmMod <- lm(dist ~ speed, data = trainingData)
  distPred <- predict(lmMod, testData)

  # Observed vs. predicted distances. The data frame is built directly rather
  # than via cbind(), which would first coerce both columns into one matrix
  # type. Row names come from the names of `distPred` (the test-set rows).
  actuals_preds <- data.frame(actuals = testData$dist, predicteds = distPred)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement