Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Source: http://r-statistics.co/Linear-Regression.html
# Exploratory look at the built-in `cars` data before fitting a regression.

# Build a single subtitle string listing the values that boxplot.stats()
# reports in `$out` for `x`.
#
# The values are collapsed first so the result always has length 1;
# paste() on its own would return one string per flagged value.
# Note: `$out` holds the outlying values themselves, not row indices; the
# label text is kept as in the original script.
outlier_caption <- function(x) {
  paste("Outlier rows: ", paste(boxplot.stats(x)$out, collapse = ", "))
}

head(cars)

# Scatterplot of stopping distance against speed with a smoothed trend line.
scatter.smooth(x = cars$speed, y = cars$dist, main = "Dist ~ Speed")

# Two box plots side by side (the 1 x 2 layout stays active afterwards).
par(mfrow = c(1, 2))
boxplot(cars$speed, main = "Speed", sub = outlier_caption(cars$speed))
boxplot(cars$dist, main = "Distance", sub = outlier_caption(cars$dist))

# Correlation between speed and distance.
cor(cars$speed, cars$dist)
# Fit a simple linear regression of stopping distance on speed using the
# full `cars` data set.
# In the formula, `~` is read as "against": dist against speed.
linearMod <- lm(dist ~ speed, data = cars)
print(linearMod)

# Back to a single plotting panel, then draw the raw points.
par(mfrow = c(1, 1))
plot(cars$speed, cars$dist)

# Overlay the fitted regression line. abline() reads the intercept and slope
# from the model object, so the line always matches the current fit instead
# of relying on coefficients copied by hand from printed output.
abline(linearMod, col = "magenta")

# summary() was also used earlier for descriptive statistics; on an lm object
# it reports the coefficient table, R-squared and the F statistic.
summary(linearMod)
# Recompute, by hand, the test statistics that summary() reports for the
# model stored in `linearMod`.
modelSummary <- summary(linearMod)          # capture model summary as an object
modelCoeffs <- modelSummary$coefficients    # coefficient table (matrix)

beta.estimate <- modelCoeffs["speed", "Estimate"]   # slope estimate for speed
std.error <- modelCoeffs["speed", "Std. Error"]     # its standard error

# t statistic and two-sided p-value for the slope. The degrees of freedom are
# the model's residual df (observations minus estimated coefficients), taken
# from the fit itself rather than derived from the data frame's column count.
t_value <- beta.estimate / std.error
p_value <- 2 * pt(-abs(t_value), df = df.residual(linearMod))

# The F statistic lives on the summary object, not on the lm object itself
# (`linearMod$fstatistic` is NULL). `f` is c(value, numdf, dendf).
f <- modelSummary$fstatistic
f_statistic <- f[1]

# Overall model p-value: upper tail of the F distribution.
model_p <- pf(f[1], f[2], f[3], lower.tail = FALSE)
# Create training and test data ----
# Fix the RNG seed so the random split is reproducible.
set.seed(100)

# Draw 80% of the row indices (rounded down) for training; the rest is test.
# seq_len() is used instead of 1:nrow() so an empty data frame yields an
# empty index set rather than c(1, 0).
trainingRowIndex <- sample(seq_len(nrow(cars)), floor(0.8 * nrow(cars)))
trainingData <- cars[trainingRowIndex, ]   # model training data
testData <- cars[-trainingRowIndex, ]      # held-out test data

# Build the model on training data ----
lmMod <- lm(dist ~ speed, data = trainingData)
distPred <- predict(lmMod, testData)       # predicted distance for test rows

# Actual vs. predicted distances side by side. The data frame is built
# directly from the two vectors instead of going through a cbind() matrix.
actuals_preds <- data.frame(actuals = testData$dist, predicteds = distPred)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement