Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Polynomial Regression
#
# Fits a straight-line model and a degree-4 polynomial model of Salary on
# Level, draws diagnostic and fit plots, and predicts the salary at level 6.5.
#
# Input: Data/Position_Salaries.csv, resolved against the working directory.
# Its 2nd and 3rd columns must be `Level` and `Salary`.
#
# The workspace is deliberately NOT cleared with rm(list = ls()): wiping the
# caller's objects is a destructive side effect; run the script in a fresh
# session instead.

# Dependencies ----
# Install ggplot2 if it is missing, then attach it unconditionally.
# (`if (!require(pkg)) install.packages(pkg)` installs the package but never
# attaches it, so the first ggplot() call fails on a fresh machine.)
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Importing the dataset ----
dataset <- read.csv(file.path("Data", "Position_Salaries.csv"))
dataset <- dataset[2:3]
# Fail early with a clear message if the expected columns are not there.
stopifnot(all(c("Level", "Salary") %in% names(dataset)))
plot(x = dataset$Level, y = dataset$Salary) # quick look at the raw data

# No train/test split: the dataset is too small to spare any rows.
# No feature scaling: it is not needed for lm().

# Fitting Linear Regression to the dataset ----
# The predictor is named explicitly rather than written as ".", so the model
# cannot silently pick up the helper columns added to `dataset` further down.
lin_reg <- lm(formula = Salary ~ Level, data = dataset)

# Fitting Polynomial Regression to the dataset ----
# Variant 1: build the powers by hand and regress on them.
dataset$Level2 <- dataset$Level^2
dataset$Level3 <- dataset$Level^3
dataset$Level4 <- dataset$Level^4
poly_reg_manual <- lm(
  formula = Salary ~ Level + Level2 + Level3 + Level4,
  data = dataset
)

# Variant 2: let poly() choose the degree. By default it builds an orthogonal
# polynomial basis: different coefficients, same fitted values.
poly_reg_orthogonal <- lm(formula = Salary ~ poly(Level, 4), data = dataset)

# Variant 3: poly() with raw = TRUE uses the plain powers Level, Level^2, ...
# This is the model used for every plot and prediction below. Because the
# powers are computed inside the formula, predict() only needs a `Level`
# column in `newdata`.
poly_reg <- lm(formula = Salary ~ poly(Level, 4, raw = TRUE), data = dataset)

# Residual diagnostics ----
# Fitted values against residuals. A good fit shows no clear pattern.
# Simple linear regression:
plot(fitted(lin_reg), residuals(lin_reg))
# Polynomial regression:
plot(fitted(poly_reg), residuals(poly_reg))

# Plot helpers ----
# Observed points in red with a model curve in blue.
#   observed: data frame with the measured `Level` and `Salary`.
#   curve:    data frame with `Level` and the model's predicted `Salary`.
#   title:    plot title.
# Returns the ggplot object, so callers can add further layers or themes.
plot_fit <- function(observed, curve, title) {
  ggplot(mapping = aes(x = Level, y = Salary)) +
    geom_point(data = observed, colour = "red") +
    geom_line(data = curve, colour = "blue") +
    ggtitle(title) +
    xlab("Level") +
    ylab("Salary")
}

# Black-and-white theme with a centred title, shared by the polynomial plots.
centred_title_theme <- theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))

# Visualising the Linear Regression results ----
linear_curve <- data.frame(
  Level = dataset$Level,
  Salary = predict(lin_reg, newdata = dataset)
)
plot_fit(dataset, linear_curve, "Truth or Bluff (Linear Regression)")

# Visualising the Polynomial Regression results ----
poly_curve <- data.frame(
  Level = dataset$Level,
  Salary = predict(poly_reg, newdata = dataset)
)
plot_fit(dataset, poly_curve, "Truth or Bluff (Polynomial Regression)") +
  centred_title_theme

# Visualising the Polynomial Regression results on a finer grid ----
# Evaluating the model every 0.1 levels gives a smoother curve than joining
# the ten observed levels with straight segments.
x_grid <- seq(min(dataset$Level), max(dataset$Level), by = 0.1)
smooth_curve <- data.frame(Level = x_grid)
smooth_curve$Salary <- predict(poly_reg, newdata = smooth_curve)
plot_fit(dataset, smooth_curve, "Truth or Bluff (Polynomial Regression)") +
  centred_title_theme

# Predicting a new result with Linear Regression ----
predict(lin_reg, newdata = data.frame(Level = 6.5))

# Predicting a new result with Polynomial Regression ----
predict(poly_reg, newdata = data.frame(Level = 6.5))
Add Comment
Please, Sign In to add comment