# Not a member of Pastebin yet?
# Sign Up,
# it unlocks many cool features!
# Multiple Linear Regression
#
# Reads the startups data set, encodes the categorical State column, splits
# the rows into training and test sets, fits a linear model of Profit on all
# other columns, predicts on the test set, runs forward / backward /
# bidirectional stepwise variable selection, and draws exploratory and
# diagnostic plots.
#
# Objects left in the workspace: dataset, split, training_set, test_set,
# regressor, y_pred, regressor_1, regressor_full, forward_model,
# backward_model, stepwise_model, data_cor.
#
# The script no longer starts with rm(list = ls()): that wipes the caller's
# workspace without giving a clean session. Restart R for a clean slate.

# Packages ----
# requireNamespace() only checks availability; library() then attaches the
# package (and fails loudly if the installation did not succeed). The previous
# `if (!require(pkg)) install.packages(pkg)` pattern left a freshly installed
# package unattached, so sample.split() / corrplot() were not found.
for (pkg in c("caTools", "corrplot")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

# Importing the dataset ----
# The relative path is resolved against the current working directory.
dataset <- read.csv(file.path("Data", "50_Startups.csv"))

# Encoding categorical data ----
# Derive the levels from the data instead of hard-coding them. The level set
# must be the *distinct* values: passing the whole column as `levels` fails
# because factor levels may not be duplicated. Labels are 1, 2, 3, ... in
# order of first appearance.
state_levels <- unique(as.character(dataset$State))
print(state_levels) # show which categories are present
dataset$State <- factor(dataset$State,
                        levels = state_levels,
                        labels = seq_along(state_levels))

# Splitting the dataset into the Training set and Test set ----
set.seed(123) # fixed seed so the split is reproducible
split <- sample.split(dataset$Profit, SplitRatio = 0.8)
training_set <- dataset[split, ]
test_set <- dataset[!split, ]

# No feature scaling is applied before fitting the regression.

# Fitting Multiple Linear Regression to the Training set ----
regressor <- lm(formula = Profit ~ ., data = training_set)

# Predicting the Test set results ----
y_pred <- predict(regressor, newdata = test_set)

# Building the optimal model ----
# Variable selection is automated with step() instead of removing predictors
# by hand one at a time.
# NOTE(review): selection is run on the full dataset, not on training_set, as
# in the original script -- confirm that this is intended.
regressor_1 <- lm(formula = Profit ~ Administration, data = dataset) # one predictor
regressor_full <- lm(formula = Profit ~ ., data = dataset) # all predictors

# step() has no `data` argument (the models already carry their data), so the
# stray `data = dataset` arguments were removed. The scope is given as
# formulas, which is the documented form.

# Forward selection: start from the small model and add terms.
forward_model <- step(regressor_1,
                      scope = list(lower = formula(regressor_1),
                                   upper = formula(regressor_full)),
                      direction = "forward")
print(forward_model)

# Backward elimination: start from the full model and drop terms.
backward_model <- step(regressor_full, direction = "backward")
print(backward_model)

# Stepwise (bidirectional) selection: terms may be added or dropped.
stepwise_model <- step(regressor_1,
                       scope = list(upper = formula(regressor_full)),
                       direction = "both")
print(stepwise_model)

# Plotting ----
# Quick scatterplot matrix of every column against every other column.
plot(dataset, pch = 16, col = "blue", main = "Title")

# Correlation among the numeric variables. cor() needs numeric input, so
# non-numeric columns (the State factor) are left out.
numeric_columns <- vapply(dataset, is.numeric, logical(1))
data_cor <- cor(dataset[numeric_columns])
corrplot(data_cor, method = "number")

# Diagnostic plots of the fitted regression (produces several plots).
plot(regressor, pch = 16, col = "blue", main = "Title")

# Use `which` to request a single diagnostic plot.
plot(regressor, pch = 16, which = 1, col = "blue", main = "Title")
# Add Comment
# Please, Sign In to add comment