Guest User

Untitled

a guest
May 21st, 2018
160
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.63 KB | None | 0 0
  # Multiple Linear Regression
  #
  # Fits a multiple linear regression of Profit on the other columns of the
  # 50_Startups data and then runs automated variable selection on it.

  # NOTE: the workspace is deliberately not wiped with rm(list = ls()).
  # Clearing the caller's global environment is a destructive side effect;
  # run the script in a fresh R session when a clean state is needed.

  # Importing the dataset
  # The path is relative, so it is resolved against the current working
  # directory, exactly as file.path(getwd(), ...) would be.
  dataset_path <- file.path("Data", "50_Startups.csv")
  if (!file.exists(dataset_path)) {
    stop(
      "Dataset not found: ", normalizePath(dataset_path, mustWork = FALSE),
      call. = FALSE
    )
  }
  dataset <- read.csv(dataset_path)

  # Encoding categorical data

  # Levels are derived from the data instead of being hard-coded, e.g.
  #   factor(dataset$State,
  #          levels = c("New York", "California", "Florida"),
  #          labels = c(1, 2, 3))
  #
  # factor() requires distinct levels, so take the unique non-missing values
  # (in order of first appearance) rather than the raw column, which repeats
  # each state once per row.
  state_levels <- unique(dataset$State[!is.na(dataset$State)])
  print(state_levels)  # show which categories the dataset contains

  # One integer label per distinct level; seq_along() is safe even when
  # there are no levels at all.
  dataset$State <- factor(
    dataset$State,
    levels = state_levels,
    labels = seq_along(state_levels)
  )

  # Splitting the dataset into the Training set and Test set

  # Install caTools on first use, then attach it. The library() call is
  # needed in both cases: a failed require() followed by install.packages()
  # leaves the package installed but not attached, so sample.split() would
  # not be found on a first run.
  if (!requireNamespace("caTools", quietly = TRUE)) {
    install.packages("caTools")
  }
  library(caTools)

  set.seed(123)  # fixed seed so the split is reproducible

  # Logical vector marking the rows that go to the training set (80%).
  split <- sample.split(dataset$Profit, SplitRatio = 0.8)
  training_set <- subset(dataset, split == TRUE)
  test_set <- subset(dataset, split == FALSE)

  # No feature scaling is needed for regression.

  # Fitting Multiple Linear Regression to the Training set:
  # Profit is modelled on every other column of training_set.
  regressor <- lm(Profit ~ ., data = training_set)

  # Predicting the Test set results
  y_pred <- predict(regressor, test_set)

  # Building the optimal model
  # This requires selecting the best variables. Doing that by hand (dropping
  # one predictor at a time) is tedious, so step() is used to search for the
  # model automatically.
  # NOTE(review): the models below are fitted on the full dataset, not on
  # training_set -- confirm that this is intended.

  # Smallest candidate model: a single independent variable.
  regressor_1 <- lm(formula = Profit ~ Administration, data = dataset)
  # Largest candidate model: all the independent variables.
  regressor_full <- lm(formula = Profit ~ ., data = dataset)

  # The selected models are stored and printed explicitly so the results are
  # kept for later use and are still shown when the script is run through
  # source(), where top-level values are not auto-printed.

  # The forward variable selection for regression:
  # start from regressor_1 and only ever add terms, up to the full model.
  forward_model <- step(
    regressor_1,
    scope = list(
      lower = formula(regressor_1),
      upper = formula(regressor_full)
    ),
    direction = "forward"
  )
  print(forward_model)

  # The backward variable selection for regression:
  # start from the full model and only ever drop terms. step() has no `data`
  # argument (the fitted model already carries its data), so none is passed.
  backward_model <- step(regressor_full, direction = "backward")
  print(backward_model)

  # The stepwise (also known as bidirectional) variable selection:
  # start from regressor_1; terms may be added or dropped at each step.
  stepwise_model <- step(
    regressor_1,
    scope = list(upper = formula(regressor_full)),
    direction = "both"
  )
  print(stepwise_model)

  ## Plotting ##

  # Plot on the fly: plotting the whole data frame at once.
  plot(dataset, pch = 16, col = "blue", main = "Title")

  # Finding the correlation among variables
  # Install corrplot on first use, then attach it. library() is required
  # because install.packages() alone does not attach the package.
  if (!requireNamespace("corrplot", quietly = TRUE)) {
    install.packages("corrplot")
  }
  library(corrplot)

  # cor() needs numeric input, so keep only the numeric columns (this leaves
  # out the non-numeric State column) instead of hard-coding positions.
  numeric_cols <- vapply(dataset, is.numeric, logical(1))
  data_cor <- cor(dataset[numeric_cols])
  corrplot(data_cor, method = "number")

  # Plotting the results of regression
  plot(regressor, pch = 16, col = "blue", main = "Title")

  # The call above produces several plots; to get one specific plot, select
  # it with `which`.
  plot(regressor, pch = 16, which = 1, col = "blue", main = "Title")
Add Comment
Please, Sign In to add comment