Guest User

Untitled

a guest
Oct 22nd, 2017
105
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.37 KB | None | 0 0
  # Predict the tip a client leaves from the client's age, using a simple
  # linear regression model (Tip ~ Age).
  #
  # Input:   'tip.csv' in the working directory; it must provide the columns
  #          `Age` and `Tip`.
  # Creates: dataset, split, training_set, test_set, regressor, plus the two
  #          plot objects training_plot and test_plot.
  # Output:  two scatter plots (training set, test set), each overlaid with
  #          the regression line fitted on the training set.
  # Requires the caTools and ggplot2 packages.

  # Dependencies ----
  library(caTools)

  # Install ggplot2 only when it is missing, so re-running the script does not
  # trigger a download and reinstall every time.
  if (!requireNamespace("ggplot2", quietly = TRUE)) {
    install.packages("ggplot2")
  }
  library(ggplot2)

  # Data ----
  dataset <- read.csv("tip.csv")

  # Fail early with a clear message if the expected columns are absent.
  stopifnot(all(c("Age", "Tip") %in% names(dataset)))

  # Fix the random seed so the train/test split is reproducible.
  set.seed(100)

  # Split the dataset: 2/3 of the rows for training, the rest for testing.
  # `split` is a logical mask with one entry per row (TRUE = training row).
  split <- sample.split(dataset$Tip, SplitRatio = 2 / 3)

  # Index with the mask directly; subset() would look `split` up inside the
  # data frame first and could pick up a column of the same name.
  training_set <- dataset[split, , drop = FALSE]
  test_set <- dataset[!split, , drop = FALSE]

  # Model ----
  # Fit the simple linear regression on the training rows only.
  regressor <- lm(formula = Tip ~ Age, data = training_set)

  # The regression line, evaluated at the training ages. Both plots draw this
  # same line: there is only one model, and it was fitted on the training set.
  fit_line <- data.frame(
    Age = training_set$Age,
    Tip = predict(regressor, newdata = training_set)
  )

  # Plots ----
  # Training set: the observations the model was fitted on, with the fitted
  # line.
  training_plot <- ggplot() +
    geom_point(
      data = training_set,
      mapping = aes(x = Age, y = Tip),
      colour = "red"
    ) +
    geom_line(
      data = fit_line,
      mapping = aes(x = Age, y = Tip),
      colour = "blue"
    ) +
    ggtitle("Tip vs Client Age (Training set)") +
    xlab("Client Age") +
    ylab("Tip")

  # Test set: observations the model has not seen, drawn against the line
  # fitted on the training set, to judge how well the model generalises.
  test_plot <- ggplot() +
    geom_point(
      data = test_set,
      mapping = aes(x = Age, y = Tip),
      colour = "green"
    ) +
    geom_line(
      data = fit_line,
      mapping = aes(x = Age, y = Tip),
      colour = "red"
    ) +
    ggtitle("Tip vs Client Age (Test Set)") +
    xlab("Client Age") +
    ylab("Tip")

  # print() explicitly: top-level expressions are not auto-printed when the
  # script is run through source() with its default arguments.
  print(training_plot)
  print(test_plot)
Add Comment
Please, Sign In to add comment