Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Predict how much tip a client will leave based on the client's age,
# using a simple linear regression model.

# caTools provides sample.split(), used for the train/test partition.
library(caTools)

# The input file is expected to provide at least the columns Tip and Age,
# since both are referenced below.
dataset <- read.csv("tip.csv")

# Fix the random seed so the split is reproducible between runs.
set.seed(100)

# Logical mask over the rows of `dataset`: TRUE marks rows assigned to the
# training portion (SplitRatio = 2/3), FALSE marks the test portion.
in_training <- sample.split(dataset$Tip, SplitRatio = 2 / 3)
training_set <- subset(dataset, in_training)
test_set <- subset(dataset, !in_training)

# Fit Tip as a linear function of Age on the training rows only.
regressor <- lm(formula = Tip ~ Age, data = training_set)
# The data is visualized with ggplot2.
# Install it only when it is missing: an unconditional install.packages()
# call re-downloads the package on every run and errors out in
# non-interactive sessions where no CRAN mirror has been configured.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}

# Attach ggplot2 so its functions can be called without the `ggplot2::` prefix.
library(ggplot2)
# Training-set plot: observed tips as red points, with the fitted regression
# line (model predictions for the same training rows) drawn in blue.
# This shows the fitted slope, which can then be compared against the
# test-set observations.
ggplot(data = training_set, mapping = aes(x = Age)) +
  geom_point(aes(y = Tip), colour = "red") +
  geom_line(
    aes(y = predict(regressor, newdata = training_set)),
    colour = "blue"
  ) +
  labs(
    title = "Tip vs Client Age (Training set)",
    x = "Client Age",
    y = "Tip"
  )
# Test-set plot: held-out observations as green points.
# The red line is built from the model's predictions on the training rows,
# exactly as in the original script; because the model is a straight line,
# it is the same fitted line, drawn across the range of training ages.
ggplot() +
  geom_point(
    data = test_set,
    mapping = aes(x = Age, y = Tip),
    colour = "green"
  ) +
  geom_line(
    data = training_set,
    mapping = aes(x = Age, y = predict(regressor, newdata = training_set)),
    colour = "red"
  ) +
  labs(
    title = "Tip vs Client Age (Test Set)",
    x = "Client Age",
    y = "Tip"
  )
Add Comment
Please, Sign In to add comment