Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #You often want to look at the effects of multiple independent variables on one dependent variable.
- #for two or more continuous variables you can use a multiple regression with or without interactions
- #for example y = b0 +b1*X1+b2*X2+b3*X3+.....
- #for two categorical variables you can do a two-way ANOVA.
- # The multiple regression example uses the preloaded `trees` dataset.
- data("trees") # make the dataset available under the name `trees`
- str(trees) # show each variable with its type and first few values
- # Scatterplots of Volume against each candidate predictor (Girth, Height).
- library(ggplot2)
- ggplot(data = trees, mapping = aes(x = Girth, y = Volume)) +
-   geom_point()
- # Same plot for Height versus Volume, this time with a title.
- ggplot(data = trees, mapping = aes(x = Height, y = Volume)) +
-   geom_point() +
-   labs(title = "Relationship between height and volume")
- # Overlay the fitted straight line from a linear regression (method = "lm");
- # se = FALSE leaves out the confidence band around the line.
- ggplot(data = trees, mapping = aes(x = Girth, y = Volume)) +
-   geom_point() +
-   geom_smooth(method = "lm", se = FALSE)
- # Height versus Volume again, with both the fitted line and the title.
- ggplot(data = trees, mapping = aes(x = Height, y = Volume)) +
-   geom_point() +
-   geom_smooth(method = "lm", se = FALSE) +
-   labs(title = "Relationship between height and volume")
- # Fit one-predictor regressions to see whether we should reject the null
- # hypothesis that the slope is 0.
- resgirth <- lm(formula = Volume ~ Girth, data = trees) # linear regression
- resgirth # printing the fit shows the intercept and the slope of the line
- summary(resgirth) # check the p-value and the adjusted R-squared
- # Repeat with Height as the only predictor of Volume.
- resheight <- lm(formula = Volume ~ Height, data = trees)
- resheight
- summary(resheight)
- # Using girth and height together could predict volume better than either
- # one alone; extending the model only takes an extra term in the formula.
- resgirthandheight <- lm(data = trees, formula = Volume ~ Girth + Height)
- summary(resgirthandheight)
- #note that the p-value for the Height is greater than it was in the single variable situation
- #this is because the test is essentially asking does height have any effect after taking girth into account and
- #it does add something, but not as much as when nothing else has been included previously
- #also note that the adjusted R-squared is greater than before (more is explained by the model)
- # If there was no interaction between height and girth we would be done,
- # but there is an interaction, so the model needs an interaction term.
- # In an R formula, Girth * Height expands to Girth + Height + Girth:Height
- # (both main effects plus the interaction), so the main effects do not need
- # to be written out again; Girth:Height alone is the interaction term.
- # QUESTION -- Why are we adding Girth*Height?
- # ANSWER -- it brings in the Girth:Height term, which lets the slope for one
- # predictor change with the value of the other.
- resgirthandheightinteraction <- lm(Volume ~ Girth * Height, data = trees)
- summary(resgirthandheightinteraction) # the Girth:Height row is the interaction
- #let's do the same thing for two categorical independent variables.
- #I am going to follow examples from this site because there is a very good explanation of
- #how to think about interaction plots and it could be a valuable resource for you later.
- #https://dzchilds.github.io/stats-for-bio/two-way-anova-in-r.html
- # Read the Festuca data by path instead of calling setwd(), so the session's
- # working directory is left untouched.
- # stringsAsFactors = TRUE: since R 4.0.0 read.csv() keeps text columns as
- # character by default; this restores the earlier behaviour of converting
- # them to factors. NOTE(review): presumably pH and Calluna are stored as text
- # in the CSV and are meant to be factors for the ANOVA steps -- confirm.
- festuca <- read.csv(
-   file.path("~/Desktop/AS450HO/RTutorialDataAS450", "FESTUCA.CSV"),
-   header = TRUE,
-   stringsAsFactors = TRUE
- )
- str(festuca) # check the column names and types that were read in
- # View() opens a data viewer, so only call it in an interactive session.
- if (interactive()) {
-   View(festuca)
- }
- # The data hold the final weight of Festuca plants grown in the presence or
- # absence of Calluna under two different pH conditions. Notice that it is a
- # fully factorial design.
- # Start by graphing the data: Weight by Calluna, coloured by pH.
- ggplot(data = festuca, mapping = aes(x = Calluna, y = Weight, colour = pH)) +
-   geom_boxplot()
- # Two-way ANOVA of Weight on pH, Calluna and their interaction.
- # aov() takes the model formula directly, so there is no need to fit lm()
- # first and hand the fitted object to aov(); pH * Calluna expands to
- # pH + Calluna + pH:Calluna.
- festuca_model <- aov(Weight ~ pH * Calluna, data = festuca)
- summary(festuca_model) # ANOVA table for the two main effects and interaction
- # Tukey honest significant differences for the pH:Calluna term only.
- TukeyHSD(festuca_model, which = "pH:Calluna")
- # Interaction plots
- # QUESTION - What is the point of doing an interaction plot vs a boxplot?
- # The values inbetween don't really mean anything do they...
- # ANSWER - interaction.plot() draws the mean of Weight for each combination
- # of the two factors. The connecting lines are only a visual aid: they are
- # not data between the factor levels, but they make it easy to see whether
- # the traces are parallel (no interaction) or not (possible interaction).
- # fixed = TRUE keeps the legend in the order of the trace factor's levels.
- with(
-   festuca,
-   interaction.plot(
-     x.factor = pH, trace.factor = Calluna, response = Weight, fixed = TRUE
-   )
- )
- # Same means with the roles swapped: Calluna on the x axis, one trace per pH.
- with(
-   festuca,
-   interaction.plot(
-     x.factor = Calluna, trace.factor = pH, response = Weight, fixed = TRUE
-   )
- )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement