Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Save every object in the workspace (hidden ones included) to an .RData file
- save(file = "summer201.RData", list = ls(all.names = TRUE), envir = .GlobalEnv)
- # ...MISSED A LOT HERE...
- # Comparison operators return a logical answer (TRUE or FALSE)
- 1 < 2
- 2 > 1
- 1 <= 1.1
- 4 >= 4
- 4 < 4
- # Entering data into R (simple values, a.k.a. objects)
- neighbour <- 660 + 7
- answer <- 54
- fruit <- "apple"
- fruit <- "apple core" # assigning again replaces the old value of the object
- fruit2 <- "apple"
- neighbour + answer
- # Adding a number to a character string is an error; try() still shows the
- # error message but lets the rest of the script keep running when sourced
- try(answer + fruit)
- answer <- answer + 1 # answer now holds its old value plus one (55)
- # Entering data into R (a vector of values - similar to a variable)
- # character vector
- name <- c("Apple", "Banana", "Pear", "Peach", "Kiwi", "Pineapple")
- # numeric vector
- quantity <- c(3, 5, 0, 15, 4, 12)
- quantity # print the vector in the console
- # Create a dataset - it must be a data frame
- Fruits <- data.frame(name = name, quantity = quantity)
- Fruits # print in the console
- View(Fruits) # open the dataset in the data viewer
- # Removing objects so our environment is not cluttered
- # rm() accepts several object names, so both vectors go in one call
- rm(quantity, name)
- # MISSED
- # Importing existing datasets into R (from a .csv file)
- # TRUE is spelled out because T is an ordinary variable that can be reassigned
- # From your computer - Macs ("filepath" is a placeholder: replace it with the real path)
- CrimeRate <- read.csv("filepath", header = TRUE, sep = ",", stringsAsFactors = FALSE)
- # From your computer - PCs
- CrimeRate <- read.csv("C:/Users/nicol/Downloads/crimerate.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE)
- View(CrimeRate)
- # From the internet ("webLink" is a placeholder: replace it with the URL of the .csv file)
- DataName <- read.csv("webLink", header = TRUE, sep = ",", stringsAsFactors = FALSE)
- # Descriptive statistics
- # the mean
- mean(CrimeRate$Crime.Rate) # this returns NA if there is missing data
- mean(CrimeRate$Crime.Rate, na.rm = TRUE) # add na.rm = TRUE as a habit so missing data is dropped first
- # the median
- median(CrimeRate$Crime.Rate, na.rm = TRUE)
- # the minimum
- min(CrimeRate$Crime.Rate, na.rm = TRUE)
- # the maximum
- max(CrimeRate$Crime.Rate, na.rm = TRUE)
- # the interquartile range
- IQR(CrimeRate$Crime.Rate, na.rm = TRUE)
- # Let's do all that together!
- # summary() copes with missing values on its own, so it takes no na.rm argument
- summary(CrimeRate$Crime.Rate)
- # the total
- sum(CrimeRate$Crime.Rate, na.rm = TRUE)
- # Frequencies and relative frequencies
- table(CrimeRate$Crime.Rate)
- table(CrimeRate$Crime.Rate) / length(CrimeRate$Crime.Rate)
- # One count per distinct value is pretty useless, so build on table() to count by interval
- CRfr <- CrimeRate$Crime.Rate # copy the column into its own vector
- breaks <- seq(from = 0, to = 80, by = 10) # bin edges: 0, 10, ..., 80
- CRfr.cut <- cut(CRfr, breaks = breaks, right = FALSE) # put each value in a bin that excludes its right edge
- CRfr.freq <- table(CRfr.cut) # number of values in each bin
- View(CRfr.freq)
- CRfr.freq <- table(CRfr.cut) / length(CRfr.cut) # overwrite the counts with relative frequencies
- View(CRfr.freq)
- # Visual descriptions
- # Install ggplot2 only when it is missing, so re-running the script does not reinstall it
- if (!requireNamespace("ggplot2", quietly = TRUE)) install.packages("ggplot2")
- library(ggplot2)
- # Histogram with the default binning
- ggplot(CrimeRate, aes(x = Crime.Rate)) +
-   geom_histogram()
- # Change the bin width
- ggplot(CrimeRate, aes(x = Crime.Rate)) +
-   geom_histogram(binwidth = 10)
- # Line the bin edges up with 0 and make each bin exclude its right edge
- # (closed = "left"). Be careful: 0 is a sensible edge for this data, but
- # check that it suits any other data before reusing it.
- ggplot(CrimeRate, aes(x = Crime.Rate)) +
-   geom_histogram(boundary = 0, binwidth = 10, closed = "left")
- # Add colours, a title and axis labels
- ggplot(CrimeRate, aes(x = Crime.Rate)) +
-   geom_histogram(boundary = 0, colour = "black", binwidth = 10, fill = "light blue", closed = "left", na.rm = FALSE) +
-   xlab("Crime Rate") +
-   ylab("Frequency") +
-   ggtitle("Crime Rate in the US - 2005")
- # Boxplot
- # factor(0) gives the single box a dummy x position; the x axis label and breaks are then blanked
- ggplot(CrimeRate, aes(x = factor(0), y = Crime.Rate)) +
-   geom_boxplot(colour = "black", fill = "orange", na.rm = FALSE) +
-   xlab("") +
-   scale_x_discrete(breaks = NULL) +
-   ylab("Crime Rate") +
-   ggtitle("Crime Rate in the US - 2005")
- # The same single boxplot with base R graphics (no grouping variable is needed)
- boxplot(CrimeRate$Crime.Rate, ylab = "Crime Rate", main = "Crime Rate in the US - 2005")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement