Advertisement
Guest User

Untitled

a guest
May 25th, 2019
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.68 KB | None | 0 0
  1. # Save every object in the global environment (hidden ones included) to disk
  2. save(file = "summer201.RData", list = ls(all.names = TRUE), envir = .GlobalEnv)
  3.  
  4. #...MISSED A LOT HERE...
  5.  
  6. # Comparison operators return a logical (TRUE/FALSE) answer
  7. 1 < 2
  8. 2 > 1
  9. 1 <= 1.1
  10. 4 >= 4
  11. 4 < 4
  12.  
  13. # Entering data into R: storing single values in named objects
  14. neighbour <- 660 + 7
  15. answer <- 54
  16. fruit <- "apple"
  17. fruit <- "apple core" # assigning again replaces the previous value
  18. fruit2 <- "apple"
  19. neighbour + answer
  20. answer + fruit # errors on purpose: fruit is a character string, not a number
  21. answer <- answer + 1 # answer now holds its previous value plus one
  22.  
  23. # Entering data into R: storing several values at once (similar to a variable)
  24. # a character vector
  25. name <- c("Apple", "Banana", "Pear", "Peach", "Kiwi", "Pineapple")
  26. # a numeric vector
  27. quantity <- c(3, 5, 0, 15, 4, 12)
  28. quantity # print the values in the console
  29.  
  30. # Combine the two vectors into a dataset - it must be a data frame
  31. Fruits <- data.frame(name = name, quantity = quantity)
  32. Fruits # print in the console
  33. View(Fruits) # open the dataset in the data viewer
  34.  
  35. #Removing objects so our environment is not cluttered
  36. #Remove single objects, one rm() call per object
  37. rm(quantity)
  38. rm(name)
  39. #MISSED
  40.  
  41. # Importing existing datasets into R (from a .csv file)
  42. # From your computer - Macs ("filepath" is a placeholder for the real path)
  43. CrimeRate <- read.csv("filepath", header = TRUE, sep = ",", stringsAsFactors = FALSE)
  44.  
  45. # From your computer - PCs
  46. CrimeRate <- read.csv("C:/Users/nicol/Downloads/crimerate.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE)
  47. View(CrimeRate)
  48.  
  49. # From the internet ("webLink" is a placeholder for the file's URL)
  50. DataName <- read.csv("webLink", header = TRUE, sep = ",", stringsAsFactors = FALSE)
  51.  
  52. #Descriptive Statistics (all on the Crime.Rate column of CrimeRate)
  53. #the mean
  54. mean(CrimeRate$Crime.Rate) #This will return NA if there is missing data
  55. mean(CrimeRate$Crime.Rate, na.rm = TRUE) #you should always add the na.rm = TRUE as a habit to remove missing data
  56.  
  57. #the median
  58. median(CrimeRate$Crime.Rate, na.rm = TRUE)
  59.  
  60. #the minimum
  61. min(CrimeRate$Crime.Rate, na.rm = TRUE)
  62.  
  63. #the maximum
  64. max(CrimeRate$Crime.Rate, na.rm = TRUE)
  65.  
  66. #the interquartile range
  67. IQR(CrimeRate$Crime.Rate, na.rm = TRUE)
  68.  
  69. #Let's do all that together!
  70. summary(CrimeRate$Crime.Rate, na.rm = TRUE)
  71. sum(CrimeRate$Crime.Rate, na.rm = TRUE) #the total of the non-missing values
  72.  
  73. # Frequencies and relative frequencies of each distinct value
  74. table(CrimeRate$Crime.Rate)
  75. table(CrimeRate$Crime.Rate) / length(CrimeRate$Crime.Rate)
  76.  
  77. # One row per distinct value is pretty useless, so build on table() to count by interval
  78. CRfr <- CrimeRate$Crime.Rate # copy the column into its own vector
  79. breaks <- seq(from = 0, to = 80, by = 10) # bin edges
  80. CRfr.cut <- cut(CRfr, breaks = breaks, right = FALSE) # assign each value to a bin that excludes its right edge
  81. CRfr.freq <- table(CRfr.cut) # count the values that fall in each bin
  82. View(CRfr.freq)
  83.  
  84. CRfr.freq <- table(CRfr.cut) / length(CRfr.cut) # overwrite the counts with relative frequencies
  85. View(CRfr.freq)
  86.  
  87. # Visual descriptions
  88. if (!requireNamespace("ggplot2", quietly = TRUE)) install.packages("ggplot2") # install only when it is missing
  89. # If R asks a yes/no question while installing, answer it in the console - a bare `y` in the script is an error
  90. library(ggplot2)
  91.  
  92. # Histogram with the default bins
  93. ggplot(data = CrimeRate, mapping = aes(x = Crime.Rate)) +
  94.   geom_histogram()
  95.  
  96. # Same histogram with a bin width of 10
  97. ggplot(data = CrimeRate, mapping = aes(x = Crime.Rate)) +
  98.   geom_histogram(binwidth = 10)
  99.  
  100. # Make the bins start at 0 and make each bin not include its right edge
  101. # but be careful: you want it to start at 0 in this case, but that may not always be so;
  102. # for instance, if you have negative values, this would be wrong
  103. ggplot(CrimeRate, aes(x = Crime.Rate)) +
  104.   geom_histogram(boundary = 0, binwidth = 10, closed = "left")
  105.  
  106. # Add colours, a title and axis labels
  107. ggplot(CrimeRate, aes(x = Crime.Rate)) +
  108.   geom_histogram(boundary = 0, colour = "black", binwidth = 10, fill = "light blue", closed = "left", na.rm = FALSE) +
  109.   xlab("Crime Rate") +
  110.   ylab("Frequency") +
  111.   ggtitle("Crime Rate in the US - 2005")
  112.  
  113. # Box plot of a single variable (a constant factor stands in for the x axis)
  114. ggplot(data = CrimeRate, mapping = aes(x = factor(0), y = Crime.Rate)) +
  115.   geom_boxplot(fill = "orange", colour = "black", na.rm = FALSE) +
  116.   xlab("") +
  117.   scale_x_discrete(breaks = NULL) +
  118.   ylab("Crime Rate") +
  119.   ggtitle("Crime Rate in the US - 2005")
  120.  
  121. boxplot(CrimeRate$Crime.Rate, ylab = "Crime Rate") # base-R box plot of the one variable; Fruits has no Data.Frame column to group by
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement