document.write('
Data hosted with ♥ by Pastebin.com - Download Raw - See Original
  1. #Step 0:  read the summarized data and rebuild one record per account
  2.  
  3. #Load the CSV holding one row per age with its account count
  4. sample_data <- read.csv(file = "~/Desktop/test_data.csv")
  5.  
  6. #Repeat each age value "count" times to recover the unsummarized "raw" data
  7. age.exploded <- rep.int(x = sample_data$age, times = sample_data$count)
  8.  
  9.  
  10. #1. Histogram with normal distribution overlaid or density curve
  11.  
  12.  
  13. #1A.  Create histogram on the density scale (22 breaks give 21 equal bins)
  14. hist(age.exploded, breaks = seq(min(age.exploded), max(age.exploded),
  15.   length.out = 22), xlim = c(0, 20), ylim = c(0, .2), probability = TRUE,
  16.   col = "lightgray", xlab = "Age", ylab = "Percentage of Accounts",
  17.   main = "Age Distribution of Accounts\\n (where 0 <= age <= 20)")
  18.  
  19. #1B.  Overlay ONE of the two curves below on the histogram: either the
  20. #     fitted normal distribution or the smoothed density estimate
  21.  
  22. #Normal curve whose mean/sd are taken from the data plotted in the
  23. #histogram above, evaluated at 500 points across the observed age range
  24. curve(dnorm(x, mean(age.exploded), sd(age.exploded)),
  25.       from = min(age.exploded), to = max(age.exploded), n = 500,
  26.       add = TRUE, col = "red")
  27.  
  28. #Smoothed kernel density estimate; raise or lower the "adjust" parameter
  29. #to make the curve more or less smooth
  30. lines(density(age.exploded, adjust = 2), col = "blue")
  31.  
  32. #2 Histogram with line plot overlaid
  33.  
  34. #2A.  Create histogram with extra border space on right-hand side
  35.  
  36. #Extra outer margin "2" on right  (bottom, left, top, right); par() returns
  37. #the previous setting, which is saved so it can be restored at the end
  38. old.par <- par(oma = c(0, 0, 0, 2))
  39.  
  40. hist(age.exploded, xlim = c(0, 20), ylim = c(0, .2),
  41.      breaks = seq(min(age.exploded), max(age.exploded), length.out = 22),
  42.      xlab = "Age", ylab = "Percentage of Accounts", main = "Age Distribution of Accounts vs. Subscription Rate \\n (where reported age <= 20)",
  43.      probability = TRUE, col = "lightgray")
  44.  
  45. #2B.  Add overlaid line plot, create a right-side numeric axis.
  46. #     The rate is plotted against age with the same xlim as the histogram so
  47. #     each point sits over its own age instead of over its row index.
  48. #     NOTE(review): assumes sample_data rows are ordered by age - confirm
  49. par(new = TRUE)
  50. plot(sample_data$age, sample_data$subscribe_pct, xlim = c(0, 20),
  51.      xlab = "", ylab = "", type = "b", col = "red", axes = FALSE)
  52. axis(4)
  53.  
  54. #2C.  Add right-side axis label in the outer margin, then restore the
  55. #     outer-margin setting saved in 2A so later plots are unaffected
  56.  
  57. mtext(text = "Subscription Rate", side = 4, outer = TRUE, padj = 1)
  58. par(old.par)
');