# exploratoryPlotsDE2.R
# Pastebin paste by ansakoy, Dec 22nd, 2013 (R, 7.39 KB).
  # Online Video Survey Data ----
  #
  # Online video survey 2013. Data set page:
  # http://pewinternet.org/Shared-Content/Data-Sets/2013/July-2013--Online-Video-%28onmibus%29.aspx
  fileUrl2 <-
    "http://pewinternet.org/~/media/Files/Data%20Sets/2013/Omnibus_July_2013_Video_csv.csv"

  # Fetch the CSV into the working directory and remember when that happened.
  download.file(url = fileUrl2, destfile = "onlineVideo.csv")
  downloaded2On <- date()

  # Load the file and look at its dimensions, column names and structure.
  OV <- read.csv(file = "onlineVideo.csv")
  dim(OV)
  names(OV)
  str(OV)
  # Quick sanity checks on the raw data ----

  # Number of rows with sample == 8 and with act134 == 1.
  # sum(..., na.rm = TRUE) counts real matches only; `length(x[x == k])`
  # would also count every NA, because NA == k is NA and NA indices are kept.
  sum(OV$sample == 8, na.rm = TRUE)
  sum(OV$act134 == 1, na.rm = TRUE)

  # Top-left 4 x 4 corner of the data frame.
  OV[1:4, 1:4]

  # `weirdos` is not created anywhere in this script. Only draw these plots
  # when the object already exists (e.g. left over from an interactive
  # session), so a fresh run does not stop with "object 'weirdos' not found".
  if (exists("weirdos")) {
    barplot(table(as.factor(weirdos$educ2)))
    barplot(table(as.factor(weirdos$employ)))
  }

  # Missing-value counts for the columns used for subsetting below; the
  # trailing numbers are the counts the author recorded.
  nas <- is.na(OV$sample)
  sum(nas) # 0

  sum(is.na(OV$sex)) # 0
  sum(is.na(OV$eminuse)) # 0
  sum(is.na(OV$intmob)) # 0
  # Subsets and age distributions ----

  # Rows that belong to sample 1.
  sample1 <- OV[which(OV$sample == 1), , drop = FALSE]
  dim(sample1)
  # Rows with eminuse == 1 or intmob == 1; used to explore "act".
  act <- OV[which(OV$eminuse == 1 | OV$intmob == 1), , drop = FALSE]
  dim(act)

  barplot(table(as.factor(sample1$age)))

  # Drop ages above 97 from both subsets.
  trimAgeAct1 <- act[which(act$age <= 97), , drop = FALSE]
  trimAgeSample1 <- sample1[which(sample1$age <= 97), , drop = FALSE]

  trimAgeAct1$age[seq_len(5)]

  barplot(table(as.factor(trimAgeAct1$age)))

  # Age histograms side by side: act subset (left) vs. sample 1 (right).
  par(mfrow = c(1, 2))
  hist(trimAgeAct1$age, col = "blue", main = "", xlab = "eminuse+intmob")
  hist(trimAgeSample1$age, col = "blue", main = "", xlab = "sample = 1")

  # Cross-tabulation of sex by age in the act subset.
  table(trimAgeAct1$sex, trimAgeAct1$age)
  # Readable sex labels and age bands (act subset) ----

  # Store sex as character, then relabel "2" as "female" and "1" as "male".
  trimAgeAct1$sex <- as.character(trimAgeAct1$sex)
  trimAge1Sex <- gsub(
    "1", "male",
    gsub("2", "female", trimAgeAct1$sex, fixed = TRUE),
    fixed = TRUE
  )
  levels(factor(trimAge1Sex))

  # Split age into 6 groups with Hmisc::cut2() and attach both new columns.
  library(Hmisc)
  ageRanges <- cut2(trimAgeAct1$age, g = 6)
  ageRanges[seq_len(10)]
  trimAgeAct1$sex_char <- trimAge1Sex
  trimAgeAct1$ageRanges <- ageRanges
  # Stacked proportion barplot: sex within each age band (act subset) ----
  table1 <- table(trimAgeAct1$sex_char, trimAgeAct1$ageRanges)
  proptab <- prop.table(table1, margin = 2) # proportions within each column
  proptab

  # Draw the chart into ageGender.pdf (6 x 4 inches) on a single panel.
  pdf(file = "ageGender.pdf", width = 6, height = 4)
  par(mfrow = c(1, 1))
  barplot(
    proptab,
    main = "gender/age proportion",
    xlab = "age",
    col = c("green", "blue")
  )
  legend(
    "topright",
    legend = c("female", "male"),
    col = c("green", "blue"),
    pch = 15,
    bg = "white"
  )
  dev.off()

  str(trimAgeAct1)
  # Readable education labels (act subset) ----

  # Code -> label lookup for educ2. Codes are matched exactly: a chain of
  # gsub() calls replaces substrings, so a multi-digit code would be rewritten
  # digit by digit into concatenated labels. Codes without a label, and NA,
  # are left unchanged.
  eduLabels <- c(
    "1" = "lessThanHigh",
    "2" = "incompleteHigh",
    "3" = "gradHigh",
    "4" = "collNoDegree",
    "5" = "2YearsDegree",
    "6" = "4YearsDegree",
    "7" = "postGradNoDeg",
    "8" = "postGradDeg",
    "9" = "noReply"
  )
  eduChar <- as.character(trimAgeAct1$educ2)
  eduKnown <- eduChar %in% names(eduLabels)
  eduChar[eduKnown] <- unname(eduLabels[eduChar[eduKnown]])

  eduChar[1:10]
  trimAgeAct1$eduChar <- eduChar
  barplot(table(trimAgeAct1$eduChar))

  # Raw education codes side by side: whole data set vs. act subset.
  par(mfrow = c(1, 2))
  barplot(table(OV$educ2))
  barplot(table(trimAgeAct1$educ2))

  # Density of the education codes for both groups, written to edu.pdf.
  pdf(file = "edu.pdf", height = 4, width = 6)
  par(mfrow = c(1, 1))
  dens1 <- density(OV$educ2)
  dens2 <- density(trimAgeAct1$educ2)
  plot(dens1, lwd = 3, col = "green")
  lines(dens2, lwd = 3, col = "red")
  legend("topright", pch = 19,
         col = c("green", "red"), legend = c("All", "INTMOB+EMINUSE"))
  dev.off()

  str(trimAgeAct1)
  # Readable race labels (act subset) ----

  # Code -> label lookup for race, matched on the whole code rather than with
  # substring gsub() calls, so a multi-digit code cannot be turned into a
  # concatenation of labels. Codes without a label, and NA, stay unchanged.
  raceLabels <- c(
    "1" = "White",
    "2" = "Black",
    "3" = "Asian",
    "4" = "Mixed",
    "5" = "AmerIndian",
    "6" = "Other",
    "9" = "noReply"
  )
  raceChar <- as.character(trimAgeAct1$race)
  raceKnown <- raceChar %in% names(raceLabels)
  raceChar[raceKnown] <- unname(raceLabels[raceChar[raceKnown]])

  trimAgeAct1$raceChar <- raceChar
  trimAgeAct1$raceChar[1:5]

  barplot(table(trimAgeAct1$raceChar))
  # Race/education proportion barplot (act subset) ----
  table2 <- table(trimAgeAct1$eduChar, trimAgeAct1$raceChar)
  proptab <- prop.table(table2, 2) # education shares within each race column
  proptab

  par(mfrow = c(1, 1))
  # One colour per education level actually present in the table. The legend
  # is built from the same row names, so labels and colours stay aligned even
  # if a level is missing from this subset or an extra one appears; a fixed
  # list of nine labels would silently shift in that case.
  eduColours <- rainbow(nrow(proptab))
  barplot(proptab, col = eduColours,
          main = "education/race proportion",
          xlab = "race")
  legend("topright",
         col = eduColours,
         legend = rownames(proptab), pch = 15)
  # Readable race and education labels (whole data set) ----
  #
  # Both recodes match the whole code against a lookup table. Substring-based
  # gsub() chains would rewrite multi-digit codes digit by digit. Codes
  # without a label, and NA, are left unchanged.

  # Race.
  raceLabels <- c(
    "1" = "White",
    "2" = "Black",
    "3" = "Asian",
    "4" = "Mixed",
    "5" = "AmerIndian",
    "6" = "Other",
    "9" = "noReply"
  )
  raceCharOV <- as.character(OV$race)
  raceKnownOV <- raceCharOV %in% names(raceLabels)
  raceCharOV[raceKnownOV] <- unname(raceLabels[raceCharOV[raceKnownOV]])

  OV$raceCharOV <- raceCharOV

  # Education.
  eduLabels <- c(
    "1" = "lessThanHigh",
    "2" = "incompleteHigh",
    "3" = "gradHigh",
    "4" = "collNoDegree",
    "5" = "2YearsDegree",
    "6" = "4YearsDegree",
    "7" = "postGradNoDeg",
    "8" = "postGradDeg",
    "9" = "noReply"
  )
  eduCharOV <- as.character(OV$educ2)
  eduKnownOV <- eduCharOV %in% names(eduLabels)
  eduCharOV[eduKnownOV] <- unname(eduLabels[eduCharOV[eduKnownOV]])

  OV$eduCharOV <- eduCharOV
  # Education by race, grouped bars (whole data set) ----
  tableOV1 <- table(OV$eduCharOV, OV$raceCharOV)

  # Colours and legend entries both come from the table's rows, so every
  # legend label matches the bar colour of that education level regardless of
  # which levels occur in the data.
  eduColours <- rainbow(nrow(tableOV1))
  barplot(tableOV1, beside = TRUE, col = eduColours,
          main = "race/education overall")
  legend("topleft",
         col = eduColours,
         legend = rownames(tableOV1), pch = 15, cex = 0.7)
  # Readable income labels (whole data set) ----

  # Code -> label lookup for inc, matched on the whole code. The earlier
  # gsub() chain had three problems this avoids:
  #   * the pattern "7" was used three times, so codes 8 and 9 never got a
  #     label and the label written for code 7 was rewritten again;
  #   * the labels themselves contain digits, so a later substitution altered
  #     labels written earlier (the "7" inside "$50k-$75k");
  #   * the "<" and ">" on the lowest and highest bands were swapped.
  # NOTE(review): band boundaries are taken from the original labels; confirm
  # them against the survey codebook.
  # Codes without a label, and NA, are left unchanged.
  incLabels <- c(
    "1" = "<$10k",
    "2" = "$10k-$20k",
    "3" = "$20k-$30k",
    "4" = "$30k-$40k",
    "5" = "$40k-$50k",
    "6" = "$50k-$75k",
    "7" = "$75k-$100k",
    "8" = "$100k-$150k",
    "9" = ">$150k"
  )
  incCharOV <- as.character(OV$inc)
  incKnownOV <- incCharOV %in% names(incLabels)
  incCharOV[incKnownOV] <- unname(incLabels[incCharOV[incKnownOV]])
  OV$incCharOV <- incCharOV
  # Readable income labels (act subset) ----

  # Code -> label lookup for inc, matched on the whole code. With a chain of
  # gsub() calls the labels' own digits get substituted by later steps: the
  # "7" step rewrote the "7" inside the already assigned "$50k-$75k" label.
  # The "<" and ">" on the lowest and highest bands were also swapped.
  # NOTE(review): band boundaries are taken from the original labels; confirm
  # them against the survey codebook.
  # Codes without a label, and NA, are left unchanged.
  incLabels <- c(
    "1" = "<$10k",
    "2" = "$10k-$20k",
    "3" = "$20k-$30k",
    "4" = "$30k-$40k",
    "5" = "$40k-$50k",
    "6" = "$50k-$75k",
    "7" = "$75k-$100k",
    "8" = "$100k-$150k",
    "9" = ">$150k"
  )
  incChar <- as.character(trimAgeAct1$inc)
  incKnown <- incChar %in% names(incLabels)
  incChar[incKnown] <- unname(incLabels[incChar[incKnown]])
  trimAgeAct1$incChar <- incChar
  # Education by age band, grouped bars (act subset) ----
  tab4 <- table(trimAgeAct1$eduChar, trimAgeAct1$ageRanges)

  # Colours and legend entries are both derived from the table's rows so a
  # label can never be paired with the wrong colour when an education level
  # is absent from (or additional in) this subset.
  eduColours <- rainbow(nrow(tab4))

  pdf(file = "eduAge.pdf", height = 4, width = 8)
  barplot(tab4, col = eduColours, xlab = "age",
          beside = TRUE, main = "age / education")
  # xpd = TRUE lets the legend be drawn outside the plot region.
  # NOTE(review): the margins are changed after barplot() has drawn the bars,
  # so they do not affect the bars themselves; the order is kept because the
  # legend inset was presumably tuned for it - confirm the resulting layout.
  par(mar = c(5, 4, 4, 9), xpd = TRUE)
  legend("topright", inset = c(-0.65, 0),
         col = eduColours,
         legend = rownames(tab4), pch = 15, cex = 0.7)

  dev.off()
  # Youngest age group, whole data set ----
  # The mask keeps ages of 29 or below; the "18-29" wording in the title
  # presumably reflects the survey's minimum age - the code sets no lower bound.
  youngest <- OV$age <= 29
  sum(youngest, na.rm = TRUE)
  hist(
    OV$age[youngest],
    main = "Age distribution (18-29 y.o.)",
    xlab = "age",
    col = "blue"
  )
# End of Pastebin paste (site advertisement and comment-form text removed).