Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Online Video Survey Data
# online video survey 2013
# http://pewinternet.org/Shared-Content/Data-Sets/2013/July-2013--Online-Video-%28onmibus%29.aspx
fileUrl2 <- "http://pewinternet.org/~/media/Files/Data%20Sets/2013/Omnibus_July_2013_Video_csv.csv"
download.file(fileUrl2, destfile = "onlineVideo.csv")
downloaded2On <- date() # remember when the local copy was fetched
OV <- read.csv("onlineVideo.csv")

# First look at the raw table: size, column names, column types.
dim(OV)
names(OV)
str(OV)

# Count rows carrying a given code. Indexing with `x[x == k]` keeps one NA
# entry per missing value, so length() of it over-counts; summing the
# comparison with na.rm = TRUE counts only the real matches.
sum(OV$sample == 8, na.rm = TRUE)
sum(OV$act134 == 1, na.rm = TRUE)

# Top-left 4 x 4 corner of the table.
OV[1:4, 1:4]

# NOTE(review): nothing in this script creates `weirdos`; it was presumably a
# subset built by hand in an interactive session -- confirm and add its
# definition. Until then the plots are skipped instead of aborting the script.
if (exists("weirdos")) {
  barplot(table(as.factor(weirdos$educ2)))
  barplot(table(as.factor(weirdos$employ)))
} else {
  message("Skipping `weirdos` barplots: object `weirdos` is not defined.")
}
# Missing-value counts for the columns used to build the subsets below.
nas <- is.na(OV$sample)
sum(nas) # 0
sum(is.na(OV$sex)) # 0
sum(is.na(OV$eminuse)) # 0
sum(is.na(OV$intmob)) # 0

# Rows belonging to sample 1.
sample1 <- subset(OV, sample == 1)
dim(sample1)

# "act" subset: rows where eminuse or intmob equals 1.
act <- subset(OV, eminuse == 1 | intmob == 1)
dim(act)

barplot(table(as.factor(sample1$age)))

# Keep only rows whose age is at most 97 in both subsets.
trimAgeAct1 <- subset(act, age <= 97)
trimAgeSample1 <- subset(sample1, age <= 97)
trimAgeAct1$age[1:5]
barplot(table(as.factor(trimAgeAct1$age)))

# Side-by-side age histograms: act subset (left) vs. sample 1 (right).
par(mfrow = c(1, 2))
hist(trimAgeAct1$age, main = "", col = "blue", xlab = "eminuse+intmob")
hist(trimAgeSample1$age, main = "", col = "blue", xlab = "sample = 1")

# Cross-tabulation of sex code by age in the act subset.
table(trimAgeAct1$sex, trimAgeAct1$age)
# Readable version of the sex code (act): "2" becomes "female", then "1"
# becomes "male". The original column is stored as character as well.
trimAgeAct1$sex <- as.character(trimAgeAct1$sex)
trimAge1Sex <- gsub("1", "male", gsub("2", "female", trimAgeAct1$sex))
levels(factor(trimAge1Sex))

# Cut the ages into 6 quantile groups (act) with Hmisc::cut2().
library(Hmisc)
ageRanges <- cut2(trimAgeAct1$age, g = 6)
ageRanges[1:10]

# Attach both derived columns to the act subset.
trimAgeAct1$sex_char <- trimAge1Sex
trimAgeAct1$ageRanges <- ageRanges

# Share of each sex within every age group (columns sum to 1).
table1 <- table(trimAgeAct1$sex_char, trimAgeAct1$ageRanges)
proptab <- prop.table(table1, margin = 2)
proptab

# Stacked proportion barplot, written to ageGender.pdf.
sexColours <- c("green", "blue")
pdf(file = "ageGender.pdf", height = 4, width = 6)
par(mfrow = c(1, 1))
barplot(
  proptab,
  col = sexColours,
  main = "gender/age proportion",
  xlab = "age"
)
legend(
  "topright",
  legend = c("female", "male"),
  bg = "white",
  col = sexColours,
  pch = 15
)
dev.off()

str(trimAgeAct1)
# Readable labels for the education codes (act). The substitutions are applied
# one after another, in the order the codes are listed here.
eduLabels <- c(
  "1" = "lessThanHigh",
  "2" = "incompleteHigh",
  "3" = "gradHigh",
  "4" = "collNoDegree",
  "5" = "2YearsDegree",
  "6" = "4YearsDegree",
  "7" = "postGradNoDeg",
  "8" = "postGradDeg",
  "9" = "noReply"
)
eduChar <- as.character(trimAgeAct1$educ2)
for (code in names(eduLabels)) {
  eduChar <- gsub(code, eduLabels[[code]], eduChar)
}
eduChar[1:10]
trimAgeAct1$eduChar <- eduChar
barplot(table(trimAgeAct1$eduChar))

# Raw education codes: whole data set (left) next to the act subset (right).
par(mfrow = c(1, 2))
barplot(table(OV$educ2))
barplot(table(trimAgeAct1$educ2))

# Density of the education codes for both groups on one plot, saved to edu.pdf.
pdf(file = "edu.pdf", height = 4, width = 6)
par(mfrow = c(1, 1))
dens1 <- density(OV$educ2)
dens2 <- density(trimAgeAct1$educ2)
plot(dens1, lwd = 3, col = "green")
lines(dens2, lwd = 3, col = "red")
legend(
  "topright",
  pch = 19,
  col = c("green", "red"),
  legend = c("All", "INTMOB+EMINUSE")
)
dev.off()

str(trimAgeAct1)
# Readable labels for the race codes (act), substituted in the listed order.
raceLabels <- c(
  "1" = "White",
  "2" = "Black",
  "3" = "Asian",
  "4" = "Mixed",
  "5" = "AmerIndian",
  "6" = "Other",
  "9" = "noReply"
)
raceChar <- as.character(trimAgeAct1$race)
for (code in names(raceLabels)) {
  raceChar <- gsub(code, raceLabels[[code]], raceChar)
}
trimAgeAct1$raceChar <- raceChar
trimAgeAct1$raceChar[1:5]
barplot(table(trimAgeAct1$raceChar))

# race/education proportion barplot (act): share of each education label
# within every race label (columns sum to 1).
table2 <- table(trimAgeAct1$eduChar, trimAgeAct1$raceChar)
proptab <- prop.table(table2, margin = 2)
proptab

par(mfrow = c(1, 1))
barplot(
  proptab,
  col = rainbow(9),
  main = "education/race proportion",
  xlab = "race"
)
# Legend entries are listed alphabetically, the order table() gives its rows.
legend(
  "topright",
  col = rainbow(9),
  legend = c(
    "2YearsDegree", "4YearsDegree", "collNoDegree", "gradHigh",
    "incompleteHigh", "lessThanHigh", "noReply", "postGradDeg",
    "postGradNoDeg"
  ),
  pch = 15
)
# Readable labels for the race codes (whole data set), substituted in the
# listed order.
raceLabelsOV <- c(
  "1" = "White",
  "2" = "Black",
  "3" = "Asian",
  "4" = "Mixed",
  "5" = "AmerIndian",
  "6" = "Other",
  "9" = "noReply"
)
raceCharOV <- as.character(OV$race)
for (code in names(raceLabelsOV)) {
  raceCharOV <- gsub(code, raceLabelsOV[[code]], raceCharOV)
}
OV$raceCharOV <- raceCharOV

# Readable labels for the education codes (whole data set), same approach.
eduLabelsOV <- c(
  "1" = "lessThanHigh",
  "2" = "incompleteHigh",
  "3" = "gradHigh",
  "4" = "collNoDegree",
  "5" = "2YearsDegree",
  "6" = "4YearsDegree",
  "7" = "postGradNoDeg",
  "8" = "postGradDeg",
  "9" = "noReply"
)
eduCharOV <- as.character(OV$educ2)
for (code in names(eduLabelsOV)) {
  eduCharOV <- gsub(code, eduLabelsOV[[code]], eduCharOV)
}
OV$eduCharOV <- eduCharOV

# Grouped barplot of education counts within each race label (whole data set).
tableOV1 <- table(OV$eduCharOV, OV$raceCharOV)
barplot(
  tableOV1,
  beside = TRUE,
  col = rainbow(9),
  main = "race/education overall"
)
# Legend entries are listed alphabetically, the order table() gives its rows.
legend(
  "topleft",
  col = rainbow(9),
  legend = c(
    "2YearsDegree", "4YearsDegree", "collNoDegree", "gradHigh",
    "incompleteHigh", "lessThanHigh", "noReply", "postGradDeg",
    "postGradNoDeg"
  ),
  pch = 15,
  cex = 0.7
)
# Income bracket labels keyed by the `inc` code.
# NOTE(review): ">$10k" for the lowest bracket and "<$150k" for the highest
# look inverted ("<$10k" / ">$150k" seem intended); the label text is kept
# as-is here -- confirm against the survey codebook before changing it.
incLabels <- c(
  "1" = ">$10k",
  "2" = "$10k-$20k",
  "3" = "$20k-$30k",
  "4" = "$30k-$40k",
  "5" = "$40k-$50k",
  "6" = "$50k-$75k",
  "7" = "$75k-$100k",
  "8" = "$100k-$150k",
  "9" = "<$150k"
)

# Map income codes to their labels by exact match.
#
# The previous chain of gsub() calls replaced substrings one digit at a time:
# the general version reused pattern "7" three times, so codes 8 and 9 were
# never labelled, and later passes rewrote digits inside labels that had
# already been inserted (e.g. the "7" in "$50k-$75k"). Whole-value lookup
# avoids both problems.
#
# codes: vector of income codes (any type coercible to character).
# Returns a character vector of the same length; codes without an entry in
# `incLabels` are returned unchanged and NA stays NA.
recodeInc <- function(codes) {
  codes <- as.character(codes)
  hit <- match(codes, names(incLabels))
  known <- !is.na(hit)
  codes[known] <- unname(incLabels[hit[known]])
  codes
}

# income variable char equivalent (general)
incCharOV <- recodeInc(OV$inc)
OV$incCharOV <- incCharOV

# income variable char equivalent (act)
incChar <- recodeInc(trimAgeAct1$inc)
trimAgeAct1$incChar <- incChar
# education/age barplot (act), written to eduAge.pdf
tab4 <- table(trimAgeAct1$eduChar, trimAgeAct1$ageRanges)
pdf(file = "eduAge.pdf", height = 4, width = 8)

# Graphics parameters only affect what is drawn after they are set, so widen
# the right margin and allow drawing outside the plot region BEFORE barplot();
# setting them afterwards left the bars drawn with the default margins.
par(mar = c(5, 4, 4, 9), xpd = TRUE)

# One colour per education label actually present in the table, so bars and
# legend stay in step even if a category is missing.
eduColours <- rainbow(nrow(tab4))
barplot(
  tab4,
  col = eduColours,
  xlab = "age",
  beside = TRUE,
  main = "age / education"
)

# Put the legend's top-left corner on the plot region's top-right corner so it
# sits inside the widened right margin. The former keyword placement with
# inset = c(-0.65, 0) shifted the legend right by 65% of the plot width, which
# is more room than the margin provides.
plotEdges <- par("usr")
legend(
  x = plotEdges[2],
  y = plotEdges[4],
  legend = rownames(tab4),
  col = eduColours,
  pch = 15,
  cex = 0.7
)
dev.off()
# Youngest respondents in the whole data set: everyone aged 29 or below.
youngest <- OV$age <= 29
sum(youngest, na.rm = TRUE) # how many rows fall in the group

# Age histogram restricted to that group.
youngAges <- OV$age[youngest]
hist(
  youngAges,
  main = "Age distribution (18-29 y.o.)",
  xlab = "age",
  col = "blue"
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement