Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Setup ----
library(ggplot2)

# Work from the folder that holds the exercise data.
# NOTE(review): this path is hard-coded, so the script only runs on a machine
# that has this folder.
setwd("~/Desktop/Statistics")

# Load the 311 cases export; every later section reads from `bostondata`.
bostondata <- read.csv(file = "311_cases_exercise.csv")
# Most common request ----
# Count the rows per request type and list the ten most frequent first.
# table() counts values whether TYPE was read as character or as factor;
# summary() only yields counts for a factor and prints just
# Length/Class/Mode for a character column.
head(sort(table(bostondata$TYPE), decreasing = TRUE), 10)
# Potholes ----
# Number of pothole-repair requests opened in July.
potholes <- bostondata[
  which(bostondata$TYPE == "Request for Pothole Repair"),
]

# Characters 6-7 of open_dt are read as the month.
# NOTE(review): this presumes dates shaped like "YYYY-MM-DD ..." - confirm
# against the CSV.
opened_month <- substr(as.character(potholes$open_dt), 6, 7)

# NOTE(review): only the month is tested. If the export spans several years,
# this also keeps July requests from years other than 2017.
july2017potholes <- potholes[which(opened_month == "07"), ]

# nrow() counts requests; length() of a data frame is its number of columns.
nrow(july2017potholes)
# Share of graffiti requests that are still open and overdue ----
# The printed value is a fraction between 0 and 1, not a percentage.
summary(bostondata$TYPE)

# Keep only the graffiti-removal requests.
graffiti <- bostondata[which(bostondata$TYPE == "Graffiti Removal"), ]

# Flag the graffiti requests that are both open and past their deadline.
still_open <- graffiti$CASE_STATUS == "Open"
past_due <- graffiti$OnTime_Status == "OVERDUE"
openoverduegraffiti <- graffiti[which(still_open & past_due), ]

# Open-and-overdue requests divided by all graffiti requests.
nrow(openoverduegraffiti) / nrow(graffiti)
# Request type most likely to have a submitted photo be in Hyde Park ----
# For every request type, work out which share of its photo-backed requests
# came from Hyde Park, then report the type(s) with the largest share.
request_types <- unique(bostondata$TYPE)
request_types

# Row-level flags, computed once instead of once per request type.
# NOTE(review): "no photo" is taken to be the literal string "NULL"; any
# other value (including an empty string) counts as a photo - confirm
# against the CSV.
has_photo <- bostondata$SubmittedPhoto != "NULL"
in_hyde_park <- bostondata$neighborhood == "Hyde Park"

# Number of rows of each request type that also satisfy `keep`.
# Comparisons that evaluate to NA are not counted.
count_by_type <- function(keep) {
  vapply(
    seq_along(request_types),
    function(i) sum(bostondata$TYPE == request_types[i] & keep, na.rm = TRUE),
    integer(1)
  )
}

total_requests <- count_by_type(TRUE)
with_photo <- count_by_type(has_photo)
with_photo_hyde_park <- count_by_type(has_photo & in_hyde_park)

# Types without any photo give 0 / 0 = NaN; they are skipped further down.
hyde_park_share <- with_photo_hyde_park / with_photo

# One column per request type; rows 1-4 are: all requests, requests with a
# photo, requests with a photo in Hyde Park, and the Hyde Park share.
hydeanalysis <- as.data.frame(rbind(
  total_requests = total_requests,
  with_photo = with_photo,
  with_photo_hyde_park = with_photo_hyde_park,
  hyde_park_share = hyde_park_share
))
names(hydeanalysis) <- as.character(request_types)

# Largest share, ignoring NaN and never below 0. Stored under its own name
# so that base::max() is not masked by a variable.
best_share <- max(0, hyde_park_share, na.rm = TRUE)
best_share

# Exact comparison is safe here: best_share is copied from hyde_park_share.
# If best_share is 0, every type with a photo but none in Hyde Park is listed.
names(hydeanalysis)[which(hyde_park_share == best_share)]
# Open analysis of Boston data - responsiveness by district ----
# Per city council district (1-9): request volume, how requests were
# submitted, how many are open or closed, and how many closed requests were
# on time or overdue. All "%" columns hold fractions between 0 and 1.

# Counts per district value; table() works whatever type the column has.
table(bostondata$city_council_district, useNA = "ifany")

# Keep only rows whose district is one of 1-9.
district_ids <- 1:9
modifiedbostondata <- bostondata[
  which(bostondata$city_council_district %in% district_ids),
]
table(modifiedbostondata$city_council_district, useNA = "ifany")

# Sources that get their own column; everything else is reported as "Other%".
known_sources <- c("Citizens Connect App", "City Worker App", "Constituent Call")

# Build the ten summary figures for one district.
# Comparisons that evaluate to NA are not counted. A district without rows
# (or without closed rows) yields NaN for the affected fractions.
summarise_district <- function(district) {
  rows <- modifiedbostondata[
    which(modifiedbostondata$city_council_district == district),
  ]
  n_true <- function(flag) sum(flag, na.rm = TRUE)
  total <- nrow(rows)
  is_closed <- rows$CASE_STATUS == "Closed"
  closed <- n_true(is_closed)

  c(
    district,
    total,
    n_true(rows$Source == "Citizens Connect App") / total,
    n_true(rows$Source == "City Worker App") / total,
    n_true(rows$Source == "Constituent Call") / total,
    # Logical negation rather than -which(...): a negative index built from
    # an empty which() result selects no rows at all, which would report 0
    # here for a district where every request comes from another source.
    n_true(!(rows$Source %in% known_sources)) / total,
    n_true(rows$CASE_STATUS == "Open") / total,
    closed / total,
    n_true(is_closed & rows$OnTime_Status == "ONTIME") / closed,
    n_true(is_closed & rows$OnTime_Status == "OVERDUE") / closed
  )
}

# One row per district, one column per figure.
districtanalysis <- as.data.frame(
  t(vapply(district_ids, summarise_district, numeric(10)))
)
colnames(districtanalysis) <- c(
  "DistrictNumber", "TotalRequests", "CitizensApp%", "WorkerApp%", "Call%",
  "Other%", "Open%", "Closed%", "OntimeClosed%", "LateClosed%"
)
print(districtanalysis)

# Stacked bar chart: number of requests per district, split by source.
ggplot(data = modifiedbostondata, aes(x = city_council_district, fill = Source)) +
  geom_bar(stat = "count") +
  ggtitle("Constituent 311 Requests Analysis") +
  xlab("City district") +
  ylab("# Requests")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement