Guest User

Untitled

a guest
Apr 25th, 2018
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.26 KB | None | 0 0
  1. library(readr)
  # ---- Load trip data ----
  # Bay Area bike-share trip exports. Judging by the file names these are the
  # 2017 export followed by the 2018-01, 2018-02 and 2018-03 exports; they are
  # stacked in the order listed here.
  trip_files <- file.path(
    "E:/R/Bayarea_bikeshare_data",
    c(
      "2017-bikeshare-tripdata.csv",
      "201801_bikeshare_tripdata.csv",
      "201802_bikeshare_tripdata.csv",
      "201803_bikeshare_tripdata.csv"
    )
  )

  # Fail early with a readable message rather than read.csv()'s connection
  # error when a file is missing.
  missing_files <- trip_files[!file.exists(trip_files)]
  if (length(missing_files) > 0) {
    stop(
      "Trip data file(s) not found: ",
      paste(missing_files, collapse = ", "),
      call. = FALSE
    )
  }

  # Read every file with identical settings and stack them with a single
  # rbind() call. This yields the same combined table as chaining rbind()
  # pairwise, but leaves no per-file or partially combined data frames behind
  # in the workspace.
  Bike_Data.ext <- do.call(rbind, lapply(trip_files, read.csv, header = TRUE))

  # Quick structural check of the combined table.
  str(Bike_Data.ext)
  11. library(ggplot2)
  12. library(lubridate)
  13. library(plyr)
  14. library(dplyr)
  15. library(forecast)
  # ---- Prepare the working copy of the trip table ----
  # Work on a copy so the raw import (Bike_Data.ext) stays untouched.
  tripdata <- Bike_Data.ext
  head(Bike_Data.ext)

  # Station names are used as plain labels, not as factors.
  tripdata$start_station_name <- as.character(tripdata$start_station_name)
  tripdata$end_station_name <- as.character(tripdata$end_station_name)

  # Parse a timestamp column into POSIXct.
  #
  # x:      character (or factor) vector of timestamps.
  # format: strptime() format the timestamps are expected to follow.
  # Returns a POSIXct vector the same length as x. Values that do not match
  # `format` become NA, and a warning reports how many failed.
  parse_trip_time <- function(x, format = "%m/%d/%Y %H:%M") {
    raw <- as.character(x)
    parsed <- as.POSIXct(raw, format = format)
    n_failed <- sum(is.na(parsed) & !is.na(raw) & nzchar(raw))
    if (n_failed > 0) {
      warning(
        n_failed, " timestamp(s) did not match format '", format, "'",
        call. = FALSE
      )
    }
    parsed
  }

  # Parse each column exactly once. Running strptime() a second time on the
  # already-parsed POSIXct values (as the script used to) re-reads them in
  # "YYYY-mm-dd HH:MM:SS" form, which does not match the format string and
  # turns every timestamp into NA.
  tripdata$start_time <- parse_trip_time(tripdata$start_time)
  tripdata$end_time <- parse_trip_time(tripdata$end_time)
  29.  
  # Riders by membership/subscription, drawn as a pie chart.
  # One full-width stacked bar turns into a pie once the y axis is mapped onto
  # polar coordinates.
  bar <- ggplot(
    data = tripdata,
    mapping = aes(x = factor(1), fill = factor(user_type))
  ) +
    geom_bar(width = 1)

  pie <- bar +
    coord_polar(theta = "y") +
    theme_void() +
    labs(title = "Riders by Membership") +
    theme(plot.title = element_text(hjust = 0.5))

  pie
  35.  
  36.  
  # Keep a snapshot of the trip table before any derived columns are added.
  original <- tripdata

  # Create several helper columns with dplyr/lubridate: break the start and end
  # timestamps down into date, month, day of month, day of week and hour.
  tripdata <- mutate(
    tripdata,
    sdate = date(start_time),
    smonth = month(start_time, label = TRUE),
    sday = day(start_time),
    swday = wday(start_time, label = TRUE),
    shr = hour(start_time),
    # The end date must come from end_time; it was previously copied from
    # start_time, which made edate a duplicate of sdate.
    edate = date(end_time),
    emonth = month(end_time, label = TRUE),
    eday = day(end_time),
    ewday = wday(end_time, label = TRUE),
    ehr = hour(end_time)
  )

  # Turn start_time and end_time into character vectors to avoid conflicts
  # when tallying. Both columns are converted in place; the previous code wrote
  # the end time to a new `end.time` column and left end_time as POSIXct.
  tripdata$start_time <- as.character(tripdata$start_time)
  tripdata$end_time <- as.character(tripdata$end_time)
  48.  
  # Rides per day of the week.
  week.rider <- ddply(tripdata, .(swday), tally)

  # Flag each row as weekday or weekend. The flag is stored on week.rider
  # itself: the earlier code assigned into an undefined object (`cwd`) and read
  # `week.usertype` before it was created, so the script stopped here.
  # Matching on "Sat"/"Sun" avoids relying on the Tuesday/Thursday
  # abbreviations, which differ between lubridate versions.
  week.rider$wkday <- ifelse(
    week.rider$swday %in% c("Sat", "Sun"),
    "Weekend",
    "Weekday"
  )

  ggplot(week.rider, aes(x = swday, y = n)) +
    geom_bar(stat = "identity", fill = "#2b8cbe") +
    labs(
      title = "Rides over days of week",
      x = "Days of Week",
      y = "Count of Rides"
    ) +
    theme(plot.title = element_text(hjust = 0.5))
  55.  
  # Weekday ridership, split by user type.
  week.usertype <- tripdata %>%
    ddply(.(swday, user_type), tally)

  ggplot(
    data = week.usertype,
    mapping = aes(x = swday, y = n, fill = user_type)
  ) +
    geom_bar(stat = "identity") +
    labs(
      title = "Ridership over days of week by Subscriber Type",
      x = "Days of Week",
      y = "Count"
    ) +
    theme(plot.title = element_text(hjust = 0.5))

  # The weekday summaries are not needed past this point; drop them from the
  # workspace.
  rm(week.usertype, week.rider)
  64.  
  # Ride counts per start hour, split by user type.
  hourly.rider <- tripdata %>%
    ddply(.(shr, user_type), tally)

  ggplot(
    data = hourly.rider,
    mapping = aes(x = shr, y = n, fill = user_type)
  ) +
    geom_bar(stat = "identity") +
    labs(
      title = "Ridership over time of day by user Type",
      x = "Time of day (hr)",
      y = "Count of Rides"
    ) +
    theme(plot.title = element_text(hjust = 0.5))
  70.  
  # Ride counts per start hour and day of week, split by user type; one facet
  # column per day of week.
  week.hourly.rider <- tripdata %>%
    ddply(.(shr, swday, user_type), tally)

  ggplot(
    data = week.hourly.rider,
    mapping = aes(x = shr, y = n, fill = user_type)
  ) +
    facet_grid(. ~ swday) +
    geom_bar(stat = "identity") +
    labs(
      title = "Ridership over Weekday over Time by Subscriber Type",
      x = "Time",
      y = "Count of Rides"
    )
  76.  
  77.  
  78.  
  # Distribution of trip durations.
  # Convert the duration from seconds to minutes. The stray `label = TRUE`
  # argument has been removed from this mutate() call: it did not affect
  # total_min and only added a constant column named `label` to the table.
  tripdata <- mutate(tripdata, total_min = duration_sec / 60)

  # Count rides for every distinct duration value and user type.
  duration.min <- ddply(tripdata, .(total_min, user_type), tally)

  ggplot(duration.min, aes(x = total_min, y = n, fill = factor(user_type))) +
    geom_bar(stat = "identity") +
    # Zoom in on trips between 1 and 70 minutes without dropping any rows.
    coord_cartesian(xlim = c(1, 70)) +
    labs(
      title = "Usage of bikes in Minutes",
      x = "Rides by Duration(Min)",
      y = "Count of Rides"
    ) +
    theme(plot.title = element_text(hjust = 0.5))
  87.  
  # Popular ride start stations: ride count per start station, busiest first.
  start.station <- tripdata %>%
    ddply(.(start_station_name), tally) %>%
    arrange(desc(n))
  head(start.station)

  # Popular ride end stations: ride count per end station, busiest first.
  end.station <- tripdata %>%
    ddply(.(end_station_name), tally) %>%
    arrange(desc(n))
  head(end.station)
  95.  
  96.  
  97.  
  # Distribution of trips starting from the busiest start stations.
  # start.station (ride counts per start station, sorted in descending order)
  # is already built in the "Popular Ride Start Stations" section, so it is
  # reused here rather than recomputed. This section previously rebuilt both
  # station tables and printed head(start.station) twice, the second time
  # where end.station was meant.

  # Flag weekend rides.
  tripdata <- mutate(tripdata, wkend = swday %in% c("Sat", "Sun"))

  # Take up to six of the busiest start stations. head() returns fewer entries
  # when fewer stations exist, whereas [1:6] would pad the selection with NA.
  top_start_stations <- head(start.station$start_station_name, 6)

  # Ride counts per station, start hour and weekend flag for those stations.
  startdata <- tripdata[tripdata$start_station_name %in% top_start_stations, ] %>%
    ddply(.(start_station_name, shr, wkend), tally)

  # Replace the logical flag with readable legend labels.
  startdata$wkend <- ifelse(startdata$wkend, "Weekend", "Weekday")

  ggplot(startdata, aes(x = shr, y = n, colour = wkend)) +
    facet_wrap(~ start_station_name, ncol = 2) +
    geom_line(aes(group = wkend)) +
    geom_point(aes(group = wkend)) +
    labs(
      title = "Distribution of trips starting from each station across time by weekday/weekend",
      x = "Time (hr)",
      y = "Rides Count"
    ) +
    theme(plot.title = element_text(hjust = 0.5))
Add Comment
Please, Sign In to add comment