Advertisement
Guest User

Untitled

a guest
May 26th, 2019
111
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.70 KB | None | 0 0
  1. import time as time
  2. import pandas as pd
  3. import numpy as np
  4.  
  5. CITY_DATA = { 'chicago': 'chicago.csv',
  6. 'new york city': 'new_york_city.csv',
  7. 'washington': 'washington.csv' }
  8.  
  9.  
  10. def get_filters():
  11. """
  12. Asks user to specify a city, month, and day to analyze.
  13.  
  14. Returns:
  15. (str) city - name of the city to analyze
  16. (str) month - name of the month to filter by, or "all" to apply no month filter
  17. (str) day - name of the day of week to filter by, or "all" to apply no day filter
  18. """
  19. print('Hello! Let\'s explore some US bikeshare data!')
  20. # get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
  21. while True:
  22. city = input('Please choose a city (Chicago, New York City, Washington):\n').lower()
  23. if city not in ('chicago', 'new york city', 'washington'):
  24. print('Wrong input! Please choose a city (Chicago, New York City, Washington):\n')
  25. continue
  26. else:
  27. break
  28.  
  29. # get user input for month (all, january, february, ... , june)
  30. while True:
  31. month = input('Please choose a month (January, February, March, April, May, June or all:\n').lower()
  32. if month not in ('january', 'february', 'march', 'april', 'may', 'june', 'all'):
  33. print('Wrong input! Please choose a month (January, February, March, April, May, June or all:\n')
  34. continue
  35. else:
  36. break
  37.  
  38. # get user input for day of week (all, monday, tuesday, ... sunday)
  39. while True:
  40. day = input('Please choose a day (Monday, Tuesday, ... Sunday or all:\n').lower()
  41. if day not in ('monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday', 'all'):
  42. print('Wrong input! Please choose a day (Monday, Tuesday, ... Sunday or all:\n')
  43. continue
  44. else:
  45. break
  46.  
  47. print('-'*40)
  48. return city, month, day
  49.  
  50.  
  51. def load_data(city, month, day):
  52. """
  53. Loads data for the specified city and filters by month and day if applicable.
  54.  
  55. Args:
  56. (str) city - name of the city to analyze
  57. (str) month - name of the month to filter by, or "all" to apply no month filter
  58. (str) day - name of the day of week to filter by, or "all" to apply no day filter
  59. Returns:
  60. df - Pandas DataFrame containing city data filtered by month and day
  61. """
  62. df = pd.read_csv(CITY_DATA[city])
  63.  
  64. df['Start Time'] = pd.to_datetime(df['Start Time'])
  65. df['month'] = df['Start Time'].dt.month
  66. df['day_of_week'] = df['Start Time'].dt.weekday_name
  67. df['hour'] = df['Start time'].dt.hour
  68.  
  69. if months != 'all':
  70. months = ['january', 'february', 'march', 'april', 'may', 'june']
  71. month = months.index(month) + 1
  72. df = df[df['month'] == month]
  73.  
  74. if day != 'all':
  75. df = df[df['day_of_week'] == day.title()]
  76.  
  77. return df
  78.  
  79.  
  80. def time_stats(df):
  81. """Displays statistics on the most frequent times of travel."""
  82.  
  83. print('\nCalculating The Most Frequent Times of Travel...\n')
  84. start_time = time.time()
  85.  
  86. # display the most common month
  87. popular_month = df['month'].mode()[0] - 1
  88. print('The most popular month is {}.'.format(months[popular_month].title()))
  89.  
  90. # display the most common day of week
  91. popular_day = df['day_of_week'].mode()[0]
  92. print('The most popular day of week is {}.'.format(popular_day))
  93.  
  94. # display the most common start hour
  95. popular_hour = df['hour'].mode()[0]
  96. print('The most popular hour is {}.'.format(popular_hour))
  97.  
  98. print("\nThis took %s seconds." % (time.time() - start_time))
  99. print('-'*40)
  100.  
  101.  
  102. def station_stats(df):
  103. """Displays statistics on the most popular stations and trip."""
  104.  
  105. print('\nCalculating The Most Popular Stations and Trip...\n')
  106. start_time = time.time()
  107.  
  108. # display most commonly used start station
  109. popular_start_station = df['Start Station'].mode()[0]
  110. print('{} is the most popular start station.'.format(popular_start_station))
  111.  
  112. # display most commonly used end station
  113. popular_end_station = df['End Station'].mode()[0]
  114. print('{} is the most popular end station.'.format(popular_end_station))
  115.  
  116. # display most frequent combination of start station and end station trip
  117. station_pair = df.groupby(['Start Station', 'End Station']).size().nlargest(1).reset_index(name='count')
  118. print('The most frequent combination of stations is {} (start) and {} (end).\nThis route was used {} time(s).'.format(station_pair['Start Station'][0], station_pair['End Station'][0], station_pair['count'][0]))
  119.  
  120. print("\nThis took %s seconds." % (time.time() - start_time))
  121. print('-'*40)
  122.  
  123.  
  124. def trip_duration_stats(df):
  125. """Displays statistics on the total and average trip duration."""
  126.  
  127. print('\nCalculating Trip Duration...\n')
  128. start_time = time.time()
  129.  
  130. df['End Time'] = pd.to_datetime(df['End Time'])
  131. df['Trip Duration'] = df['End Time'] - df['Start Time']
  132. # display total travel time
  133. total_travel_time = df['Trip Duration'].sum()
  134. print('Total travel time of all bike rides is {}.'.format(total_travel_time))
  135. # display mean travel time
  136. mean_travel_time = df['Trip Duration'].mean()
  137. print('Mean travel time is {}.'.format(mean_travel_time))
  138.  
  139. print("\nThis took %s seconds." % (time.time() - start_time))
  140. print('-'*40)
  141.  
  142.  
  143. def user_stats(df):
  144. """Displays statistics on bikeshare users."""
  145.  
  146. print('\nCalculating User Stats...\n')
  147. start_time = time.time()
  148.  
  149. # Display counts of user types
  150. user_type_count = df['User Type'].value_counts()
  151. print('User counts:\n', user_type_count)
  152.  
  153. # Display counts of gender
  154. gender_count = df['Gender'].value_counts()
  155. print('Gender counts:\n', gender_count)
  156.  
  157. # Display earliest, most recent, and most common year of birth
  158. min_year = df['Birth Year'].min()
  159. print('The earliest year of birth is {}.'.format(min_year))
  160.  
  161. max_year = df['Birth Year'].max()
  162. print('The most recent year of birth is {}.'.format(max_year))
  163.  
  164. common_year = df['Birth Year'].mode()[0]
  165. print('The most common year of birth is {}.'.format(common_year))
  166.  
  167. print("\nThis took %s seconds." % (time.time() - start_time))
  168. print('-'*40)
  169.  
  170.  
  171. def main():
  172. while True:
  173. city, month, day = get_filters()
  174. df = load_data(city, month, day)
  175.  
  176. time_stats(df)
  177. station_stats(df)
  178. trip_duration_stats(df)
  179. user_stats(df)
  180.  
  181. restart = input('\nWould you like to restart? Enter yes or no.\n')
  182. if restart.lower() != 'yes':
  183. break
  184.  
  185.  
  186. if __name__ == "__main__":
  187. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement