Advertisement
Guest User

Untitled

a guest
Mar 19th, 2019
127
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.81 KB | None | 0 0
  1. import time
  2. import pandas as pd
  3. import numpy as np
  4.  
  5. CITY_DATA = { 'chicago': 'chicago.csv',
  6. 'new york city': 'new_york_city.csv',
  7. 'washington': 'washington.csv' }
  8.  
  9. def get_filters():
  10. """
  11. Asks user to specify a city, month, and day to analyze.
  12.  
  13. Returns:
  14. (str) city - name of the city to analyze
  15. (str) month - name of the month to filter by, or "all" to apply no month filter
  16. (str) day - name of the day of week to filter by, or "all" to apply no day filter
  17. """
  18. print('Hello! Let\'s explore some US bikeshare data!')
  19. # TO DO: get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
  20. while True:
  21. city = input('What city would you like to explore chicago, new york city, or washington? ').lower()
  22. if (city in['chicago', 'new york city', 'washington']):
  23. break
  24. else:
  25. print('That is not a valid city, please enter one of the three cities.')
  26.  
  27. # TO DO: get user input for month (all, january, february, ... , june)
  28. while True:
  29. month = input('Enter the month you would like to explore or all: ').lower()
  30. if (month in ['january', 'february', 'march', 'april', 'may', 'june', 'all']):
  31. break
  32. else: print('Sorry, that is not a valid entry.')
  33.  
  34. # TO DO: get user input for day of week (all, monday, tuesday, ... sunday)
  35. while True:
  36. day = input('Enter the day of the week you would like to explore: ').lower()
  37. if (day in ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday', 'all']):
  38. break
  39. else: print('Sorry, that is not a valid entry.')
  40.  
  41.  
  42. print('-'*40)
  43. return city, month, day
  44.  
  45.  
  46. def load_data(city, month, day):
  47. """
  48. Loads data for the specified city and filters by month and day if applicable.
  49.  
  50. Args:
  51. (str) city - name of the city to analyze
  52. (str) month - name of the month to filter by, or "all" to apply no month filter
  53. (str) day - name of the day of week to filter by, or "all" to apply no day filter
  54. Returns:
  55. df - Pandas DataFrame containing city data filtered by month and day
  56. """
  57. df = pd.read_csv(CITY_DATA[city])
  58.  
  59. df['Start Time'] = pd.to_datetime(df['Start Time'])
  60.  
  61. df['month'] = df['Start Time'].dt.month
  62. df['day'] = df['Start Time'].dt.weekday_name
  63. df['hour'] = df['Start Time'].dt.hour
  64.  
  65. if month != 'all':
  66. months = ['january', 'february', 'march', 'april', 'may', 'june']
  67. month = months.index(month) + 1
  68.  
  69. df = df[df['month'] == month]
  70.  
  71. if day != 'all':
  72. df = df[df['day'] == day.title()]
  73.  
  74.  
  75. return df
  76.  
  77.  
  78. def time_stats(df):
  79. """Displays statistics on the most frequent times of travel."""
  80.  
  81. print('\nCalculating The Most Frequent Times of Travel...\n')
  82. start_time = time.time()
  83.  
  84. # TO DO: display the most common month
  85. most_common_month = df['month'].value_counts()
  86. print('The most popular month for bike travel is: {}'.format(most_common_month))
  87.  
  88. # TO DO: display the most common day of week
  89. most_common_day = df['day'].value_counts()
  90. print('The most popular day for bike travel is: {}'.format(most_common_day))
  91.  
  92. # TO DO: display the most common start hour
  93. most_common_hour = df['hour'].value_counts()
  94. print('The most popular hour for bike travel is: {}'.format(most_common_hour))
  95.  
  96. print("\nThis took %s seconds." % (time.time() - start_time))
  97. print('-'*40)
  98.  
  99.  
  100. def station_stats(df):
  101. """Displays statistics on the most popular stations and trip."""
  102.  
  103. print('\nCalculating The Most Popular Stations and Trip...\n')
  104. start_time = time.time()
  105.  
  106. # TO DO: display most commonly used start station
  107. most_common_start = df['Start Station'].mode()[0]
  108. print('The most popular start station is: {}'.format(most_common_start))
  109.  
  110. # TO DO: display most commonly used end station
  111. most_common_end = df['End Station'].mode()[0]
  112. print('The most popular end station is: {}'.format(most_common_end))
  113.  
  114. # TO DO: display most frequent combination of start station and end station trip
  115. df['frequent_comb'] = df['Start Station'] + ' to ' + df['End Station']
  116. most_frequent_comb = df['frequent_comb'].mode().loc[0]
  117. print('The most frequent trip taken is from: {}'.format(most_frequent_comb))
  118.  
  119. print("\nThis took %s seconds." % (time.time() - start_time))
  120. print('-'*40)
  121.  
  122.  
  123. def trip_duration_stats(df):
  124. """Displays statistics on the total and average trip duration."""
  125.  
  126. print('\nCalculating Trip Duration...\n')
  127. start_time = time.time()
  128.  
  129. # TO DO: display total travel time
  130. total_travel_time = df['Trip Duration'].sum()
  131. print('The total travel time is: {} seconds.'. format(total_travel_time))
  132. # TO DO: display mean travel time
  133. mean_travel_time = df['Trip Duration'].mean()
  134. print('The average travel time is: {} seconds.'.format(mean_travel_time))
  135.  
  136. print("\nThis took %s seconds." % (time.time() - start_time))
  137. print('-'*40)
  138.  
  139.  
  140. def user_stats(df):
  141. """Displays statistics on bikeshare users."""
  142.  
  143. print('\nCalculating User Stats...\n')
  144. start_time = time.time()
  145.  
  146. # TO DO: Display counts of user types
  147. user_types = df['User Type'].value_counts()
  148. print('Here are the user types: {}'.format(user_types))
  149.  
  150. # TO DO: Display counts of gender
  151. while True:
  152. if ('Gender' not in df):
  153. break
  154. else:
  155. gender_counts = df['Gender'].value_counts()
  156. print('Here are the user counts by gender: {}'.format(gender_counts))
  157. break
  158.  
  159. # TO DO: Display earliest, most recent, and most common year of birth
  160. while True:
  161. if ('Birth Year' not in df):
  162. break
  163. else:
  164. earliest_birth_year = df['Birth Year'].min()
  165. most_recent_birth_year = df['Birth Year'].max()
  166. most_common_birth_year = df['Birth Year'].mode()[0]
  167. print('The earliest birht year is {}, the most recent birth year is {}, and the most common birth year is {}.'.format(earliest_birth_year, most_recent_birth_year, most_common_birth_year))
  168. break
  169.  
  170. print("\nThis took %s seconds." % (time.time() - start_time))
  171. print('-'*40)
  172.  
  173.  
  174. def main():
  175. while True:
  176. city, month, day = get_filters()
  177. df = load_data(city, month, day)
  178.  
  179. time_stats(df)
  180. station_stats(df)
  181. trip_duration_stats(df)
  182. user_stats(df)
  183.  
  184. restart = input('\nWould you like to restart? Enter yes or no.\n')
  185. if restart.lower() != 'yes':
  186. break
  187.  
  188.  
  189. if __name__ == "__main__":
  190. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement