Advertisement
Guest User

Untitled

a guest
Apr 25th, 2019
118
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 7.32 KB | None | 0 0
  1. import time
  2. import pandas as pd
  3. import numpy as np
  4.  
  # Maps the lower-case city name typed by the user to the CSV file that is
  # read for that city.
  CITY_DATA = {
      'chicago': 'chicago.csv',
      'new york city': 'new_york_city.csv',
      'washington': 'washington.csv',
  }
  8.  
  def get_filters():
      """
      Asks user to specify a city, month, and day to analyze.

      Every answer is stripped of surrounding whitespace and lower-cased, and
      each question is asked again until one of the accepted options is entered.

      Returns:
          (str) city - name of the city to analyze
          (str) month - name of the month to filter by, or "all" to apply no month filter
          (str) day - name of the day of week to filter by, or "all" to apply no day filter
      """
      print('Hello! Let\'s explore some US bikeshare data!')

      city = _ask_until_valid(
          "Choose a city between Chicago, New York City, Washington: ",
          ('chicago', 'new york city', 'washington'),
          'Please enter a valid city',
      )

      month = _ask_until_valid(
          "Choose a month between January and June. If you would like to look at all months please enter all: ",
          ('all', 'january', 'february', 'march', 'april', 'may', 'june'),
          'Please enter a valid month',
      )

      day = _ask_until_valid(
          "Choose a day you would like to explore. If you want to look at all days please enter all: ",
          ('all', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'),
          'Please enter a valid day',
      )

      print('-'*40)
      return city, month, day


  def _ask_until_valid(prompt, valid_choices, error_message):
      """
      Prompts with `prompt` until the normalized answer is in `valid_choices`.

      Args:
          (str) prompt - text shown to the user on every attempt
          (tuple) valid_choices - accepted lower-case answers
          (str) error_message - text printed after each rejected answer
      Returns:
          (str) the accepted answer, stripped and lower-cased
      """
      while True:
          # Reading inside the loop is what lets a rejected answer be retried;
          # reading once before the loop would repeat the error message forever.
          answer = input(prompt).strip().lower()
          if answer in valid_choices:
              return answer
          print(error_message)
  42.  
  43.  
  44.  
  def load_data(city, month, day):
      """
      Loads data for the specified city and filters by month and day if applicable.

      Besides filtering, the returned frame gains four derived columns:
      'month' (1-12), 'day_of_week' (English day name), 'hour' (0-23) and
      'trip' ("<Start Station> - <End Station>").

      Args:
          (str) city - name of the city to analyze; must be a key of CITY_DATA
          (str) month - name of the month to filter by, or "all" to apply no month filter
          (str) day - name of the day of week to filter by, or "all" to apply no day filter
      Returns:
          df - Pandas DataFrame containing city data filtered by month and day
      """
      df = pd.read_csv(CITY_DATA[city])

      df['Start Time'] = pd.to_datetime(df['Start Time'])
      df['month'] = df['Start Time'].dt.month
      # `.dt.weekday_name` was removed in pandas 1.0; `.dt.day_name()` is the
      # supported accessor and yields the same title-cased English names.
      df['day_of_week'] = df['Start Time'].dt.day_name()
      df['hour'] = df['Start Time'].dt.hour
      df['trip'] = df['Start Station'] + ' - ' + df['End Station']

      if month != 'all':
          months = ['january', 'february', 'march', 'april', 'may', 'june']
          # Position in the list + 1 gives the calendar month number.
          month_number = months.index(month) + 1
          df = df[df['month'] == month_number]

      if day != 'all':
          # day_name() values are title-cased, so normalize the lower-case input.
          df = df[df['day_of_week'] == day.title()]

      print('-'*40)
      return df
  74.  
  75.  
  def time_stats(df):
      """
      Displays statistics on the most frequent times of travel.

      Args:
          df - Pandas DataFrame with 'month', 'day_of_week' and 'hour' columns
      """
      print('\nCalculating The Most Frequent Times of Travel...\n')
      start_time = time.time()

      if df.empty:
          # mode() of an empty column has no first element, so indexing it
          # would raise; report the situation instead.
          print('No trips match the selected filters.')
      else:
          # Most common month (printed as the month number stored in the column).
          most_common_month = df['month'].mode().iloc[0]
          print('The most common month was: ', most_common_month)

          # Most common day of week.
          most_common_day_of_week = df['day_of_week'].mode().iloc[0]
          print('The most common day was: ', most_common_day_of_week)

          # Most common start hour.
          most_common_hour = df['hour'].mode().iloc[0]
          print('The most common hour was: ', most_common_hour)

      print("\nThis took %s seconds." % (time.time() - start_time))
      print('-'*40)
  97.  
  98.  
  def station_stats(df):
      """
      Displays statistics on the most popular stations and trip.

      Args:
          df - Pandas DataFrame with 'Start Station', 'End Station' and 'trip' columns
      """
      print('\nCalculating The Most Popular Stations and Trip...\n')
      start_time = time.time()

      if df.empty:
          # mode() of an empty column has no first element, so indexing it
          # would raise; report the situation instead.
          print('No trips match the selected filters.')
      else:
          # Most commonly used start station.
          common_start = df['Start Station'].mode().iloc[0]
          print('The most common starting station was: ', common_start)

          # Most commonly used end station.
          common_end = df['End Station'].mode().iloc[0]
          print('The most common end station was: ', common_end)

          # Most frequent start/end pairing, read from the 'trip' column.
          most_frequent_trip = df['trip'].mode().iloc[0]
          print('The most common combination of start station and end station trip was: ', most_frequent_trip)

      print("\nThis took %s seconds." % (time.time() - start_time))
      print('-'*40)
  119.  
  120.  
  def trip_duration_stats(df):
      """
      Displays statistics on the total and average trip duration.

      Args:
          df - Pandas DataFrame with a 'Trip Duration' column
      """
      print('\nCalculating Trip Duration...\n')
      start_time = time.time()

      if df.empty:
          # The mean of an empty column is NaN; say so plainly rather than
          # printing a meaningless figure.
          print('No trips match the selected filters.')
      else:
          durations = df['Trip Duration']

          # Total travel time, in whatever unit the column holds.
          print('Total travel time is {}.'.format(durations.sum()))

          # Mean travel time.
          print('Mean travel time is {}.'.format(durations.mean()))

      print("\nThis took %s seconds." % (time.time() - start_time))
      print('-'*40)
  137.  
  138.  
  def user_stats(df):
      """
      Displays statistics on bikeshare users.

      The 'Gender' and 'Birth Year' sections are skipped with a short notice
      when the corresponding column is absent from the frame.

      Args:
          df - Pandas DataFrame with a 'User Type' column and, optionally,
               'Gender' and 'Birth Year' columns
      """
      print('\nCalculating User Stats...\n')
      start_time = time.time()

      # Counts of user types.
      user_types = df['User Type'].value_counts()
      print('Counts of user types:', user_types)

      # Counts of gender. A Series has no .query(), so tally with value_counts()
      # and default to 0 for a label that never occurs.
      if 'Gender' in df.columns:
          gender_counts = df['Gender'].value_counts()
          num_male = int(gender_counts.get('Male', 0))
          num_female = int(gender_counts.get('Female', 0))
          print('The number of male user is {}  and of female user is {}.'.format(num_male, num_female))
      else:
          print('Gender data is not available.')

      # Earliest, most recent, and most common year of birth.
      birth_years = df['Birth Year'].dropna() if 'Birth Year' in df.columns else None
      if birth_years is None or birth_years.empty:
          print('Birth year data is not available.')
      else:
          earliest_yob = int(birth_years.min())
          most_recent_yob = int(birth_years.max())
          # mode() returns a Series (several values on a tie); take the first.
          most_common_yob = int(birth_years.mode().iloc[0])
          print('The earliest year of birth is {}.\n'
                'The most recent year of birth is {}.\n'
                'The most common year of birth is {}.'.format(earliest_yob, most_recent_yob, most_common_yob))

      print("\nThis took %s seconds." % (time.time() - start_time))
      print('-'*40)
  164.  
  165.  
  166.  
  def main():
      """
      Runs the interactive analysis loop.

      Each pass asks for filters, loads the matching data, prints the four
      statistics reports, offers to page through the raw rows, and then asks
      whether to start over. The loop ends on any answer other than yes/y.
      """
      while True:
          city, month, day = get_filters()
          df = load_data(city, month, day)
          time_stats(df)
          station_stats(df)
          trip_duration_stats(df)
          user_stats(df)

          _show_raw_data(df)

          restart = input('\nWould you like to restart? Enter yes or no.\n')
          # Accept the same affirmative answers as the raw-data prompts.
          if restart.strip().lower() not in ('yes', 'y'):
              break


  def _show_raw_data(df, page_size=5):
      """
      Prints `df` `page_size` rows at a time for as long as the user answers yes.

      Args:
          df - Pandas DataFrame whose rows are displayed
          (int) page_size - number of rows printed per confirmation
      """
      answer = input(
          'Would you like to see {} rows of data?\nPlease enter yes or no.\n'.format(page_size)
      ).strip().lower()

      offset = 0
      while answer in ('yes', 'y'):
          print(df.iloc[offset:offset + page_size])
          offset += page_size
          if offset >= len(df):
              # Nothing left to show, so do not keep asking.
              print('There are no more rows to display.')
              break
          answer = input(
              'Would you like to see another {} rows of data?\nPlease enter yes or no\n'.format(page_size)
          ).strip().lower()


  if __name__ == "__main__":
      main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement