Advertisement
Guest User

Untitled

a guest
Mar 22nd, 2019
109
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.17 KB | None | 0 0
  1. import time
  2. import pandas as pd
  3. import numpy as np
  4.  
  5. CITY_DATA = { 'chicago': 'chicago.csv',
  6.               'new york city': 'new_york_city.csv',
  7.               'washington': 'washington.csv' }
  8.  
  9.  
  10. cities = ['washington', 'chicago', 'new york city']
  11. months = ['january', 'february', 'march', 'april', 'may', 'june', 'all']
  12. days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday', 'all']
  13.  
  14. def get_filters():
  15.     """
  16.    Asks user to specify a city, month, and day to analyze.
  17.  
  18.    Returns:
  19.        (str) city - name of the city to analyze
  20.        (str) month - name of the month to filter by, or "all" to apply no month filter
  21.        (str) day - name of the day of week to filter by, or "all" to apply no day filter
  22.    """
  23.     print('Hello! Let\'s explore some US bikeshare data!')
  24.     # get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
  25.     while True:
  26.         city = input('Enter the city you wish to analyze:> ').lower()  
  27.         if city not in cities:
  28.             print('\n"{}" is not found in our records.'.format(city))
  29.             continue
  30.         else:
  31.             break
  32.  
  33.     # get user input for month (all, january, february, ... , june)
  34.     while True:
  35.         month = input('What month would you like to filter your data by?:').lower()
  36.         if month not in months:
  37.             print('\n"{}" is not found in our records.'.format(month))
  38.             continue
  39.         else:
  40.             break
  41.  
  42.     # get user input for day of week (all, monday, tuesday, ... sunday)
  43.     while True:
  44.         day = input('What day would you like to filter your data by?:').lower()
  45.         if day not in days:
  46.             print('\n"{}" is not found in our records.'.format(day))
  47.             continue
  48.         else:
  49.             break
  50.  
  51.  
  52.     print('-'*40)
  53.     return city, month, day
  54.  
  55.  
  56. def load_data(city, month, day):
  57.     """
  58.    Loads data for the specified city and filters by month and day if applicable.
  59.  
  60.    Args:
  61.        (str) city - name of the city to analyze
  62.        (str) month - name of the month to filter by, or "all" to apply no month filter
  63.        (str) day - name of the day of week to filter by, or "all" to apply no day filter
  64.    Returns:
  65.        df - Pandas DataFrame containing city data filtered by month and day
  66.    """
  67.     df = pd.read_csv(CITY_DATA[city])
  68.    
  69.     df["Start Time"] = pd.to_datetime(df["Start Time"])
  70.  
  71.     if month != 'all':
  72.         months = ['january', 'february', 'march', 'april', 'may', 'june']
  73.         month = months.index(month) + 1
  74.         # filter to create the new dataframe        
  75.        
  76.        
  77.        
  78.     if day != 'all':
  79.         days = ['sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday']
  80.         day = days.index(day) + 1
  81.         # filter to create the new dataframe        
  82.        
  83.                    
  84.     return df
  85.  
  86.  
  87. def time_stats(df):
  88.     """Displays statistics on the most frequent times of travel."""
  89.  
  90.     print('\nCalculating The Most Frequent Times of Travel...\n')
  91.     start_time = time.time()
  92.        
  93.         # convert the Start Time column to datetime
  94.     df['Start Time'] = pd.to_datetime(df['Start Time'])
  95.    
  96.     # extract month from the Start Time column to create a month column
  97.     df['month'] = df['Start Time'].dt.month
  98.    
  99.     # find the most popular month
  100.     popular_month = df['month'].mode()[0]
  101.     print('Most Popular Month: {}'.format(popular_month))
  102.  
  103.     # TO DO: display the most common day of week
  104.     # convert the Start Time column to datetime
  105.     df['Start Time'] = pd.to_datetime(df['Start Time'])
  106.    
  107.     # create day column
  108.     df['day'] = df['Start Time'].dt.day
  109.    
  110.     # find most popular day
  111.     popular_day = df['day'].mode()[0]
  112.     print('Most Popular Day: {}'.format(popular_day))
  113.  
  114.     # TO DO: display the most common start hour
  115.     df['Start Time'] = pd.to_datetime(df['Start Time'])
  116.    
  117.     #create an hour column
  118.     df['hour'] = df['Start Time'].dt.hour
  119.    
  120.     #most popular hour
  121.     popular_hour = df['hour'].mode()[0]
  122.     print('Most Popular Hour: {}'.format(popular_hour))
  123.  
  124.     print("\nThis took %s seconds." % (time.time() - start_time))
  125.     print('-'*40)
  126.  
  127.  
  128. def station_stats(df):
  129.     """Displays statistics on the most popular stations and trip."""
  130.  
  131.     print('\nCalculating The Most Popular Stations and Trip...\n')
  132.     start_time = time.time()
  133.  
  134.     # TO DO: display most commonly used start station
  135.     popular_start_station = df['Start Station'].mode()[0]
  136.     print('Most Popular Start Station: {}'.format(popular_start_station))
  137.    
  138.  
  139.     # TO DO: display most commonly used end station
  140.     popular_end_station = df['End Station'].mode()[0]
  141.     print('Most Popular End Station: {}'.format(popular_end_station))
  142.    
  143.    
  144.     # TO DO: display most frequent combination of start station and end station trip
  145.     most_popular_start_end_station = df[['Start Station', 'End Station']].mode().loc[0]
  146.     print("The most commonly used start station and end station : {}, {}".format(most_popular_start_end_station[0], most_popular_start_end_station[1]))
  147.    
  148.     print("\nThis took %s seconds." % (time.time() - start_time))
  149.     print('-'*40)
  150.  
  151.  
  152. def trip_duration_stats(df):
  153.     """Displays statistics on the total and average trip duration."""
  154.  
  155.     print('\nCalculating Trip Duration...\n')
  156.     start_time = time.time()
  157.  
  158.     # TO DO: display total travel time
  159.     travel_time = df['Trip Duration'].sum()
  160.     print('Total Travel Time: {}'.format(travel_time))
  161.  
  162.     # TO DO: display mean travel time
  163.     mean_time = df['Trip Duration'].mean()
  164.     print('Mean Travel Time: {}'.format(mean_time))
  165.  
  166.     print("\nThis took %s seconds." % (time.time() - start_time))
  167.     print('-'*40)
  168.  
  169.  
  170. def user_stats(df):
  171.     """Displays statistics on bikeshare users."""
  172.  
  173.     print('\nCalculating User Stats...\n')
  174.     start_time = time.time()
  175.    
  176.      # display counts of user types
  177.     user_types = df['User Type'].value_counts()
  178.     print('User Types: \n{}\n'.format(user_types))
  179.    
  180.     try:
  181.          # TO DO: Display counts of gender
  182.          gender_types = df['Gender'].value_counts()
  183.  
  184.          # TO DO: Display earliest, most recent, and most common year of birth
  185.          earliest_birth_year = df['Birth Year'].min()
  186.    
  187.          most_recent_birth_year = df['Birth Year'].max()
  188.    
  189.          popular_birth_year = df['Birth Year'].mode()[0]
  190.    
  191.     except KeyError:
  192.          # print error statement for missing data
  193.          print('Gender data not available. \nCannot display statistics.\n')
  194.    
  195.          print('Birth Year data not available.  \nCannot display statistics.')
  196.    
  197.     else:
  198.          # display counts of gender
  199.          print(gender_types)
  200.          # display earliest birth year
  201.          print('Earliest Birth Year:', earliest_birth_year.min())
  202.          # display most recent birth
  203.          print('Most Recent Birth Year:', most_recent_birth_year.min())
  204.          # display most popular birth
  205.          print('Most Popular Birth Year:', popular_birth_year)
  206.    
  207.  
  208.     print("\nThis took %s seconds." % (time.time() - start_time))
  209.     print('-'*40)
  210.  
  211.  
  212.  
  213.    
  214.    
  215. def main():
  216.     while True:
  217.         city, month, day = get_filters()
  218.         df = load_data(city, month, day)
  219.  
  220.         time_stats(df)
  221.         station_stats(df)
  222.         trip_duration_stats(df)
  223.         user_stats(df)
  224.  
  225.         restart = input('\nWould you like to restart? Enter yes or no.\n')
  226.         if restart.lower() != 'yes':
  227.             break
  228.         line_number = 0
  229. def raw_data(df):
  230.    
  231.     all_data = input('Would you like to see the raw data? Please enter yes or no.')
  232.  
  233.     while all_data not in ['yes', 'no']:
  234.          print('Please enter yes or no')
  235.     all_data = input('Would you like to see the raw data? Please enter yes or no.')
  236.  
  237.     if all_data == 'no':
  238.         return
  239.     elif all_data == 'yes':
  240.         print(df.iloc[line_number])
  241.  
  242.     keep_going = input('Do you want to see more data? Enter yes or no.').lower()
  243.  
  244.     if keep_going == 'no':
  245.         return
  246.     elif keep_going == 'yes':
  247.         print(df.iloc[line_number : line_number + 5])
  248.         line_number += 5    
  249.  
  250.  
  251. if __name__ == "__main__":
  252.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement