Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import time
- import pandas as pd
- import numpy as np
# Maps each supported city name to the CSV file that holds its trip records.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

# Replies accepted by the city prompt.
cities = ['washington', 'chicago', 'new york city']

# Replies accepted by the month prompt; 'all' means "do not filter by month".
months = [
    'january', 'february', 'march',
    'april', 'may', 'june',
    'all',
]

# Replies accepted by the day prompt; 'all' means "do not filter by day".
days = [
    'monday', 'tuesday', 'wednesday', 'thursday', 'friday',
    'saturday', 'sunday',
    'all',
]
def _ask_for_choice(prompt, valid_options):
    """Keep prompting until the reply is one of valid_options; return that reply."""
    while True:
        # Normalise case and surrounding whitespace so that a reply such as
        # " Chicago " is accepted instead of being rejected as unknown.
        answer = input(prompt).strip().lower()
        if answer in valid_options:
            return answer
        print('\n"{}" is not found in our records.'.format(answer))


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Each question is repeated until the reply (case-insensitive, surrounding
    whitespace ignored) is one of the accepted values listed in the
    module-level `cities`, `months` and `days` lists.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    city = _ask_for_choice('Enter the city you wish to analyze:> ', cities)
    month = _ask_for_choice('What month would you like to filter your data by?:', months)
    day = _ask_for_choice('What day would you like to filter your data by?:', days)

    print('-'*40)
    return city, month, day
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze (a key of CITY_DATA)
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month or day is neither "all" nor a recognised name
    """
    df = pd.read_csv(CITY_DATA[city])
    df["Start Time"] = pd.to_datetime(df["Start Time"])

    if month != 'all':
        month_names = ['january', 'february', 'march', 'april', 'may', 'june']
        # Calendar months are 1-based, list positions are 0-based.
        month_number = month_names.index(month) + 1
        df = df[df["Start Time"].dt.month == month_number]

    if day != 'all':
        # Ordered Monday-first so that the list position equals the value
        # pandas reports in `.dt.dayofweek` (Monday == 0 ... Sunday == 6).
        day_names = ['monday', 'tuesday', 'wednesday', 'thursday',
                     'friday', 'saturday', 'sunday']
        weekday_number = day_names.index(day)
        # Re-read the column from the (possibly month-filtered) frame so the
        # boolean mask lines up with the rows that are still present.
        df = df[df["Start Time"].dt.dayofweek == weekday_number]

    return df
def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Prints the most common month (as its number), the most common day of the
    week (as its English name) and the most common start hour (0-23), all
    derived from the 'Start Time' column. The DataFrame passed in is not
    modified.

    Args:
        df - Pandas DataFrame with a 'Start Time' column holding datetimes
             or values that pd.to_datetime can parse
    """
    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # Parse once into a local Series instead of re-converting the column for
    # every statistic and writing scratch columns into the caller's frame.
    start_times = pd.to_datetime(df['Start Time'])

    if start_times.empty:
        # mode()[0] would fail on an empty Series, so report and move on.
        print('No trip data available for the selected filters.')
    else:
        popular_month = start_times.dt.month.mode()[0]
        print('Most Popular Month: {}'.format(popular_month))

        # day_name() gives the weekday ("Monday", ...); `.dt.day` would be
        # the day of the month, which is not what this statistic is about.
        popular_day = start_times.dt.day_name().mode()[0]
        print('Most Popular Day: {}'.format(popular_day))

        popular_hour = start_times.dt.hour.mode()[0]
        print('Most Popular Hour: {}'.format(popular_hour))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most common start station, the most common end station and
    the most frequent (start station, end station) pair.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # mode()[0] / idxmax() would fail on an empty frame.
        print('No trip data available for the selected filters.')
    else:
        popular_start_station = df['Start Station'].mode()[0]
        print('Most Popular Start Station: {}'.format(popular_start_station))

        popular_end_station = df['End Station'].mode()[0]
        print('Most Popular End Station: {}'.format(popular_end_station))

        # Count each (start, end) pair as a unit. Taking the mode of the two
        # columns separately would glue together the busiest start and the
        # busiest end even if no trip ever connected them.
        trip_counts = df.groupby(['Start Station', 'End Station']).size()
        top_start, top_end = trip_counts.idxmax()
        print("The most commonly used start station and end station : {}, {}".format(top_start, top_end))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
def trip_duration_stats(df):
    """Print the summed and the average value of the 'Trip Duration' column."""
    print('\nCalculating Trip Duration...\n')
    began = time.time()

    durations = df['Trip Duration']

    # Total over every trip in the frame.
    print('Total Travel Time: {}'.format(durations.sum()))

    # Arithmetic mean over the same column.
    print('Mean Travel Time: {}'.format(durations.mean()))

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)
def user_stats(df):
    """Displays statistics on bikeshare users.

    Always prints the counts per user type. Gender counts and birth-year
    statistics (earliest, most recent, most common) are printed only when
    the corresponding column exists; otherwise a "not available" notice is
    printed for that column alone.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Gender' and 'Birth Year' columns
    """
    print('\nCalculating User Stats...\n')
    start_time = time.time()

    user_types = df['User Type'].value_counts()
    print('User Types: \n{}\n'.format(user_types))

    # The two optional columns are handled independently so that a missing
    # 'Birth Year' does not suppress gender counts (or the other way round).
    if 'Gender' in df.columns:
        print(df['Gender'].value_counts())
    else:
        print('Gender data not available. \nCannot display statistics.\n')

    # Drop missing entries up front; an all-missing column is treated the
    # same as an absent one because there is nothing to summarise.
    birth_years = df['Birth Year'].dropna() if 'Birth Year' in df.columns else None
    if birth_years is None or birth_years.empty:
        print('Birth Year data not available. \nCannot display statistics.')
    else:
        # Missing values force the column to float; show years as whole numbers.
        print('Earliest Birth Year:', int(birth_years.min()))
        print('Most Recent Birth Year:', int(birth_years.max()))
        print('Most Popular Birth Year:', int(birth_years.mode()[0]))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
def main():
    """Run the interactive session.

    Repeatedly asks for filters, loads the matching data, prints every
    statistics section, offers to page through the raw rows, and then asks
    whether to start over. Any reply other than "yes" (case-insensitive)
    ends the session.
    """
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)
        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)
        # Offer the raw rows after the summaries; raw_data was previously
        # defined but never invoked.
        raw_data(df)
        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break


# Row offset at which raw-data paging starts.
line_number = 0


def raw_data(df):
    """Interactively print the rows of df, five at a time.

    Asks whether raw data should be shown (re-asking until the reply is
    "yes" or "no", case-insensitive). On "yes" the first five rows are
    printed; after each page the user is asked whether to continue, and any
    reply other than "yes" stops. Paging also stops when the rows run out.

    Args:
        df - Pandas DataFrame whose rows are to be displayed
    """
    question = 'Would you like to see the raw data? Please enter yes or no.'
    all_data = input(question).strip().lower()
    while all_data not in ['yes', 'no']:
        print('Please enter yes or no')
        all_data = input(question).strip().lower()
    if all_data == 'no':
        return

    # Use a local cursor seeded from the module-level start offset. Updating
    # `line_number` itself inside the function would make it a local name
    # and raise UnboundLocalError on first read.
    row = line_number
    total_rows = len(df)
    while row < total_rows:
        print(df.iloc[row:row + 5])
        row += 5
        if row >= total_rows:
            break
        keep_going = input('Do you want to see more data? Enter yes or no.').strip().lower()
        if keep_going != 'yes':
            return
    print('No more data to display.')


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement