Advertisement
PythonPasteNow

Pasted Python Code

Dec 10th, 2019
133
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 12.91 KB | None | 0 0
  1. import time
  2. import pandas as pd
  3. import numpy as np
  4.  
  5. CITY_DATA = { 'chicago': 'chicago.csv',
  6.               'new york city': 'new_york_city.csv',
  7.               'washington': 'washington.csv' }
  8.  
  9. def get_filters():
  10.     """
  11.    Asks user to specify a city to analyze.
  12.  
  13.    Returns:
  14.        (str) city 'Birth Year' in df1 - name of the city to analyze
  15.        (str) month - name of the month to filter by, or "all" to apply no month filter
  16.        (str) day - name of the day of week to filter by, or "all" to apply no day filter
  17.    """
  18.     print('\nHello! Let\'s explore some US bikeshare data!')
  19.     # TO DO: get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
  20.        
  21.     city = input("To view BikeShare information please choose one of the following cities: Chicago, New York City, or Washington  ").lower()
  22.     while city not in ('chicago', 'new york city', 'washington'):
  23.         print("\nPlease choose Chicago, New York City, or Washington")
  24.         city = input("To view BikeShare information please choose one of the following cities: Chicago, New York City, or Washington  ").lower()
  25.     print("\nThank you for choosing {}.".format(city).title())
  26.     #return city
  27.  
  28.     #TO DO: get user input for month (all, jan, feb, mar, apr, may, jun)
  29.     monthchoice = input("\nChoose the month you would like to see from one of the following: All, January, February, March, April, May, or June: ").lower()
  30.     while monthchoice not in ('all', 'january', 'february', 'march', 'april', 'may', 'june'):
  31.         print("\nPlease choose All, January, February, March, April, May, or June")
  32.         monthchoice = input("\nChoose the month you would like to see from one of the following: All, January, February, March, April, May, or June: ").lower()
  33.     print("\nThank you for choosing {}.".format(monthchoice).title())
  34.    
  35.     #TO DO: get user input for day of week (all, mon, tue, wed, thr, fri, sat, sun)
  36.     daychoice = input("\nChoose the day you would like to see from one of the following: All, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday: ").lower()
  37.     while daychoice not in ('all', 'monday', 'tuesday', 'wednesday', 'thrusday', 'friday', 'saturday', 'sunday'):
  38.         print("\nPlease choose All, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, or Sunday")
  39.         daychoice = input("\nChoose the day you would like to see from one of the following: All, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday: ").lower()
  40.     print("\nThank you for choosing {}.".format(daychoice).title())
  41.    
  42.    
  43.     print('-'*40)
  44.     return city, monthchoice, daychoice
  45.  
  46.  
  47. def load_data(city, monthchoice, daychoice):
  48.     """
  49.    Loads data for the specified city.
  50.  
  51.    Args:
  52.        (str) city - name of the city to analyze
  53.        (str) monthchoice - name of month to filter by
  54.        (str) daychoice - name of day to filter by
  55.    Returns:
  56.        df1 - Pandas DataFrame containing city data filtered by month and day
  57.    """
  58.     #load data from choosen city
  59.     df1 = pd.read_csv(CITY_DATA[city])
  60.  
  61.     #convert Start Time to dateime
  62.     df1['Start Time'] = pd.to_datetime(df1['Start Time'])
  63.    
  64.     #create monthchoice and daychoice into columns
  65.     df1['monthchoice'] =df1['Start Time'].dt.month
  66.     df1['daychoice'] =df1['Start Time'].dt.weekday_name
  67.    
  68.     #use monthchoice to filter by month, if all don't filter by monthchoice, then create dataframe
  69.     if monthchoice != 'all':
  70.         monthsname = ['january', 'february', 'march', 'april', 'may', 'june']
  71.         monthchoice = monthsname.index(monthchoice) + 1
  72.         df1 = df1[df1['monthchoice'] == monthchoice]
  73.        
  74.     #use daychoice to filter by day, if 'all' don't filter by daychoice, then create dataframe
  75.     if daychoice != 'all':
  76.         dayofweekname = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
  77.         daychoice = dayofweekname.index(daychoice) + 1
  78.         df1 = df1[df1['daychoice'] == daychoice]
  79.    
  80.    
  81.    
  82.    
  83.     return df1
  84.  
  85.  
  86. def time_stats(df1):
  87.     """Displays statistics on the most frequent times of travel."""
  88.  
  89.     print('\nCalculating The Most Frequent Times of Travel...\n')
  90.  
  91.     start_time = time.time()
  92.  
  93.     # TO DO: display the most common month
  94.  
  95. #extract month from date/time
  96.     df1['month'] = pd.DatetimeIndex(df1['Start Time']).month
  97.  
  98. #Calculate most popular month - it will be a number
  99.     monthnum = df1['month'].mode()#.values[0]
  100.  
  101. #convert most popular month number to month name
  102.     monthnum = 1
  103.  
  104.     if monthnum == 1:
  105.          monthname = "January"
  106.     elif monthnum == 2:
  107.         monthname = "February"
  108.     elif monthnum == 3:
  109.         monthname = "March"  
  110.     elif monthnum == 4:
  111.        monthname = "April"
  112.     elif monthnum == 5:
  113.        monthname = "May"
  114.     elif monthnum == 6:
  115.        monthname = "June"
  116.  
  117.  
  118.        
  119.     print("\nThe most popular month is {}.".format(
  120.         monthname))
  121.  
  122.  
  123.  
  124.     # TO DO: display the most common day of the week
  125.  
  126. #extract day of the week from date/time
  127.     df1['dayofweek'] = pd.DatetimeIndex(df1['Start Time']).day
  128.  
  129.      
  130. #Calculate most popular day - it will be a number
  131.     daynum = df1['dayofweek'].mode()#.values[0]
  132.  
  133.  
  134. #convert most popular day number to month name
  135.     daynum = 1
  136.  
  137.     if daynum == 1:
  138.         dayname = "Sunday"
  139.     elif daynum == 2:
  140.         dayname = "Monday"
  141.     elif daynum == 3:
  142.         dayname = "Tuesday"  
  143.     elif daynum == 4:
  144.         dayname = "Wednesday"
  145.     elif daynum == 5:
  146.         dayname = "Thursday"
  147.     elif daynum == 6:
  148.         dayname = "Friday"
  149.     elif daynum == 7:
  150.         dayname = "Saturday"
  151.      
  152. #print(dayname) tested and dayname is correct!
  153.  
  154.     print("\nThe most popular day of the week is {}.".format(
  155.         dayname))
  156.  
  157. #extract day of the week from date/time
  158.     df1['hourofday'] = pd.DatetimeIndex(df1['Start Time']).hour
  159.      
  160. #Calculate most popular day - it will be a number
  161.     hournum = df1['hourofday'].mode()#.values[0]
  162.  
  163.  
  164. #convert most popular day number to month name
  165.     hournum = 1
  166.  
  167.     if hournum == 1:
  168.         hourname = "1am"
  169.     elif hournum == 2:
  170.         hourname = "2am"
  171.     elif hournum == 3:
  172.         hourname = "3am"  
  173.     elif hournum == 4:
  174.         hourname = "4am"
  175.     elif hournum == 5:
  176.         hourname = "5am"
  177.     elif hournum == 6:
  178.         hourname = "6am"
  179.     elif hournum == 7:
  180.         hourname = "7am"
  181.     elif hournum == 8:
  182.         hourname = "8am"
  183.     elif hournum == 9:
  184.         hourname = "9am"
  185.     elif hournum == 10:
  186.         hourname = "10am"  
  187.     elif hournum == 11:
  188.         hourname = "11am"
  189.     elif hournum == 12:
  190.         hourname = "12pm"
  191.     elif hournum == 13:
  192.         hourname = "1pm"
  193.     elif hournum == 14:
  194.         hourname = "2pm"
  195.     elif hournum == 15:
  196.         hourname = "3pm"  
  197.     elif hournum == 16:
  198.         hourname = "4pm"
  199.     elif hournum == 17:
  200.         hourname = "5pm"
  201.     elif hournum == 18:
  202.         hourname = "6pm"
  203.     elif hournum == 19:
  204.         hourname = "7pm"
  205.     elif hournum == 20:
  206.         hourname = "8pm"
  207.     elif hournum == 21:
  208.         hourname = "9pm"
  209.     elif hournum == 22:
  210.         hourname = "10pm"  
  211.     elif hournum == 23:
  212.         hourname = "11pm"
  213.     elif hournum == 24:
  214.         hourname = "12am"
  215.  
  216.  
  217. #print(hourname) tested and hourname is correct!
  218.  
  219.     print("\nThe most popular hour of the day is {}.".format(
  220.         hourname))
  221.  
  222.     print("\nThis took %s seconds." % (time.time() - start_time))
  223.     print('-'*40)
  224.  
  225. def station_stats(df1):
  226.     """Displays statistics on the most popular stations and trip."""
  227.  
  228.     print('\nCalculating The Most Popular Stations and Trip...\n')
  229.     start_time = time.time()
  230.  
  231.     # TO DO: display most commonly used start station
  232.     print('\nThe most popular starting station is {}.'.format(
  233.         str(df1['Start Station'].mode().values[0])))
  234.  
  235.     # TO DO: display most commonly used end station
  236.     print('\nThe most popular ending station is {}.'.format(
  237.         str(df1['End Station'].mode().values[0])))
  238.  
  239.     # TO DO: display most frequent combination of start station and end station trip
  240.     df1['startendstation'] = df1['Start Station']+ " to " +df1['End Station']
  241.     print("\nThe most popular trip from start station to end station is {}.".format(
  242.         str(df1['startendstation'].mode().values[0])))
  243.  
  244.     print("\nThis took %s seconds." % (time.time() - start_time))
  245.     print('-'*40)
  246.  
  247.  
  248. def trip_duration_stats(df1):
  249.     """Displays statistics on the total and average trip duration."""
  250.  
  251.     print('\nCalculating Trip Duration...\n')
  252.     start_time = time.time()
  253.  
  254.     # TO DO: display total travel time
  255.     print("\nThe total travel time is {} years.".format(
  256.         str(df1['Trip Duration'].sum()//31536000)))
  257.  
  258.     # TO DO: display mean travel time
  259.     print("\nThe average travel time is {} minutes.".format(
  260.         str(df1['Trip Duration'].mean().astype(int)//60)))
  261.  
  262.     print("\nThis took %s seconds." % (time.time() - start_time))
  263.     print('-'*40)
  264.  
  265.  
  266. def user_stats(df1):
  267.     """Displays statistics on bikeshare users."""
  268.  
  269.     print('\nCalculating User Stats...\n')
  270.     start_time = time.time()
  271.  
  272.     # TO DO: Display counts of user types
  273. ##Old version that showed two columns of pandas table sub/cust and count
  274.     #print("\nThe number of Subscribers or Customers are:")
  275.     #print(df1['User Type'].value_counts())
  276.  
  277.     if 'User Type' in df1:
  278.         print("\nThe number of Subscribers and Customers are:")
  279.         #print(df1['Gender'].value_counts()) old version that was pandas table
  280.         utype_vals = []
  281.         utype_vals = str(df1['User Type'].value_counts()).split()
  282.         #print(utype_vals)  test of utype_vals
  283.         print("\nSubscriber    {}".format(utype_vals[1]))
  284.         print("Customer       {}".format(utype_vals[3]))
  285.    
  286.     # TO DO: Display counts of gender
  287. #Old version that showed two columns of pandas table gender and count
  288. #    if 'Gender' in df1:
  289. #       print("\nThe number of Male and Female customers are:")
  290. #        print(df1['Gender'].value_counts())
  291.  
  292.     if 'Gender' in df1:
  293.         print("\nThe number of Male and Female customers are:")
  294.         #print(df1['Gender'].value_counts()) old version that was pandas table
  295.         gender_vals = []
  296.         gender_vals = str(df1['Gender'].value_counts()).split()
  297.         #print(gender_vals) test of gender_vals
  298.         print("\nMale     {}".format(gender_vals[1]))
  299.         print("Female    {}".format(gender_vals[3]))
  300.  
  301.        
  302.        
  303.        
  304.     # TO DO: Display earliest, most recent, and most common year of birth
  305.     if 'Birth Year' in df1:
  306.         print("\nThe earliest year of birth is {}.".format(
  307.             str(df1['Birth Year'].min().astype(int))))
  308.  
  309.        
  310.         print("\nThe most recent year of birth is {}.".format(
  311.             str(df1['Birth Year'].max().astype(int))))
  312.  
  313.         by = ("\nThe most common year of birth is {}.".format(
  314.             str(df1['Birth Year'].value_counts().head(1).astype(int))))
  315.         print(by.split(".")[0]+'.')
  316.  
  317.  
  318.     print("\nThis took %s seconds." % (time.time() - start_time))
  319.     print('-'*40)
  320.  
  321. #ef rawdata(df1):
  322.     """
  323.    Returns
  324.        5 lines at a time of raw data
  325.    """
  326.  
  327.                
  328. def showraw(df1):
  329.      
  330.         #version that only allows yes or no
  331.    
  332.     startraw = 0
  333.     moreraw = 5
  334.     #rawdata1 = True
  335.     rawdata_more = True
  336.     while(rawdata_more):
  337.         showraw = input('\nWould you like to see 5 lines of raw data?  Type Yes or No: ')
  338.         if showraw.lower() == 'yes':
  339.                 rawdata_more = True
  340.                # rawdata1 = False
  341.                 print(df1[df1.columns[1:9]].iloc[startraw:moreraw])
  342.                 startraw += 5
  343.                 moreraw += 5
  344.                 continue
  345.         elif showraw.lower() == 'no':
  346.                 rawdata_more = False
  347.                 #rawdata1 = False
  348.                 break
  349.         else:
  350.             print("\nPlease answer Yes or No.")
  351.             rawdata_more = True
  352.    
  353.    
  354.    
  355.        
  356. def main():
  357.     while True:
  358.         city, monthchoice, daychoice = get_filters()
  359.         df1= load_data(city, monthchoice, daychoice)
  360.  
  361.         time_stats(df1)
  362.         station_stats(df1)
  363.         trip_duration_stats(df1)
  364.         user_stats(df1)
  365.         showraw(df1)
  366.        
  367.         dorestart = True
  368.         doquestion = True
  369.         while(doquestion):
  370.             restart = input('\nWould you like to restart? Enter yes or no.\n')
  371.             if restart.lower() == 'yes':
  372.                 dorestart = True
  373.                 doquestion = False
  374.                 continue
  375.             elif restart.lower() == 'no':
  376.                 dorestart = False
  377.                 doquestion = False
  378.                 break
  379.             else:
  380.                 print("Please answer Yes or No.")
  381.                 doquestion = True
  382.          
  383.         #End of Yes No question loop
  384.         if dorestart == False:
  385.             break
  386.     #end of restart for script
  387.    
  388.    
  389. if __name__ == "__main__":
  390.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement