Guest User

Untitled

a guest
Jun 24th, 2018
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.64 KB | None | 0 0
  1. from datetime import datetime
  2. import pandas as pd
  3. import numpy as np
  4. #from datetime import timedelta
  5. import time
  6. CITY_DATA = { 'chicago': 'chicago.csv',
  7. 'new york': 'new_york_city.csv',
  8. 'washington': 'washington.csv' }
  9. def get_city():
  10. """
  11. Function:
  12. Asks user to specify a city to analyse.
  13. Args:
  14. None
  15. Returns:
  16. (str) city - name of the city to analyse.
  17. """
  18. # TO DO: get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
  19. cities = ['Chicago','New York','Newyork','Washington']
  20. while True:
  21. try:
  22. city = input("\nWould you like to see data for Chicago, New york, or Washington?")
  23. if city.title() not in cities:
  24. print("\nPlease enter a valid city.")
  25. city = input('\nWould you like to see data for Chicago, New york, or Washington?')
  26. break
  27. except:
  28. print("\nLooks like it is not a valid input.")
  29. if city.title() == 'Newyork':
  30. city = city[0:3] + ' ' + city[3:]
  31. return city.lower()
  32. def get_month():
  33. """
  34. Function:
  35. Asks user to specify a month to filter the data.
  36. Args:
  37. None
  38. Returns:
  39. (str)month - name of the month to filter the data.
  40. """
  41. month = ''
  42. numbers = ['1','2','3','4','5','6','7','8','9','0']
  43. months = ['January', 'February', 'March', 'April', 'May', 'June']
  44. if month not in months:
  45. while True:
  46. try:
  47. month = input("\nWhich month? January, February, March, April, May or June?")
  48. if month in numbers:
  49. print('\nPlease do not enter the month in numbers')
  50. if month.title() not in months:
  51. print("\nPlease enter a month between January and June")
  52. while True:
  53. try:
  54. month = input("\nWhich month? January, February, March, April, May or June?")
  55. break
  56. except:
  57. print("\nLooks like it is not a valid input.")
  58. break
  59. except:
  60. print("\nLooks like it is not a valid input")
  61. return month
  62. def get_day():
  63. """
  64. Function:
  65. Asks user to specify a day to filter the data
  66. Args:
  67. None
  68. Returns:
  69. (str)day - name of the day to filter the data.
  70. """
  71. while True:
  72. try:
  73. day = int(input("\nWhich day? Please type your response as an integer.(e.g.,sunday = 1)"))
  74. break
  75. except:
  76. print("\nLooks like it is not a valid input.")
  77. return day
  78. def get_filter():
  79. '''
  80. Function:
  81. Asks user to apply a filter
  82. Args:
  83. None
  84. Returns:
  85. specific filter entered by the user
  86. '''
  87. filter = ''
  88. filters = ['month','day','none']
  89. if filter not in filters:
  90. while True:
  91. try:
  92. filter = input("\nWould you like to filter the data by month, day, or not at all? Type 'none' for no time filter.")
  93. if filter.lower() not in filters:
  94. print("\nInvalid filter!")
  95. filter = input("\nWould you like to filter the data by month, day, or not at all? Type 'none' for no time filter.")
  96. break
  97. except:
  98. print("\nLooks like it is not a valid input.")
  99. #if filter == 'none':
  100. # filter = filter.title()
  101. return filter
  102. def get_data():
  103. """
  104. Function:
  105. Asks user to specify a city, month, and day to analyze.
  106. Args:
  107. None
  108. Returns:
  109. (str) city - name of the city to analyze
  110. (str) month - name of the month to filter by, or "all" to apply no month filter
  111. (str) day - name of the day of week to filter by, or "all" to apply no day filter
  112. (str) filter - filter specified by the user
  113. """
  114. print('\nHello! Let\'s explore some US bikeshare data!')
  115. # TO DO: get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
  116. city = get_city()
  117. filter = get_filter()
  118. if filter == 'month':
  119. # TO DO: get user input for month (all, january, february, ... , june)
  120. month = get_month()
  121. day = None
  122. elif filter == 'day':
  123. # TO DO: get user input for day of week (all, monday, tuesday, ... sunday)
  124. day = get_day()
  125. month = None
  126. else:
  127. month,day = None,None
  128. print('-'*40)
  129. return city,month,day,filter
  130.  
  131.  
  132. def load_data(city, month, day,filter):
  133. """
  134. Function:
  135. Loads data for the specified city and filters by month and day if applicable.
  136. Args:
  137. (str) city - name of the city to analyze
  138. (str) month - name of the month to filter by, or "all" to apply no month filter
  139. (str) day - name of the day of week to filter by, or "all" to apply no day filter
  140. (str) filter - specific filter that entered by user
  141. Returns:
  142. df - Pandas DataFrame containing city data filtered by month and day
  143. """
  144.  
  145. df = pd.read_csv(CITY_DATA[city])
  146. df.fillna(0)
  147. if filter == 'none':
  148. df_filter = df
  149. elif filter == 'month':
  150. months = {'january':1,'february':2,'march':3,'april':4,'may':5,'june':6}
  151. df['Start Time'] = pd.to_datetime(df['Start Time'])
  152. df['month'] = df['Start Time'].dt.month
  153. df_filter = df[df['month'] == months[month]]
  154. else:
  155. days = {1:'Sunday',2:'Monday',3:'Tuesday',4:'Wednesday',5:'Thursday',6:'Friday',7:'Saturday'}
  156. df['Start Time'] = pd.to_datetime(df['Start Time'])
  157. df['day'] = df['Start Time'].dt.weekday_name
  158. df_filter = df[df['day'] == days[day]]
  159. return df_filter
  160.  
  161.  
  162. def time_stats(df,filter):
  163. """
  164. Function:
  165. Displays statistics on the most frequent times of travel.
  166. Args:
  167. df - python data frame
  168. filter - specific filter that entered by user
  169. Returns:
  170. None
  171. """
  172. print('Calculating The Most Frequent Times of Travel...\n')
  173. start_time = time.time()
  174. # TO DO: display the most common month
  175. if filter == 'day' or filter == 'none':
  176. months = ['January', 'February', 'March', 'April', 'May', 'June']
  177. df['Start Time'] = pd.to_datetime(df['Start Time'])
  178. index = df['Start Time'].dt.month.mode()[0]
  179. most_pop_month = months[index - 1]
  180. print('\nMost popular month: {}'.format(most_pop_month))
  181.  
  182. # TO DO: display the most common day of week
  183. if filter == 'month' or filter == 'none':
  184. days_of_week = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
  185. 'Saturday', 'Sunday']
  186. df['Start Time'] = pd.to_datetime(df['Start Time'])
  187. index = df['Start Time'].dt.dayofweek.mode()[0]
  188. most_pop_day = days_of_week[index]
  189. print('\nMost popular day: {}'.format(most_pop_day))
  190. # TO DO: display the most common start hour
  191. if filter == 'month' or filter == 'day' or filter == 'none':
  192. df['Start Time'] = pd.to_datetime(df['Start Time'])
  193. most_pop_hour = df['Start Time'].dt.hour.mode()[0]
  194. if most_pop_hour == 0:
  195. meridiem = 'am'
  196. most_pop_hour = 12
  197. elif 1 <= most_pop_hour < 13:
  198. meridiem = 'am'
  199. elif 13 <= most_pop_hour < 24:
  200. meridiem = 'pm'
  201. most_pop_hour -= 12
  202. print('\nThe most popular hour of day for start time is {}{}.'.format(most_pop_hour, meridiem))
  203.  
  204. print("\nThis took %s seconds." % (time.time() - start_time))
  205. print('-'*40)
  206.  
  207.  
  208. def station_stats(df):
  209. """Function:
  210. Displays statistics on the most popular stations and trip.
  211. Args:
  212. df - Pandas data frame filtered by month or day
  213. Returns:
  214. None
  215. """
  216.  
  217. print('\nCalculating The Most Popular Stations and Trip...\n')
  218. start_time = time.time()
  219.  
  220. # TO DO: display most commonly used start station
  221. pop_start_station = df['Start Station'].mode()[0]
  222. print('\nThe most popular start station is: {}'.format(pop_start_station))
  223. # TO DO: display most commonly used end station
  224. pop_end_station = df['End Station'].mode()[0]
  225. print('\nThe most popular end station is: {}'.format(pop_end_station))
  226.  
  227. # TO DO: display most frequent combination of start station and end station trip
  228. df['trip'] = df['Start Station'] + ' to ' + df['End Station']
  229. pop_trip = df['trip'].mode()[0]
  230. print('\nThe most popular trip is: {}'.format(pop_trip))
  231. print("\nThis took %s seconds." % (time.time() - start_time))
  232. print('-'*40)
  233.  
  234.  
  235. def trip_duration_stats(df):
  236. """Function:
  237. Displays statistics on the total and average trip duration.
  238. Args:
  239. df - Pandas data frame filtered by month or day
  240. Returns:
  241. None
  242. """
  243.  
  244. print('\nCalculating Trip Duration...\n')
  245. start_time = time.time()
  246.  
  247. # TO DO: display total travel time
  248. total_trip_duration = df['Trip Duration'].sum()
  249. remain_seconds = total_trip_duration % 60
  250. minutes = total_trip_duration // 60
  251. remain_minutes = minutes % 60
  252. hours = minutes // 60
  253. print('\nThe total trip duration is {} hours, {} minutes, {} seconds'.format(hours,remain_minutes,remain_seconds))
  254. # TO DO: display mean travel time
  255. mean_trip_time = df['Trip Duration'].mean()
  256. remain_seconds = mean_trip_time % 60
  257. minutes = mean_trip_time // 60
  258. remain_minutes = minutes % 60
  259. hours = minutes // 60
  260. print('\nThe mean trip duration is {} hours, {} minutes, {} seconds'.format(hours,remain_minutes,remain_seconds))
  261. print("\nThis took %s seconds." % (time.time() - start_time))
  262. print('-'*40)
  263.  
  264.  
  265. def user_stats(df,city):
  266. """Function:
  267. Displays statistics on bikeshare users.
  268. Args:
  269. df - Pandas data frame filtered by month or day
  270. city - specific city that entered by user
  271. """
  272.  
  273. print('\nCalculating User Stats...\n')
  274. start_time = time.time()
  275.  
  276. # TO DO: Display counts of user types
  277. df['User'] = df['User Type']
  278. subscribers = df.query('User == "Subscriber" ').User.count()
  279. customers = df.query('User == "Customer" ').User.count()
  280. print('\nThere are {} subscribers and {} customers'.format(subscribers,customers))
  281. # TO DO: Display counts of gender
  282. if city == 'chicago' or city == 'new york':
  283. male = df.query('Gender == "Male" ').Gender.count()
  284. female = df.query('Gender == "Female" ').Gender.count()
  285. # TO DO: Display earliest, most recent, and most common year of birth
  286. earliest_birth = df['Birth Year'].min()
  287. most_recent_birth = df['Birth Year'].max()
  288. most_common_birth = df['Birth Year'].mode()[0]
  289. print('\nOldest user was born in: {}'.format(earliest_birth))
  290. print('\nYoungest user was born in: {}'.format(most_recent_birth))
  291. print('\nMost popular birth year is: {}'.format(most_common_birth) )
  292. print("\nThis took %s seconds." % (time.time() - start_time))
  293. print('-'*40)
  294.  
  295. def more_data(df,filter):
  296. '''
  297. Function:
  298. Calculates individual trip data
  299. Args:
  300. df - Pandas data frame filtered by month or day
  301. filter - specific filter entered by user
  302. Returns:
  303. None
  304. '''
  305. if filter == 'month':
  306. df = df.drop(['trip','User','month'], axis = 1)
  307. print(df.head(1))
  308. print('-'*40)
  309. elif filter == 'day':
  310. df = df.drop(['trip','User','day'], axis = 1)
  311. print(df.head(1))
  312. print('-'*40)
  313. else:
  314. df = df.drop(['trip','User'], axis = 1)
  315. print(df.head(1))
  316. print('-'*40)
  317. def individual_trip_stats(df,filter):
  318. '''
  319. Function:
  320. Asks if user wants to see individual trip data
  321. Args:
  322. df - Pandas data frame filtered by month or day
  323. filter - specific filter that entered by user
  324. Returns:
  325. None
  326. '''
  327. start_time = time.time()
  328. key = True
  329. while key == True:
  330. opt_more_data = input('Would you like to see individual trip data?(yes or no)')
  331. if opt_more_data.lower() == 'yes':
  332. more_data(df,filter)
  333. elif opt_more_data.lower() == 'no':
  334. break
  335. else:
  336. print("Looks like it is not a valid input.")
  337. print('\nThis took %s seconds.'%(time.time() - start_time))
  338. print('-'*40)
  339.  
  340. def main():
  341. while True:
  342. city, month, day,filter = get_data()
  343. df = load_data(city, month, day, filter)
  344. time_stats(df,filter)
  345. station_stats(df)
  346. trip_duration_stats(df)
  347. user_stats(df,city)
  348. individual_trip_stats(df,filter)
  349. restart = input('\nWould you like to restart? Enter yes or no.\n')
  350. if restart.lower() != 'yes':
  351. break
  352.  
  353.  
  354. if __name__ == "__main__":
  355. main()
Add Comment
Please, Sign In to add comment