Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import datetime
- def read_file(file_p):
- '''
- Read in the file example_tape1.csv as a Pandas DataFrame, print out the datatypes and returns the output DataFrame. (hint: you might have a little cleaning to do)
- The file is accessible at this location: ./data/example_tape1.csv
- '''
- #This try catch pattern allow for the use of both CVS and Excel formatted inputs. It reduces a lot of user error and frustrations.
- try:
- '''skipping last line because it is malformed (possibly a timestamp. this may need refactored if all data is not
- formatted like this example is. Additionally, the example contains a large number of blank columns. I am not addressing
- this at this time in case they are used for something downstream. These would normally be removed for data cleanliness.'''
- df = pd.read_csv(file_p, engine='python', error_bad_lines=False, skipfooter=1)
- except:
- try:
- df = pd.read_excel(file_p, engine='python', error_bad_lines=False, skipfooter=1)
- except:
- print("Failed to read file, file is not of supported tile types (CSV,XLSX,XLS)")
- return df
- def max_cash(collections):
- '''
- Return the row of the DataFrame with the highest gross_cash
- Args
- collections <pd.DataFrame>
- Returns <pd.DataFrame> containing one row
- '''
- return collections.loc[collections['gross_cash'].idxmax()]
- def vintage_level_cash(collections):
- '''
- Calculate the total gross_cash collected for each vintage across all of time
- Args
- collections <pd.DataFrame>
- Returns <pd.DataFrame>
- '''
- vlc_collections = collections.copy()
- #Convert each vintage to a Datetime value
- vlc_collections['vintage'] = pd.to_datetime(vlc_collections['vintage'])
- # Set the datetime column as the index
- vlc_collections.index = vlc_collections['vintage']
- #resampling the dataframe by vintage month and calculating the sum's
- vintage_cash = vlc_collections.resample('M').sum()
- #reducing the data down to only the vintage and gross cash for easier readability.
- #vintage_cash = pd.DataFrame(vintage_cash, columns = ['vintage', 'gross_cash'])
- return vintage_cash[['gross_cash']]
- def vintage_totals(collections):
- '''
- Calculate the cumulative gross_cash collected for each vintage from inception through the indicated asofmonth.
- Args
- collections <pd.DataFrame>
- Returns <pd.DataFrame> with columns vintage, asofmonth, cumulative_gross_cash
- '''
- totals = []
- for vintage in collections['vintage'].unique():
- total = 0
- #print(vintage)
- subvin = collections[(collections['vintage'] == vintage)]
- for cash in subvin['gross_cash']:
- total = total + cash
- totals.append(round(total, 2))
- collections['cumulative_gross_cash'] = totals
- vintage_totals_result = pd.DataFrame(collections, columns = ['vintage', 'asofmonth', 'cumulative_gross_cash'])
- return(vintage_totals_result)
- def vintage_pivot(vintage_totals_result):
- '''
- Using the output of vintage_totals, pivot the DataFrame to make it wide and short, such that each column
- is the name of a vintage, each row indicates the asofmonth, and cumulative_gross_cash is the value in each cell.
- Please comment how you might plot the output data.
- Args
- vintage_totals_dataframe <pd.DataFrame>
- Returns <pd.DataFrame> with a column for each vintage
- '''
- vintage_pivot_result = vintage_totals_result.pivot(index='asofmonth', columns='vintage', values='cumulative_gross_cash')
- return vintage_pivot_result
- '''This is a "sentinel" it marks a break in the execution of the code post execution of the functions
- allowing these functions to used as a library for additional works.'''
- if __name__ == "__main__":
- #This variable sets the path to the data file to be read when executing this scrip standalone.
- file_p = "./data/example_tape1.csv"
- collections = read_file(file_p)
- if collections is not None:
- max_cash_result = max_cash(collections)
- #print(max_cash_result)
- vintage_level_result = vintage_level_cash(collections)
- #print(vintage_level_result)
- vintage_totals_result = vintage_totals(collections)
- #print(vintage_totals_result)
- vintage_pivot_result = vintage_pivot(vintage_totals_result)
- display(vintage_pivot_result)
Add Comment
Please, Sign In to add comment