pandas test 2

import pandas as pd
import datetime

def read_file(file_p):
    '''
    Read in the file example_tape1.csv as a Pandas DataFrame, print out the datatypes and returns the output DataFrame. (hint: you might have a little cleaning to do)

    The file is accessible at this location: ./data/example_tape1.csv
    '''
    #This try catch pattern allow for the use of both CVS and Excel formatted inputs. It reduces a lot of user error and frustrations.
    try:
        '''skipping last line because it is malformed (possibly a timestamp. this may need refactored if all data is not
        formatted like this example is. Additionally, the example contains a large number of blank columns. I am not addressing
        this at this time in case they are used for something downstream. These would normally be removed for data cleanliness.'''
        df = pd.read_csv(file_p, engine='python', error_bad_lines=False, skipfooter=1)
    except:
        try:
            df = pd.read_excel(file_p, engine='python', error_bad_lines=False, skipfooter=1)
        except:
            print("Failed to read file, file is not of supported tile types (CSV,XLSX,XLS)")

    return df


def max_cash(collections):
    '''
    Return the row of the DataFrame with the highest gross_cash

    Args
    collections <pd.DataFrame>

    Returns <pd.DataFrame> containing one row
    '''
    return collections.loc[collections['gross_cash'].idxmax()]

def vintage_level_cash(collections):
    '''
    Calculate the total gross_cash collected for each vintage across all of time

    Args
    collections <pd.DataFrame>

    Returns <pd.DataFrame>
    '''
    vlc_collections = collections.copy()

    #Convert each vintage to a Datetime value
    vlc_collections['vintage'] = pd.to_datetime(vlc_collections['vintage'])

    # Set the datetime column as the index
    vlc_collections.index = vlc_collections['vintage']

    #resampling the dataframe by vintage month and calculating the sum's
    vintage_cash = vlc_collections.resample('M').sum()

    #reducing the data down to only the vintage and gross cash for easier readability.
    #vintage_cash = pd.DataFrame(vintage_cash, columns = ['vintage', 'gross_cash'])
    return vintage_cash[['gross_cash']]

def vintage_totals(collections):
    '''
    Calculate the cumulative gross_cash collected for each vintage from inception through the indicated asofmonth.

    Args
    collections <pd.DataFrame>

    Returns <pd.DataFrame> with columns vintage, asofmonth, cumulative_gross_cash
    '''
    totals = []
    for vintage in collections['vintage'].unique():
        total = 0
        #print(vintage)
        subvin = collections[(collections['vintage'] == vintage)]
        for cash in subvin['gross_cash']:
            total = total + cash
            totals.append(round(total, 2))

    collections['cumulative_gross_cash'] = totals
    vintage_totals_result = pd.DataFrame(collections, columns = ['vintage', 'asofmonth', 'cumulative_gross_cash'])
    return(vintage_totals_result)


def vintage_pivot(vintage_totals_result):
    '''
    Using the output of vintage_totals, pivot the DataFrame to make it wide and short, such that each column
    is the name of a vintage, each row indicates the asofmonth, and cumulative_gross_cash is the value in each cell.
    Please comment how you might plot the output data.

    Args
    vintage_totals_dataframe <pd.DataFrame>

    Returns <pd.DataFrame> with a column for each vintage
    '''
    vintage_pivot_result = vintage_totals_result.pivot(index='asofmonth', columns='vintage', values='cumulative_gross_cash')
    return vintage_pivot_result

'''This is a "sentinel" it marks a break in the execution of the code post execution of the functions
allowing these functions to used as a library for additional works.'''
if __name__ == "__main__":

    #This variable sets the path to the data file to be read when executing this scrip standalone.
    file_p = "./data/example_tape1.csv"

    collections = read_file(file_p)

    if collections is not None:
        max_cash_result = max_cash(collections)
        #print(max_cash_result)
        vintage_level_result = vintage_level_cash(collections)
        #print(vintage_level_result)
        vintage_totals_result = vintage_totals(collections)
        #print(vintage_totals_result)
        vintage_pivot_result = vintage_pivot(vintage_totals_result)
        display(vintage_pivot_result)