a guest Jul 21st, 2019 78 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # Python v3.6
  3. # SLCSP initial data cleansing only.
  6. import numpy as np
  7. import pandas as pd
  10. def creating_dataframes():
  11.     ''' Loading csv files into Pandas dataframes (df) '''
  12.     zips = pd.read_csv('zips.csv', delimiter=',', dtype={'zipcode': np.unicode_})
  13.     plans = pd.read_csv('plans.csv', delimiter=',')
  14.     slcsp = pd.read_csv('slcsp.csv', delimiter=',', dtype={'zipcode': np.unicode_})
  15.     print("Intital dataframes")
  16.     print(f"Zips records: {np.shape(zips)[0]}")
  17.     print(f"Plans records: {np.shape(plans)[0]}")
  18.     print(f"SLCSP records: {np.shape(slcsp)[0]}\n")
  19.     # zips columns: [zipcode, state, county_code, name, rate_area]
  20.     # plans columns: [plan_id, state, metal_level, rate, rate_area]
  21.     return zips, plans, slcsp
  23. def zips_in_target(zips, slcsp):
  24.     ''' Carrying forward only the zipcodes found in the slcsp df to be present in the zips df.
  25.         A series containing a boolean mask on the zipcode column in the zips df based on whether they are present in the final slcsp df is created.
  26.         That boolean mask is then applied back onto the zips df itself.'''
  27.     zips_verified = zips[zips.zipcode.isin(slcsp.zipcode)]
  28.     print(f"Zipcode exclusion. Only zipcodes in final SLCSP carry forward in zips dataframe.\nZips records: {np.shape(zips_verified)[0]}\n")
  29.     return zips_verified
  31. def filter_for_silver(plans):
  32.     ''' Carrying forward only the silver metal level plans in the plans df.
  33.         A series containing a boolean mask on the metal_level column in the plans df evaluating whether the metal level plan is 'Silver' is created.
  34.         That boolean mask is then applied back onto the plans df itself.'''
  35.     plans_silver = plans[plans.metal_level == "Silver"]
  36.     print(f"Filter for 'Silver' plans. Only silver plans carry forward in plans dataframe.\nPlans records: {np.shape(plans_silver)[0]}")
  37.     return plans_silver
  40. def main():
  41.     print("SLCSP problem - cleaning data, only.\n")
  42.     zips, plans, slcsp = creating_dataframes()
  43.     zips_verified = zips_in_target(zips, slcsp)
  44.     plans_silver = filter_for_silver(plans)
  46. if __name__ == '__main__':
  47.     main()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand