Advertisement
Guest User

Untitled

a guest
Jul 21st, 2019
119
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.07 KB | None | 0 0
  1. # Python v3.6
  2.  
  3. # SLCSP initial data cleansing only.
  4.  
  5.  
  6. import numpy as np
  7. import pandas as pd
  8.  
  9.  
  10. def creating_dataframes():
  11. ''' Loading csv files into Pandas dataframes (df) '''
  12. zips = pd.read_csv('zips.csv', delimiter=',', dtype={'zipcode': np.unicode_})
  13. plans = pd.read_csv('plans.csv', delimiter=',')
  14. slcsp = pd.read_csv('slcsp.csv', delimiter=',', dtype={'zipcode': np.unicode_})
  15. print("Intital dataframes")
  16. print(f"Zips records: {np.shape(zips)[0]}")
  17. print(f"Plans records: {np.shape(plans)[0]}")
  18. print(f"SLCSP records: {np.shape(slcsp)[0]}\n")
  19. # zips columns: [zipcode, state, county_code, name, rate_area]
  20. # plans columns: [plan_id, state, metal_level, rate, rate_area]
  21. return zips, plans, slcsp
  22.  
  23. def zips_in_target(zips, slcsp):
  24. ''' Carrying forward only the zipcodes found in the slcsp df to be present in the zips df.
  25. A series containing a boolean mask on the zipcode column in the zips df based on whether they are present in the final slcsp df is created.
  26. That boolean mask is then applied back onto the zips df itself.'''
  27. zips_verified = zips[zips.zipcode.isin(slcsp.zipcode)]
  28. print(f"Zipcode exclusion. Only zipcodes in final SLCSP carry forward in zips dataframe.\nZips records: {np.shape(zips_verified)[0]}\n")
  29. return zips_verified
  30.  
  31. def filter_for_silver(plans):
  32. ''' Carrying forward only the silver metal level plans in the plans df.
  33. A series containing a boolean mask on the metal_level column in the plans df evaluating whether the metal level plan is 'Silver' is created.
  34. That boolean mask is then applied back onto the plans df itself.'''
  35. plans_silver = plans[plans.metal_level == "Silver"]
  36. print(f"Filter for 'Silver' plans. Only silver plans carry forward in plans dataframe.\nPlans records: {np.shape(plans_silver)[0]}")
  37. return plans_silver
  38.  
  39.  
  40. def main():
  41. print("SLCSP problem - cleaning data, only.\n")
  42. zips, plans, slcsp = creating_dataframes()
  43. zips_verified = zips_in_target(zips, slcsp)
  44. plans_silver = filter_for_silver(plans)
  45.  
  46. if __name__ == '__main__':
  47. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement