KillianMills

remove_hawaii_alaska.py

Dec 8th, 2016
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.28 KB | None | 0 0
  1. import pandas as pd
  2.  
  3. def remove_hawaii_alaska():
  4.     df = pd.read_csv("*************_2016-12-13.csv")
  5.  
  6.     print len(df)
  7.  
  8.     # lets remove 808 and 901 or 1808 and 1907
  9.     # so if the prefix starts with 1808 or 1907, that fucker has to go
  10.  
  11.     df["npanxx"] = df["npanxx"].astype(int).astype('str')
  12.  
  13.     df_alaska = df[df["npanxx"].str.startswith("1907")]
  14.  
  15.     df_hawaii = df[df["npanxx"].str.startswith("1808")]
  16.  
  17.     print len(df)
  18.     df = df.drop(df[df.npanxx.str.startswith("1907")].index)
  19.     df = df.drop(df[df.npanxx.str.startswith("1808")].index)
  20.     print len(df)
  21.  
  22.     print len(df_alaska)
  23.     print len(df_hawaii)
  24.  
  25.     df.to_csv("no_hawaii_alaska.csv")
  26.  
  27.     clean_df = pd.read_csv("no_hawaii_alaska.csv")
  28.     npa = pd.read_csv("npa.csv")
  29.  
  30.     #print npa.NPA
  31.  
  32.     npa_dict = {}
  33.     npa_dict.update(npa.NPA)
  34.     df_final = pd.DataFrame
  35.  
  36.     #print (npa_dict)
  37.     #print npa_dict[0]
  38.    
  39.     print ''
  40.     for x in range (0, len(npa_dict)):
  41.  
  42.         npa_prefix = "1" + str(npa_dict[x])
  43.  
  44.         if x is 0:
  45.             df_final = df[df["npanxx"].str.startswith(npa_prefix)]
  46.         elif x is not 0:
  47.             df_temp = df[df["npanxx"].str.startswith(npa_prefix)]
  48.             frames = [df_final, df_temp]
  49.             df_final = pd.concat(frames)
  50.  
  51.     #df_final = df[df["npanxx"].str.startswith(npa_dict)]
  52.  
  53.     print len(df_final)
  54.  
  55.     df_final.to_csv("final.csv")
  56.  
  57.  
  58.  
  59. remove_hawaii_alaska()
Advertisement
Add Comment
Please, Sign In to add comment