Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
def remove_hawaii_alaska(source_csv="*************_2016-12-13.csv",
                         npa_csv="npa.csv",
                         cleaned_csv="no_hawaii_alaska.csv",
                         final_csv="final.csv"):
    """Drop Alaska/Hawaii rows, then keep only rows whose prefix is a listed NPA.

    Reads ``source_csv`` (must have an ``npanxx`` column that converts to
    int), removes every row whose ``npanxx`` starts with "1907" or "1808",
    and writes the remainder to ``cleaned_csv``.  It then reads ``npa_csv``
    (must have an ``NPA`` column) and, for each NPA in file order, collects
    the remaining rows whose ``npanxx`` starts with "1" + NPA.  The
    collected rows are written to ``final_csv``.

    Row counts are printed along the way in this order: total, total again,
    remaining after removal, Alaska count, Hawaii count, a blank line, and
    the final count.

    Args:
        source_csv: Path of the input CSV.
        npa_csv: Path of the CSV listing the NPAs to keep.
        cleaned_csv: Output path for the rows left after the removal.
        final_csv: Output path for the rows matching a listed NPA.

    Returns:
        None.  Results are written to ``cleaned_csv`` and ``final_csv``.
    """
    df = pd.read_csv(source_csv)
    print(len(df))

    # Prefix matching needs digit strings; going through int first removes
    # any float formatting (e.g. "1907555.0") picked up while parsing.
    df["npanxx"] = df["npanxx"].astype(int).astype("str")

    # Rows starting with 1907 or 1808 are the ones to remove.
    is_alaska = df["npanxx"].str.startswith("1907")
    is_hawaii = df["npanxx"].str.startswith("1808")
    df_alaska = df[is_alaska]
    df_hawaii = df[is_hawaii]
    print(len(df))

    df = df[~(is_alaska | is_hawaii)]
    print(len(df))
    print(len(df_alaska))
    print(len(df_hawaii))
    df.to_csv(cleaned_csv)

    npa = pd.read_csv(npa_csv)
    print('')

    # Collect one slice per NPA, in NPA-file order, and concatenate once at
    # the end instead of re-concatenating inside the loop.
    matches = []
    for code in npa["NPA"]:
        subset = df[df["npanxx"].str.startswith("1" + str(code))]
        if not subset.empty:
            matches.append(subset)

    # pd.concat rejects an empty list, so fall back to an empty frame that
    # still carries the input's columns.
    df_final = pd.concat(matches) if matches else df.iloc[0:0]

    print(len(df_final))
    df_final.to_csv(final_csv)
# Run the clean-up only when executed as a script, so that importing this
# module does not trigger the file reads and writes as a side effect.
if __name__ == "__main__":
    remove_hawaii_alaska()
Advertisement
Add Comment
Please, Sign In to add comment