# remove_hawaii_alaska.py

import pandas as pd

def remove_hawaii_alaska(source_csv="*************_2016-12-13.csv",
                         npa_csv="npa.csv",
                         intermediate_csv="no_hawaii_alaska.csv",
                         final_csv="final.csv"):
    """Filter a CSV of phone prefixes down to a list of allowed area codes.

    Steps:
      1. Read ``source_csv`` and normalise its ``npanxx`` column to strings
         (values are cast to int first, then to str).
      2. Drop every row whose ``npanxx`` starts with ``1907`` or ``1808``
         and write the remainder to ``intermediate_csv``.
      3. Read the ``NPA`` column of ``npa_csv`` and, for each value, keep
         the remaining rows whose ``npanxx`` starts with ``"1" + NPA``.
         The result is grouped in the order the NPAs are listed and is
         written to ``final_csv``.

    Row counts are printed along the way as a progress report.

    Args:
        source_csv: Path of the input CSV; must contain an ``npanxx`` column.
        npa_csv: Path of the CSV whose ``NPA`` column lists the prefixes
            to keep.
        intermediate_csv: Output path for the data without the 1907/1808
            rows.
        final_csv: Output path for the rows matching the listed NPAs.

    Returns:
        None. Results are written to ``intermediate_csv`` and ``final_csv``.
    """
    df = pd.read_csv(source_csv)
    print(len(df))

    # Compare prefixes as text: cast to int first so values such as
    # 1907555.0 become "1907555" rather than "1907555.0".
    df["npanxx"] = df["npanxx"].astype(int).astype('str')

    # Compute each mask once and reuse it for both counting and dropping.
    is_alaska = df["npanxx"].str.startswith("1907")
    is_hawaii = df["npanxx"].str.startswith("1808")
    alaska_count = int(is_alaska.sum())
    hawaii_count = int(is_hawaii.sum())

    print(len(df))
    df = df[~(is_alaska | is_hawaii)]
    print(len(df))

    print(alaska_count)
    print(hawaii_count)

    df.to_csv(intermediate_csv)

    npa = pd.read_csv(npa_csv)

    print('')
    # One filtered frame per NPA, concatenated once at the end; this keeps
    # the rows grouped by NPA in listing order without re-copying the
    # accumulated result on every iteration.
    prefixes = df["npanxx"]
    matches = [
        df[prefixes.str.startswith("1" + str(code))]
        for code in npa["NPA"]
    ]
    # pd.concat rejects an empty list, so fall back to an empty frame that
    # still carries the expected columns.
    df_final = pd.concat(matches) if matches else df.iloc[0:0]

    print(len(df_final))

    df_final.to_csv(final_csv)


# Run the filtering job only when executed as a script, so importing this
# module does not trigger file I/O as a side effect.
if __name__ == "__main__":
    remove_hawaii_alaska()