Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def eda_helper(df):
    """Summarize every column of a data frame for exploratory analysis.

    Args:
        df: The pandas DataFrame to inspect.

    Returns:
        A DataFrame indexed by the column labels of ``df`` (in their
        original order) with one row per column and these fields:

        * ``null_count`` -- number of null entries in the column.
        * ``unique_count`` -- number of distinct values in the column as
          reported by ``Series.unique`` (a missing value counts as one
          distinct value).
        * ``data_type`` -- set of the Python type names of the values in
          the column; more than one name signals mixed data such as
          strings alongside ints.
    """
    summary_fields = ["null_count", "unique_count", "data_type"]

    # Iterate with ``items()`` rather than ``df[col]``: it walks the columns
    # by position, so each ``column`` is always a Series even when several
    # columns share the same label.
    rows = [
        {
            "null_count": column.isnull().sum(),
            "unique_count": len(column.unique()),
            "data_type": {type(value).__name__ for value in column},
        }
        for _, column in df.items()
    ]

    # Passing ``columns`` explicitly keeps the summary schema stable even
    # when ``df`` has no columns and ``rows`` is therefore empty.
    eda_df = pd.DataFrame(rows, columns=summary_fields)
    eda_df.index = df.columns
    return eda_df
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement