Advertisement
Guest User

Untitled

a guest
Aug 24th, 2017
62
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.92 KB | None | 0 0
  1. # Helpful function to rip through a data frame and look at what is going on with it's columns
  2. def eda_helper(df):
  3. dict_list = []
  4. for col in df.columns:
  5. data = df[col]
  6. dict_ = {}
  7. # The null count for a column. Columns with no nulls are generally more interesting
  8. dict_.update({"null_count" : data.isnull().sum()})
  9. # Counting the unique values in a column
  10. # This is useful for seeing how interesting the column might be as a feature
  11. dict_.update({"unique_count" : len(data.unique())})
  12. # Finding the types of data in the column
  13. # This is useful for finding out potential problems with a column having strings and ints
  14. dict_.update({"data_type" : set([type(d).__name__ for d in data])})
  15. #dict_.update({"score" : match[1]})
  16. dict_list.append(dict_)
  17. eda_df = pd.DataFrame(dict_list)
  18. eda_df.index = df.columns
  19.  
  20. return eda_df
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement