Guest User

Untitled

a guest
Aug 17th, 2018
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.60 KB | None | 0 0
  1. import pandas as pd
  2.  
  3. def describe_population(df: pd.DataFrame) -> pd.DataFrame:
  4. """
  5. Report the populated and uniqueness counts for each column of the input.
  6.  
  7. The ratio columns are given as percents.
  8. """
  9.  
  10. N = len(df)
  11.  
  12. dtypes = df.dtypes
  13.  
  14. distincts = df.nunique()
  15. nas = df.isnull().sum()
  16. pop = N - nas
  17.  
  18. out = pd.DataFrame()
  19.  
  20. out['dtype'] = dtypes
  21.  
  22. out['na'] = nas
  23. out['populated'] = pop
  24. out['distinct'] = distincts
  25.  
  26. out['pop/N'] = 100 * pop / N
  27. out['dist/pop'] = 100 * distincts / pop
  28.  
  29. out.columns.name = "N = {:,}".format(N)
  30.  
  31. return out
Add Comment
Please, Sign In to add comment