Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import pandas as pd
def describe_population(df: pd.DataFrame) -> pd.DataFrame:
    """
    Report the populated and uniqueness counts for each column of the input.

    The result has one row per column of ``df`` (indexed by column name)
    and the following columns, in this order:

    * ``dtype``     -- the column's dtype.
    * ``na``        -- number of null entries (per ``DataFrame.isnull``).
    * ``populated`` -- number of non-null entries (row count minus ``na``).
    * ``distinct``  -- number of distinct values (per ``DataFrame.nunique``,
      which does not count nulls by default).
    * ``pop/N``     -- ``populated`` as a percent of the row count.
    * ``dist/pop``  -- ``distinct`` as a percent of ``populated``.

    The ratio columns are given as percents. Where the denominator is zero
    (an empty frame, or a column that is entirely null) the ratio is NaN
    rather than an error.

    The name of the result's column index is set to ``"N = <row count>"``
    with thousands separators, so the total row count is shown when the
    frame is displayed.
    """
    n_rows = len(df)

    null_counts = df.isnull().sum()
    populated_counts = n_rows - null_counts
    distinct_counts = df.nunique()

    # Assign column by column so the output column order is explicit and
    # the row index is taken from the per-column Series (the column names).
    out = pd.DataFrame()
    out['dtype'] = df.dtypes
    out['na'] = null_counts
    out['populated'] = populated_counts
    out['distinct'] = distinct_counts
    out['pop/N'] = 100 * populated_counts / n_rows
    out['dist/pop'] = 100 * distinct_counts / populated_counts

    # Carry the total row count along as the label of the columns axis.
    out.columns.name = "N = {:,}".format(n_rows)
    return out
Add Comment
Please, Sign In to add comment