Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import numpy as np
def find_nan_columns(df):
    """Report, for every row that has missing data, which columns are missing.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect. Columns may be of any dtype; a cell counts as
        missing when ``DataFrame.isna`` flags it.

    Returns
    -------
    pandas.DataFrame
        One record per row of ``df`` that contains at least one missing
        cell, with two columns:

        * ``'Row'`` -- the zero-based *positional* row number (not the
          index label) of the row in ``df``.
        * ``'Missing data'`` -- list of the column labels that are missing
          in that row, in the column order of ``df``.

        If ``df`` has no missing cells the result has the same two columns
        and zero records.
    """
    # isna() works for every dtype, whereas np.isnan raises TypeError on
    # object/string columns.
    missing_mask = df.isna().to_numpy()

    # np.nonzero scans in row-major order, so row_pos is already sorted and
    # the column positions inside each row keep the frame's column order.
    row_pos, col_pos = np.nonzero(missing_mask)

    # Map column positions straight to labels by indexing; this also copes
    # with duplicate column names.
    missing_labels = df.columns.to_numpy()[col_pos]

    # first_hit[i] is the offset in row_pos at which the i-th affected row
    # starts, i.e. the split points between consecutive rows.
    affected_rows, first_hit = np.unique(row_pos, return_index=True)

    # np.split on an empty array still yields one (empty) chunk, which would
    # not line up with the zero-length 'Row' column, so guard that case.
    if len(affected_rows):
        per_row = np.split(missing_labels, first_hit[1:])
    else:
        per_row = []

    return pd.DataFrame({
        'Row': affected_rows,
        'Missing data': [chunk.tolist() for chunk in per_row],
    })
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement