Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Build a synthetic feature table and blank out a fixed share of its cells.

The script generates a feature matrix with scikit-learn's
``make_classification``, overwrites ``perc_nan`` of all cells (chosen
uniformly at random, without repetition) with NaN, and exposes the result
as the DataFrame ``df`` with columns ``DataPoint_0`` .. ``DataPoint_{n-1}``.
"""
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification

perc_nan = 0.3      # fraction of all cells to replace with NaN
n_samples = 300000  # number of rows
n_features = 5      # number of columns

colname = pd.Index([f"DataPoint_{i}" for i in range(n_features)])

# make_classification returns (X, y); only the feature matrix X is needed.
A = make_classification(n_samples, n_features)[0]

# Number of cells to blank out, rounded to the nearest whole cell.
c = int(round(A.size * perc_nan))

# Write the NaNs straight into the freshly generated array.  ``.flat``
# always indexes the array itself, whereas ``ravel()`` may hand back a copy
# (silently discarding the assignment), and an array obtained from
# ``DataFrame.to_numpy()`` can be read-only when pandas Copy-on-Write is on.
A.flat[np.random.choice(A.size, c, replace=False)] = np.nan

df = pd.DataFrame(A, columns=colname)
del A  # drop the module-level reference to the raw array
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement