Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import dask.dataframe as dd
import numpy as np
import pandas as pd

# Toy input: a single integer column "a" holding 0..99, wrapped in a Dask
# DataFrame with 4 partitions so the per-partition behaviour is visible.
df = pd.DataFrame({"a": np.arange(100)})
ddf = dd.from_pandas(df, npartitions=4)
def fun(x):
    """Return ``x`` with one extra row holding the mean of each column.

    This is a deliberately silly example function: it just appends the
    column means as a final row.

    Args:
        x: A pandas DataFrame.

    Returns:
        A new DataFrame containing the rows of ``x`` followed by one row of
        column means. The index is renumbered from 0 (``ignore_index=True``);
        ``x`` itself is not modified.
    """
    # x.mean() is a Series keyed by column name; to_frame().T turns it into a
    # one-row DataFrame with the same columns so it can be concatenated.
    mean_row = x.mean().to_frame().T
    return pd.concat([x, mean_row], ignore_index=True)
# Run fun on every partition independently and collect the result with
# compute(). fun is passed directly; wrapping it in a lambda added nothing.
# NOTE(review): fun renumbers each partition's index from 0, so the combined
# result presumably has repeated index labels -- confirm this is intended.
out = ddf.map_partitions(fun).compute()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement