Advertisement
Not a member of Pastebin yet?
Sign up — it unlocks many cool features!
- import numpy as np
- import polars as pl
- import perfplot
def map_elements(s):
    """Benchmark kernel: strip ``'remove'`` using a per-row Python callback.

    Every list in ``s`` is handed to a Python function that keeps the items
    not equal to ``'remove'``; the surviving items of each row are then
    joined with ``'_'`` through the ``.list.join`` accessor.
    """
    def _without_marker(row):
        # Plain Python filtering, executed once per row.
        return [item for item in row if item != 'remove']

    cleaned = s.map_elements(_without_marker)
    return cleaned.list.join('_')
def list_eval(s):
    """Benchmark kernel: strip ``'remove'`` with a ``list.eval`` expression.

    The filter is expressed on ``pl.element()`` and evaluated against each
    list in ``s`` via ``s.list.eval``.
    """
    item = pl.element()
    is_kept = item != 'remove'
    return s.list.eval(item.filter(is_kept))
def list_set_difference(s):
    """Benchmark kernel: strip ``'remove'`` via ``list.set_difference``.

    NOTE(review): being a set operation, this presumably also collapses
    duplicate items within a row -- confirm before relying on the output.
    """
    unwanted = ['remove']
    return s.list.set_difference(unwanted)
def filter(s):
    """Benchmark kernel: explode the lists, filter rows, then regroup.

    ``s`` is placed in column ``'a'`` next to a row index ``'i'``; the lists
    are exploded to one item per row, rows equal to ``'remove'`` are dropped,
    and the remaining items are collected back per index. Only column ``'a'``
    of the regrouped frame is returned.

    The name shadows the builtin ``filter``; it is kept because the
    benchmark's kernel list refers to this function by that name.
    """
    indexed = pl.select(a=s).with_row_index('i')
    one_item_per_row = indexed.explode('a')
    kept = one_item_per_row.filter(pl.col('a') != 'remove')
    regrouped = kept.group_by('i').agg('a')
    return regrouped.select('a')
def gather(s):
    """Benchmark kernel: strip ``'remove'`` by gathering the kept positions.

    ``s`` is placed in column ``'a'`` next to a row index ``'i'``. Within
    each index group, the positions whose exploded value differs from
    ``"remove"`` are located with ``pl.arg_where`` and gathered from the
    list; the first gathered result per group is kept. Only column ``'a'``
    of the aggregated frame is returned.
    """
    indexed = pl.select(a=s).with_row_index('i')

    # Positions (within the exploded list) of the items to keep.
    kept_positions = pl.arg_where(pl.col('a').explode() != "remove")
    gathered = pl.col('a').list.gather(kept_positions).first()

    return indexed.group_by('i').agg(gathered).select('a')
def setup(n):
    """Build the benchmark input: a Series of ``n`` identical lists.

    Each row holds the strings ``'0'`` .. ``'99'`` followed by the marker
    string ``'remove'``.

    Args:
        n: Number of rows. Non-integer values (for example the floats
            produced by ``np.logspace``) are truncated with ``int`` because
            ``range`` rejects floats.

    Returns:
        A ``pl.Series`` with ``int(n)`` list-of-string rows.
    """
    n_rows = int(n)
    return pl.Series(
        [list(map(str, range(100))) + ['remove'] for _ in range(n_rows)]
    )
# Time every kernel on inputs built by setup() for 10 log-spaced sizes
# between 10 and 100,000 rows, then display the resulting plot.
b2 = perfplot.bench(
    setup=setup,
    kernels=[map_elements, list_eval, list_set_difference, filter, gather],
    # dtype=int: logspace produces floats by default, but each size is used
    # as a row count by setup(), so request integer sizes explicitly.
    n_range=np.logspace(1, 5, num=10, dtype=int),
    xlabel='len(df)',
    # Result comparison is switched off; the kernels do not all return the
    # same kind of object (e.g. map_elements joins each row into a string).
    equality_check=None,
    max_time=10,
)
b2.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement