Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Minimal worked example: pull two columns of a small frame out as a
# plain 2-D numpy array.
#
# Imported here so the example runs on its own; the benchmark's import
# block only appears further down the file.
import numpy as np
import pandas as pd

# 10 consecutive integers laid out as 2 rows x 5 columns labelled A..E.
df = pd.DataFrame(np.arange(10).reshape(-1, 5), columns=list('ABCDE'))
print(df)
# Printed output:
#    A  B  C  D  E
# 0  0  1  2  3  4
# 1  5  6  7  8  9

# Bare expression: in an interactive session this displays the array below;
# when run as a script it is evaluated and discarded.
df[['B', 'D']].values
# array([[1, 3],
#        [6, 8]])
- from timeit import timeit
- import pandas as pd
- import numpy as np
- from string import ascii_uppercase as up
def slc_df_2val(df, cols):
    """Slice ``cols`` out of ``df`` as a sub-frame and return its ``.values``.

    Parameters
    ----------
    df : object indexable by a collection of column labels
        (a pandas DataFrame in the benchmark).
    cols : collection of column labels to select.

    Returns
    -------
    Whatever ``df[cols].values`` yields; for a DataFrame, a 2-D array with
    one column per requested label.
    """
    selected = df[cols]
    return selected.values
def as_matrix(df, cols):
    """Return the ``cols`` columns of ``df`` as an array via ``as_matrix``.

    ``DataFrame.as_matrix`` was deprecated in pandas 0.23 and removed in
    pandas 1.0. When the method is missing, this falls back to
    ``df[cols].values`` (the replacement named by the deprecation notice),
    so the function keeps working instead of raising ``AttributeError``.
    On such versions it is effectively the same operation as
    ``slc_df_2val``.

    Parameters
    ----------
    df : pandas DataFrame to read from.
    cols : collection of column labels to select.

    Returns
    -------
    2-D array with one column per requested label.
    """
    # Look the method up on the type, not the instance: DataFrame attribute
    # access would otherwise resolve a *column* named 'as_matrix'.
    legacy = getattr(type(df), 'as_matrix', None)
    if legacy is not None:
        return legacy(df, cols)
    return df[cols].values
def hstack_per_col(df, cols):
    """Build the result by horizontally stacking one column vector per label.

    For every label in ``cols`` the column's ``.values`` gets a trailing
    axis added, and the pieces are joined with ``np.hstack``.

    Parameters
    ----------
    df : object whose ``df[label].values`` is an array
        (a pandas DataFrame in the benchmark).
    cols : iterable of column labels.

    Returns
    -------
    The ``np.hstack`` of the per-column arrays, in the order of ``cols``.
    """
    pieces = []
    for label in cols:
        column_values = df[label].values
        # Add a trailing axis so hstack joins the columns side by side.
        pieces.append(column_values[:, np.newaxis])
    return np.hstack(pieces)
def stack_per_col_T(df, cols):
    """Stack the requested columns as rows, then transpose.

    ``np.stack`` over the per-column arrays yields shape
    ``(len(cols), n_rows)``; transposing gives ``(n_rows, len(cols))`` with
    each requested column in its own output column.

    The previous implementation used ``reshape(-1, len(cols))`` instead of
    a transpose. That produced the right shape but re-read the buffer in
    row-major order, so values ended up in the wrong cells (e.g. columns
    B, D of the 2x5 example gave ``[[1, 6], [3, 8]]`` rather than
    ``[[1, 3], [6, 8]]``).

    Parameters
    ----------
    df : object whose ``df[label].values`` is an array
        (a pandas DataFrame in the benchmark).
    cols : sequence of column labels.

    Returns
    -------
    Transposed stack of the per-column arrays, in the order of ``cols``.
    """
    stacked = np.stack([df[c].values for c in cols])
    return stacked.T
def get_loc_slc_array(df, cols):
    """Translate labels to column locations, then index ``df.values``.

    Each label in ``cols`` is resolved with ``df.columns.get_loc`` and the
    resulting list is used to index the second axis of ``df.values``.

    Parameters
    ----------
    df : object exposing ``columns.get_loc`` and ``values``
        (a pandas DataFrame in the benchmark).
    cols : iterable of column labels.

    Returns
    -------
    ``df.values[:, locations]`` with locations in the order of ``cols``.
    """
    column_index = df.columns
    locations = list(map(column_index.get_loc, cols))
    return df.values[:, locations]
# ---------------------------------------------------------------------------
# Benchmark: time every extraction function over a grid of frame lengths (n)
# and number of selected columns (m).
# ---------------------------------------------------------------------------

# Column labels for the test frames: the product of the first ten and the
# last ten uppercase letters, i.e. 100 two-level keys.
mcol = pd.MultiIndex.from_product([list(up[:10]), list(up[-10:])])

# Row index of the results table: every (n, m) combination.
sizes = pd.MultiIndex.from_product(
    [[10, 100, 1000, 10000], [1, 5, 10, 20, 30, 40]],
    names=['n', 'm'])

# Column index of the results table: names of the functions under test.
# They are looked up by name in the timeit setup string below.
methods = pd.Index(
    'slc_df_2val as_matrix hstack_per_col stack_per_col_T get_loc_slc_array'.split(),
    name='method')

# One cell per (n, m) x method, filled in by the loop.
results = pd.DataFrame(index=sizes, columns=methods)

# Fixed seed so the random column picks are reproducible.
np.random.seed([3, 1415])

for n in sizes.levels[0]:
    # n rows x 100 columns of consecutive integers.
    df = pd.DataFrame(np.arange(n * 100).reshape(-1, 100), columns=mcol)
    for m in sizes.levels[1]:
        # m distinct column keys; shared by all methods for this (n, m).
        cols = np.random.choice(mcol, m, replace=False)
        for f in methods:
            stmt = '{}(df, cols)'.format(f)
            # Pulls the function and the current df/cols from __main__, so
            # this file has to be executed as the main script.
            setup = 'from __main__ import {}, df, cols'.format(f)
            # Total seconds for 500 calls.
            tvalue = timeit(stmt, setup, number=500)
            # DataFrame.set_value was deprecated in pandas 0.21 and removed
            # in 1.0; label-based assignment through .loc is the supported
            # replacement.
            results.loc[(n, m), f] = tvalue
# "Num Columns Perspective": one panel per frame length n, drawn from the
# rows of `results` at that n (so each panel is indexed by m).
# NOTE(review): `plt` is not imported in the visible source; presumably
# `matplotlib.pyplot` -- confirm it is imported elsewhere.
fig, axes = plt.subplots(2, 2, figsize=(8, 6))
for i, n in enumerate(sizes.levels[0]):
    grid_row, grid_col = divmod(i, 2)  # fill the 2x2 grid row by row
    ax = axes[grid_row, grid_col]
    timings_for_n = results.xs(n)
    timings_for_n.plot(lw=2, ax=ax, title='size {}'.format(n))
    # Drop the per-panel legend; a single legend is attached below.
    ax.legend().remove()

# Single legend, attached to the bottom-right panel and positioned via
# bbox_to_anchor.
last_panel = axes[-1, -1]
last_panel.legend(bbox_to_anchor=(1.7, 2.4), fontsize=10)

fig.suptitle('Num Columns Perspective', fontsize=10)
fig.tight_layout()
# Applied after tight_layout: sets the top edge of the subplots to 0.9.
plt.subplots_adjust(top=0.9)
# "Array Length Perspective": one panel per number of selected columns m,
# drawn from the rows of `results` at that m (so each panel is indexed by n).
# NOTE(review): `plt` is not imported in the visible source; presumably
# `matplotlib.pyplot` -- confirm it is imported elsewhere.
fig, axes = plt.subplots(3, 2, figsize=(8, 9))
for i, m in enumerate(sizes.levels[1]):
    grid_row, grid_col = divmod(i, 2)  # fill the 3x2 grid row by row
    ax = axes[grid_row, grid_col]
    timings_for_m = results.xs(m, level=1)
    timings_for_m.plot(
        lw=2, ax=ax, title='num cols {}'.format(m), rot=45)
    # Drop the per-panel legend; a single legend is attached below.
    ax.legend().remove()

# Single legend, attached to the bottom-right panel and positioned via
# bbox_to_anchor.
last_panel = axes[-1, -1]
last_panel.legend(bbox_to_anchor=(1.7, 4.1), fontsize=10)

fig.suptitle('Array Length Perspective', fontsize=10)
fig.tight_layout()
# Applied after tight_layout: sets the top edge of the subplots to 0.9.
plt.subplots_adjust(top=0.9)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement