Advertisement
Guest User

Untitled

a guest
Mar 22nd, 2017
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.24 KB | None | 0 0
  1. df = pd.DataFrame(np.arange(10).reshape(-1, 5), columns=list('ABCDE'))
  2. print(df)
  3.  
  4. A B C D E
  5. 0 0 1 2 3 4
  6. 1 5 6 7 8 9
  7.  
  8. df[['B', 'D']].values
  9.  
  10. array([[1, 3],
  11. [6, 8]])
  12.  
  13. from timeit import timeit
  14. import pandas as pd
  15. import numpy as np
  16. from string import ascii_uppercase as up
  17.  
  18.  
  19. def slc_df_2val(df, cols):
  20. return df[cols].values
  21.  
  22. def as_matrix(df, cols):
  23. return df.as_matrix(cols)
  24.  
  25. def hstack_per_col(df, cols):
  26. return np.hstack([df[c].values[:, None] for c in cols])
  27.  
  28. def stack_per_col_T(df, cols):
  29. return np.stack([df[c].values for c in cols]).reshape(-1, len(cols))
  30.  
  31. def get_loc_slc_array(df, cols):
  32. a = [df.columns.get_loc(c) for c in cols]
  33. return df.values[:, a]
  34.  
  35. mcol = pd.MultiIndex.from_product([list(up[:10]), list(up[-10:])])
  36.  
  37. sizes = pd.MultiIndex.from_product(
  38. [[10, 100, 1000, 10000], [1, 5, 10, 20, 30, 40]],
  39. names=['n', 'm'])
  40.  
  41. methods = pd.Index(
  42. 'slc_df_2val as_matrix hstack_per_col stack_per_col_T get_loc_slc_array'.split(),
  43. name='method')
  44.  
  45. results = pd.DataFrame(index=sizes, columns=methods)
  46.  
  47. np.random.seed([3,1415])
  48. for n in sizes.levels[0]:
  49. df = pd.DataFrame(np.arange(n * 100).reshape(-1, 100), columns=mcol)
  50. for m in sizes.levels[1]:
  51. cols = np.random.choice(mcol, m, replace=False)
  52. for f in methods:
  53. stmt = '{}(df, cols)'.format(f)
  54. setup = 'from __main__ import {}, df, cols'.format(f)
  55. tvalue = timeit(stmt, setup, number=500)
  56. results.set_value((n, m), f, tvalue)
  57.  
  58. fig, axes = plt.subplots(2, 2, figsize=(8, 6))
  59. for i, n in enumerate(sizes.levels[0]):
  60. ax = axes[i // 2, i % 2]
  61. results.xs(n).plot(lw=2, ax=ax, title='size {}'.format(n))
  62. ax.legend().remove()
  63.  
  64. axes[-1, -1].legend(bbox_to_anchor=(1.7, 2.4), fontsize=10)
  65.  
  66. fig.suptitle('Num Columns Perspective', fontsize=10)
  67.  
  68. fig.tight_layout()
  69. plt.subplots_adjust(top=.9)
  70.  
  71. fig, axes = plt.subplots(3, 2, figsize=(8, 9))
  72. for i, m in enumerate(sizes.levels[1]):
  73. ax = axes[i // 2, i % 2]
  74. results.xs(m, level=1).plot(lw=2, ax=ax, title='num cols {}'.format(m), rot=45)
  75. ax.legend().remove()
  76.  
  77. axes[-1, -1].legend(bbox_to_anchor=(1.7, 4.1), fontsize=10)
  78.  
  79. fig.suptitle('Array Length Perspective', fontsize=10)
  80.  
  81. fig.tight_layout()
  82. plt.subplots_adjust(top=.9)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement