Guest User

Untitled

a guest
Jun 20th, 2018
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.15 KB | None | 0 0
  1. In [171]: A = np.array([1.1, 1.1, 3.3, 3.3, 5.5, 6.6])
  2.  
  3. In [172]: B = np.array([111, 222, 222, 333, 333, 777])
  4.  
  5. In [173]: C = randint(10, 99, 6)
  6.  
  7. In [174]: df = pd.DataFrame(zip(A, B, C), columns=['A', 'B', 'C'])
  8.  
  9. In [175]: df.set_index(['A', 'B'], inplace=True)
  10.  
  11. In [176]: df
  12. Out[176]:
  13. C
  14. A B
  15. 1.1 111 20
  16. 222 31
  17. 3.3 222 24
  18. 333 65
  19. 5.5 333 22
  20. 6.6 777 74
  21.  
  22. In [536]: result_df = df.loc[(df.index.get_level_values('A') > 1.7) & (df.index.get_level_values('B') < 666)]
  23.  
  24. In [537]: result_df
  25. Out[537]:
  26. C
  27. A B
  28. 3.3 222 43
  29. 333 59
  30. 5.5 333 56
  31.  
  32. In [538]: result_df.index.get_level_values('A')
  33. Out[538]: Index([3.3, 3.3, 5.5], dtype=object)
  34.  
  35. In [558]: df = store.select(STORE_EXTENT_BURSTS_DF_KEY)
  36.  
  37. In [559]: len(df)
  38. Out[559]: 12857
  39.  
  40. In [560]: df.sort(inplace=True)
  41.  
  42. In [561]: df_without_index = df.reset_index()
  43.  
  44. In [562]: %timeit df.loc[(df.index.get_level_values('END_TIME') > 358200) & (df.index.get_level_values('START_TIME') < 361680)]
  45. 1000 loops, best of 3: 562 µs per loop
  46.  
  47. In [563]: %timeit df_without_index[(df_without_index.END_TIME > 358200) & (df_without_index.START_TIME < 361680)]
  48. 1000 loops, best of 3: 507 µs per loop
  49.  
  50. In [12]: df
  51. Out[12]:
  52. C
  53. A B
  54. 1.1 111 68
  55. 222 40
  56. 3.3 222 20
  57. 333 11
  58. 5.5 333 80
  59. 6.6 777 51
  60.  
  61. In [13]: df.query('3.3 <= A <= 6.6') # for closed interval
  62. Out[13]:
  63. C
  64. A B
  65. 3.3 222 20
  66. 333 11
  67. 5.5 333 80
  68. 6.6 777 51
  69.  
  70. In [14]: df.query('3.3 < A < 6.6') # for open interval
  71. Out[14]:
  72. C
  73. A B
  74. 5.5 333 80
  75.  
  76. In [15]: df.query('2.0 <= A <= 4.0')
  77. Out[15]:
  78. C
  79. A B
  80. 3.3 222 20
  81. 333 11
  82.  
  83. In [16]: df.query('111 <= B <= 500')
  84. Out[16]:
  85. C
  86. A B
  87. 1.1 111 68
  88. 222 40
  89. 3.3 222 20
  90. 333 11
  91. 5.5 333 80
  92.  
  93. In [17]: df.query('0 < A < 4 and 150 < B < 400')
  94. Out[17]:
  95. C
  96. A B
  97. 1.1 222 40
  98. 3.3 222 20
  99. 333 11
  100.  
  101. In [11]: df
  102. Out[11]:
  103. C
  104. A B
  105. 1.1 111 81
  106. 222 45
  107. 3.3 222 98
  108. 333 13
  109. 5.5 333 89
  110. 6.6 777 98
  111.  
  112. In [12]: x = df.reset_index()
  113.  
  114. In [13]: x.loc[(x.A>=3.3)&(x.A<=6.6)]
  115. Out[13]:
  116. A B C
  117. 2 3.3 222 98
  118. 3 3.3 333 13
  119. 4 5.5 333 89
  120. 5 6.6 777 98
  121.  
  122. In [14]: x.loc[(x.A>=2.0)&(x.A<=4.0)]
  123. Out[14]:
  124. A B C
  125. 2 3.3 222 98
  126. 3 3.3 333 13
  127.  
  128. In [15]: x.loc[(x.B>=111.0)&(x.B<=500.0)]
  129. Out[15]:
  130. A B C
  131. 0 1.1 111 81
  132. 1 1.1 222 45
  133. 2 3.3 222 98
  134. 3 3.3 333 13
  135. 4 5.5 333 89
  136.  
  137. In [16]: x.loc[(x.B>=111.0)&(x.B<=500.0)].set_index(['A','B'])
  138. Out[16]:
  139. C
  140. A B
  141. 1.1 111 81
  142. 222 45
  143. 3.3 222 98
  144. 333 13
  145. 5.5 333 89
  146.  
  147. In [5]: x.loc[(x.B>=111.0)&(x.B<=500.0)].set_index(['A','B']).index
  148. Out[5]:
  149. MultiIndex
  150. [(1.1, 111), (1.1, 222), (3.3, 222), (3.3, 333), (5.5, 333)]
Add Comment
Please, Sign In to add comment