Guest User

Untitled

a guest
Oct 23rd, 2017
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.18 KB | None | 0 0
  1. import pandas as pd
  2.  
  3. In [109]: s = pd.Series([1,2,3,4,6,3,4,0,2])
  4.  
  5. In [110]: s
  6. Out[110]:
  7. 0 1
  8. 1 2
  9. 2 3
  10. 3 4
  11. 4 6
  12. 5 3
  13. 6 4
  14. 7 0
  15. 8 2
  16. dtype: int64
  17.  
  18. In [111]: s.rolling(3, min_periods=1).sum()
  19. Out[111]:
  20. 0 1.0
  21. 1 3.0
  22. 2 6.0
  23. 3 9.0
  24. 4 13.0
  25. 5 13.0
  26. 6 13.0
  27. 7 7.0
  28. 8 6.0
  29. dtype: float64
  30.  
  31. In [112]: idx = s.rolling(3, min_periods=1).sum().idxmax()
  32.  
  33. In [113]: idx
  34. Out[113]: 4
  35.  
  36. In [114]: s.loc[idx-2:idx]
  37. Out[114]:
  38. 2 3
  39. 3 4
  40. 4 6
  41. dtype: int64
  42.  
  43. In [18]: a = np.random.randint(10**4, size=10**6)
  44.  
  45. In [19]: a
  46. Out[19]: array([9918, 4299, 7829, ..., 7513, 3367, 7140])
  47.  
  48. In [20]: pd.options.display.max_rows = 15
  49.  
  50. In [21]: s = pd.Series(a)
  51.  
  52. In [22]: %%timeit
  53. ...: idx = s.rolling(3, min_periods=1).sum().idxmax()
  54. ...: s.loc[idx-2:idx]
  55. ...:
  56. 115 ms ± 7.44 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
  57.  
  58. In [23]: s.shape
  59. Out[23]: (1000000,)
  60.  
  61. $ python -mtimeit -s 'import numpy as np; a = np.random.randint(10**4, size=10**6); from max_rolling_sum import max_rolling_sum as f' 'f(a, 3)'
  62. 100 loops, best of 3: 2.7 msec per loop
  63. $ python -mtimeit -s 'import numpy as np; a = np.random.randint(10**4, size=10**6); from max_rolling_maxu import max_rolling_sum as f' 'f(a, 3)'
  64. 10 loops, best of 3: 48.1 msec per loop
  65.  
  66. #cython: boundscheck=False
  67.  
  68. ctypedef long array_type
  69.  
  70. cpdef Py_ssize_t max_rolling_sum(array_type[:] arr, Py_ssize_t k) nogil:
  71. """arr[i:i+k].sum() is maximum."""
  72. cdef Py_ssize_t N = arr.shape[0]
  73. if N < 1:
  74. return -1 # error: no sum
  75.  
  76. cdef Py_ssize_t i
  77. cdef array_type sum_ = 0
  78. for i in range(min(k, N)): # find first sum arr[:k].sum()
  79. sum_ += arr[i]
  80. cdef Py_ssize_t max_start = 0
  81. cdef array_type max_sum = sum_
  82. for i in range(k, N): # compute rolling sum arr[i-k+1:i+1].sum()
  83. sum_ -= arr[i - k] # pop (left) from old sum
  84. sum_ += arr[i] # append (right) to new sum
  85. if max_sum < sum_:
  86. max_sum = sum_
  87. max_start = i - k + 1
  88. return max_start
  89.  
  90. import pandas as pd
  91.  
  92. def max_rolling_sum(arr, k):
  93. """arr[i:i+k].sum() is maximum."""
  94. s = pd.Series(arr)
  95. idx = s.rolling(k, min_periods=1).sum().idxmax()
  96. return max(idx - 2, 0)
Add Comment
Please, Sign In to add comment