Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- In [109]: s = pd.Series([1,2,3,4,6,3,4,0,2])
- In [110]: s
- Out[110]:
- 0 1
- 1 2
- 2 3
- 3 4
- 4 6
- 5 3
- 6 4
- 7 0
- 8 2
- dtype: int64
- In [111]: s.rolling(3, min_periods=1).sum()
- Out[111]:
- 0 1.0
- 1 3.0
- 2 6.0
- 3 9.0
- 4 13.0
- 5 13.0
- 6 13.0
- 7 7.0
- 8 6.0
- dtype: float64
- In [112]: idx = s.rolling(3, min_periods=1).sum().idxmax()
- In [113]: idx
- Out[113]: 4
- In [114]: s.loc[idx-2:idx]
- Out[114]:
- 2 3
- 3 4
- 4 6
- dtype: int64
- In [18]: a = np.random.randint(10**4, size=10**6)
- In [19]: a
- Out[19]: array([9918, 4299, 7829, ..., 7513, 3367, 7140])
- In [20]: pd.options.display.max_rows = 15
- In [21]: s = pd.Series(a)
- In [22]: %%timeit
- ...: idx = s.rolling(3, min_periods=1).sum().idxmax()
- ...: s.loc[idx-2:idx]
- ...:
- 115 ms ± 7.44 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
- In [23]: s.shape
- Out[23]: (1000000,)
- $ python -mtimeit -s 'import numpy as np; a = np.random.randint(10**4, size=10**6); from max_rolling_sum import max_rolling_sum as f' 'f(a, 3)'
- 100 loops, best of 3: 2.7 msec per loop
- $ python -mtimeit -s 'import numpy as np; a = np.random.randint(10**4, size=10**6); from max_rolling_maxu import max_rolling_sum as f' 'f(a, 3)'
- 10 loops, best of 3: 48.1 msec per loop
#cython: boundscheck=False
# Bounds checking is disabled by the directive above, so every index used
# below has to be kept in range by the loop limits and the guard clause.
ctypedef long array_type


cpdef Py_ssize_t max_rolling_sum(array_type[:] arr, Py_ssize_t k) nogil:
    """Return the start index i for which arr[i:i+k].sum() is maximum.

    The first window sum is accumulated directly; each following sum is
    derived from the previous one by dropping the element that left the
    window and adding the element that entered it.

    Parameters
    ----------
    arr : array_type[:]
        One-dimensional buffer of C ``long`` values.
    k : Py_ssize_t
        Window length. If ``k`` is larger than the length of ``arr`` the
        only window considered is the whole array.

    Returns
    -------
    Py_ssize_t
        Start index of the first window with the largest sum (ties keep
        the earliest start), or -1 if ``arr`` is empty or ``k`` is
        negative.
    """
    cdef Py_ssize_t N = arr.shape[0]
    cdef Py_ssize_t i
    cdef array_type sum_ = 0
    cdef Py_ssize_t max_start = 0
    cdef array_type max_sum
    # A negative k would make the rolling loop below index past the end of
    # the buffer (arr[i - k] with i - k >= N), which is not caught because
    # bounds checking is off.
    if N < 1 or k < 0:
        return -1  # error: no sum
    for i in range(min(k, N)):  # find first sum arr[:k].sum()
        sum_ += arr[i]
    max_sum = sum_
    for i in range(k, N):  # compute rolling sum arr[i-k+1:i+1].sum()
        sum_ -= arr[i - k]  # pop (left) from old sum
        sum_ += arr[i]  # append (right) to new sum
        if max_sum < sum_:  # strict comparison: earliest maximum wins
            max_sum = sum_
            max_start = i - k + 1
    return max_start
import pandas as pd


def max_rolling_sum(arr, k):
    """Return the start index i for which arr[i:i+k].sum() is maximum.

    Parameters
    ----------
    arr : sequence or array of numbers
        Values to scan; converted to a ``pandas.Series`` with the default
        0..N-1 index.
    k : int
        Window length (expected to be at least 1). If ``k`` is larger than
        the length of ``arr`` the only window considered is the whole
        array.

    Returns
    -------
    int
        Start index of the first window with the largest sum, or -1 if
        ``arr`` is empty.
    """
    s = pd.Series(arr)
    if s.empty:
        return -1  # no window exists, so there is no sum to maximise
    # Require full windows so a shortened leading window cannot win over a
    # real k-element window (this matters when values can be negative).
    # When k exceeds the length, fall back to one window over everything.
    full_window = min(k, len(s))
    idx = s.rolling(k, min_periods=full_window).sum().idxmax()
    # idxmax gives the position of the window's LAST element; step back
    # k - 1 places to get its first element, clamping for the short case.
    return int(max(idx - k + 1, 0))
Add Comment
Please, Sign In to add comment