Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def empty_str_to_null(s):
    """Replace empty and whitespace-only strings in ``s`` with None, in place.

    Args:
        s: pandas Series to clean. Elements that are not strings are left
            untouched.

    Returns:
        The same Series object that was passed in (it is modified in place).
    """
    try:
        stripped_len = s.str.strip().str.len()
    except AttributeError:
        # pandas refuses the .str accessor when the Series holds no string
        # data (e.g. a numeric dtype); such a Series has nothing to blank out.
        return s
    # Non-string elements produce NaN lengths, which never compare equal to 0.
    s.loc[stripped_len == 0] = None
    return s
- foo = pd.Series(np.repeat([1,2,3,'',None,np.NaN, ' '],1E6))
- >>> %time bar = empty_str_to_null(foo)
- CPU times: user 7.67 s, sys: 260 ms, total: 7.93 s
- Wall time: 8.38 s
def empty_str_to_null_slicer(s):
    """Replace empty and whitespace-only strings in ``s`` with None, in place.

    The values are converted to a NumPy string array and stripped in one
    vectorized pass. An element is treated as empty only when nothing is
    left after stripping, so strings that merely start with a space
    (e.g. ``' b'``) are preserved.

    Args:
        s: pandas Series to clean.

    Returns:
        The same Series object that was passed in (it is modified in place).
    """
    # Local import keeps this snippet self-contained.
    import numpy as np

    # Every element is rendered with str(); None and NaN become 'None' and
    # 'nan', so they never match the empty-string test below.
    a = s.values.astype(str)
    mask = np.char.str_len(np.char.strip(a)) == 0
    # Skip the assignment when nothing matched so the Series is left as is.
    if mask.any():
        s[mask] = None
    return s
- In [245]: s = pd.Series(np.repeat([1,'',' ',None,np.NaN],2))
- In [246]: s
- Out[246]:
- 0 1
- 1 1
- 2
- 3
- 4
- 5
- 6 None
- 7 None
- 8 NaN
- 9 NaN
- dtype: object
- In [247]: a = s.values.astype(str)
- ...: mask = (slicer_vectorized(a,0,1)==' ') | (a=='')
- ...: s[mask] = None
- ...:
- In [248]: s
- Out[248]:
- 0 1
- 1 1
- 2 None
- 3 None
- 4 None
- 5 None
- 6 None
- 7 None
- 8 NaN
- 9 NaN
- dtype: object
- # Original approach
def empty_str_to_null(s0):
    """Return a copy of ``s0`` with empty/whitespace-only strings set to None.

    Args:
        s0: pandas Series to clean. It is not modified.

    Returns:
        A new Series in which every string that is empty after stripping
        whitespace has been replaced by None. Elements that are not strings
        are carried over unchanged.
    """
    s = s0.copy()
    try:
        stripped_len = s.str.strip().str.len()
    except AttributeError:
        # pandas refuses the .str accessor when the Series holds no string
        # data (e.g. a numeric dtype); such a Series has nothing to blank out.
        return s
    # Non-string elements produce NaN lengths, which never compare equal to 0.
    s.loc[stripped_len == 0] = None
    return s
- # Proposed approach
def empty_str_to_null_slicer(s0):
    """Return a copy of ``s0`` with empty/whitespace-only strings set to None.

    The values are converted to a NumPy string array and stripped in one
    vectorized pass. An element is treated as empty only when nothing is
    left after stripping, so strings that merely start with a space
    (e.g. ``' b'``) are preserved.

    Args:
        s0: pandas Series to clean. It is not modified.

    Returns:
        A new Series with the empty and whitespace-only strings replaced
        by None.
    """
    # Local import keeps this snippet self-contained.
    import numpy as np

    s = s0.copy()
    # Every element is rendered with str(); None and NaN become 'None' and
    # 'nan', so they never match the empty-string test below.
    a = s.values.astype(str)
    mask = np.char.str_len(np.char.strip(a)) == 0
    # Skip the assignment when nothing matched so the copy is left as is.
    if mask.any():
        s[mask] = None
    return s
- In [228]: foo = pd.Series(np.repeat([1,'',' ',None,np.NaN],1E6))
- In [229]: %timeit empty_str_to_null(foo)
- 1 loop, best of 3: 4.17 s per loop
- In [230]: %timeit empty_str_to_null_slicer(foo)
- 1 loop, best of 3: 573 ms per loop
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement