Advertisement
Guest User

Untitled

a guest
Jul 24th, 2017
57
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.63 KB | None | 0 0
  1. def empty_str_to_null(s):
  2. """Convert empty strings to None (null)"""
  3. s.loc[s.str.strip().str.len() == 0] = None
  4. return s
  5.  
  6. foo = pd.Series(np.repeat([1,2,3,'',None,np.NaN, ' '],1E6))
  7.  
  8. >>> %time bar = empty_str_to_null(foo)
  9.  
  10. CPU times: user 7.67 s, sys: 260 ms, total: 7.93 s
  11. Wall time: 8.38 s
  12.  
  13. def empty_str_to_null_slicer(s):
  14. a = s.values.astype(str)
  15. # slicer_vectorized from https://stackoverflow.com/a/39045337/
  16. mask = (slicer_vectorized(a,0,1)==' ') | (a=='')
  17. s[mask] = None
  18. return s
  19.  
  20. In [245]: s = pd.Series(np.repeat([1,'',' ',None,np.NaN],2))
  21.  
  22. In [246]: s
  23. Out[246]:
  24. 0 1
  25. 1 1
  26. 2
  27. 3
  28. 4
  29. 5
  30. 6 None
  31. 7 None
  32. 8 NaN
  33. 9 NaN
  34. dtype: object
  35.  
  36. In [247]: a = s.values.astype(str)
  37. ...: mask = (slicer_vectorized(a,0,1)==' ') | (a=='')
  38. ...: s[mask] = None
  39. ...:
  40.  
  41. In [248]: s
  42. Out[248]:
  43. 0 1
  44. 1 1
  45. 2 None
  46. 3 None
  47. 4 None
  48. 5 None
  49. 6 None
  50. 7 None
  51. 8 NaN
  52. 9 NaN
  53. dtype: object
  54.  
  55. # Original approach
  56. def empty_str_to_null(s0):
  57. s = s0.copy()
  58. """Convert empty strings to None (null)"""
  59. s.loc[s.str.strip().str.len() == 0] = None
  60. return s
  61.  
  62. # Proposed approach
  63. def empty_str_to_null_slicer(s0):
  64. s = s0.copy()
  65. a = s.values.astype(str)
  66. # slicer_vectorized from https://stackoverflow.com/a/39045337/3293881
  67. mask = (slicer_vectorized(a,0,1)==' ') | (a=='')
  68. s[mask] = None
  69. return s
  70.  
  71. In [228]: foo = pd.Series(np.repeat([1,'',' ',None,np.NaN],1E6))
  72.  
  73. In [229]: %timeit empty_str_to_null(foo)
  74. 1 loop, best of 3: 4.17 s per loop
  75.  
  76. In [230]: %timeit empty_str_to_null_slicer(foo)
  77. 1 loop, best of 3: 573 ms per loop
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement