Guest User

Untitled

a guest
Apr 26th, 2018
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.34 KB | None | 0 0
  1. df = pd.DataFrame({'A': ['x1','x2','x3', 'x4'], 'B':[['v1','v2'],['v3','v4'],['v5','v6'],['v7','v8']], 'C':[['c1','c2'],['c3','c4'],['c5','c6'],['c7','c8']],'D':[['d1','d2'],['d3','d4'],['d5','d6'],['d7','d8']], 'E':[['e1','e2'],['e3','e4'],['e5','e6'],['e7','e8']]})
  2. A B C D E
  3. 0 x1 [v1, v2] [c1, c2] [d1, d2] [e1, e2]
  4. 1 x2 [v3, v4] [c3, c4] [d3, d4] [e3, e4]
  5. 2 x3 [v5, v6] [c5, c6] [d5, d6] [e5, e6]
  6. 3 x4 [v7, v8] [c7, c8] [d7, d8] [e7, e8]
  7.  
  8. A B C D E
  9. 0 x1 v1 c1 d1 e1
  10. 0 x1 v2 c2 d2 e2
  11. 1 x2 v3 c3 d3 e3
  12. 1 x2 v4 c4 d4 e4
  13. .....
  14.  
  15. def explode(df, lst_cols, fill_value=''):
  16. # make sure `lst_cols` is a list
  17. if lst_cols and not isinstance(lst_cols, list):
  18. lst_cols = [lst_cols]
  19. # all columns except `lst_cols`
  20. idx_cols = df.columns.difference(lst_cols)
  21.  
  22. # calculate lengths of lists
  23. lens = df[lst_cols[0]].str.len()
  24.  
  25. if (lens > 0).all():
  26. # ALL lists in cells aren't empty
  27. return pd.DataFrame({
  28. col:np.repeat(df[col].values, df[lst_cols[0]].str.len())
  29. for col in idx_cols
  30. }).assign(**{col:np.concatenate(df[col].values) for col in lst_cols})
  31. .loc[:, df.columns]
  32. else:
  33. # at least one list in cells is empty
  34. return pd.DataFrame({
  35. col:np.repeat(df[col].values, df[lst_cols[0]].str.len())
  36. for col in idx_cols
  37. }).assign(**{col:np.concatenate(df[col].values) for col in lst_cols})
  38. .append(df.loc[lens==0, idx_cols]).fillna(fill_value)
  39. .loc[:, df.columns]
  40.  
  41. In [82]: explode(df, lst_cols=list('BCDE'))
  42. Out[82]:
  43. A B C D E
  44. 0 x1 v1 c1 d1 e1
  45. 1 x1 v2 c2 d2 e2
  46. 2 x2 v3 c3 d3 e3
  47. 3 x2 v4 c4 d4 e4
  48. 4 x3 v5 c5 d5 e5
  49. 5 x3 v6 c6 d6 e6
  50. 6 x4 v7 c7 d7 e7
  51. 7 x4 v8 c8 d8 e8
  52.  
  53. In [1253]: (df.set_index('A')
  54. .apply(lambda x: x.apply(pd.Series).stack())
  55. .reset_index()
  56. .drop('level_1', 1))
  57. Out[1253]:
  58. A B C D E
  59. 0 x1 v1 c1 d1 e1
  60. 1 x1 v2 c2 d2 e2
  61. 2 x2 v3 c3 d3 e3
  62. 3 x2 v4 c4 d4 e4
  63. 4 x3 v5 c5 d5 e5
  64. 5 x3 v6 c6 d6 e6
  65. 6 x4 v7 c7 d7 e7
  66. 7 x4 v8 c8 d8 e8
Add Comment
Please, Sign In to add comment