Advertisement
Guest User

Untitled

a guest
Jun 17th, 2019
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.04 KB | None | 0 0
  1. df = pd.DataFrame(
  2. {'messageLabels': [['Good', 'Other', 'Bad'],['Bad','Terrible']]}
  3. )
  4.  
  5. messageLabels            | Good  | Other | Bad  | Terrible
  6. --------------------------------------------------------
  7. ['Good', 'Other', 'Bad'] | True  | True  | True | False
  8. --------------------------------------------------------
  9. ['Bad', 'Terrible']      | False | False | True | True
  10.  
  11. df.join(df.messageLabels.str.join('|').str.get_dummies().astype(bool))
  12.  
  13. messageLabels Bad Good Other Terrible
  14. 0 [Good, Other, Bad] True True True False
  15. 1 [Bad, Terrible] True False False True
  16.  
  17. from sklearn.preprocessing import MultiLabelBinarizer
  18.  
  19. mlb = MultiLabelBinarizer()
  20. dum = mlb.fit_transform(df.messageLabels)
  21.  
  22. df.join(pd.DataFrame(dum.astype(bool), df.index, mlb.classes_))
  23.  
  24. messageLabels Bad Good Other Terrible
  25. 0 [Good, Other, Bad] True True True False
  26. 1 [Bad, Terrible] True False False True
  27.  
  28. n = len(df)
  29. i = np.arange(n)
  30. l = [*map(len, df.messageLabels)]
  31. j, u = pd.factorize(np.concatenate(df.messageLabels))
  32.  
  33. o = np.zeros((n, len(u)), bool)
  34. o[i.repeat(l), j] = True
  35.  
  36. df.join(pd.DataFrame(o, df.index, u))
  37.  
  38. messageLabels Good Other Bad Terrible
  39. 0 [Good, Other, Bad] True True True False
  40. 1 [Bad, Terrible] False False True True
  41.  
  42. df.join(pd.DataFrame([dict.fromkeys(x, True) for x in df.messageLabels]).fillna(False))
  43.  
  44. messageLabels Bad Good Other Terrible
  45. 0 [Good, Other, Bad] True True True False
  46. 1 [Bad, Terrible] True False False True
  47.  
  48. tmp = pd.DataFrame(df['messageLabels'].tolist())
  49. pd.get_dummies(tmp, prefix='', prefix_sep='').max(level=0, axis=1).astype(bool)
  50.  
  51. Bad Good Other Terrible
  52. 0 True True True False
  53. 1 True False False True
  54.  
  55. df.join(pd.get_dummies(tmp, prefix='', prefix_sep='')
  56. .max(level=0, axis=1)
  57. .astype(bool))
  58.  
  59. messageLabels Bad Good Other Terrible
  60. 0 [Good, Other, Bad] True True True False
  61. 1 [Bad, Terrible] True False False True
  62.  
  63. (pd.DataFrame(df['messageLabels'].tolist())
  64. .stack()
  65. .reset_index()
  66. .pivot_table(index='level_0', columns=0, aggfunc='size', fill_value=0)
  67. .astype(bool))
  68.  
  69. 0 Bad Good Other Terrible
  70. level_0
  71. 0 True True True False
  72. 1 True False False True
  73.  
  74. In [11]: pd.get_dummies(df.messageLabels.apply(lambda x: pd.Series(1, x)) == 1)
  75. Out[11]:
  76. Good Other Bad Terrible
  77. 0 True True True False
  78. 1 False False True True
  79.  
  80. In [12]: df.messageLabels.apply(lambda x: pd.Series(1, x))
  81. Out[12]:
  82. Good Other Bad Terrible
  83. 0 1.0 1.0 1.0 NaN
  84. 1 NaN NaN 1.0 1.0
  85.  
  86. In [21]: res = pd.get_dummies(df.messageLabels.apply(lambda x: pd.Series(1, x)) == 1)
  87.  
  88. In [22]: df[res.columns] = res
  89.  
  90. In [23]: df
  91. Out[23]:
  92. messageLabels Good Other Bad Terrible
  93. 0 [Good, Other, Bad] True True True False
  94. 1 [Bad, Terrible] False False True True
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement