Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- df = pd.DataFrame(
- {'messageLabels': [['Good', 'Other', 'Bad'],['Bad','Terrible']]}
- )
- messageLabels            | Good  | Other | Bad  | Terrible
- ----------------------------------------------------------
- ['Good', 'Other', 'Bad'] | True  | True  | True | False
- ----------------------------------------------------------
- ['Bad','Terrible']       | False | False | True | True
- df.join(df.messageLabels.str.join('|').str.get_dummies().astype(bool))
- messageLabels Bad Good Other Terrible
- 0 [Good, Other, Bad] True True True False
- 1 [Bad, Terrible] True False False True
- from sklearn.preprocessing import MultiLabelBinarizer
- mlb = MultiLabelBinarizer()
- dum = mlb.fit_transform(df.messageLabels)
- df.join(pd.DataFrame(dum.astype(bool), df.index, mlb.classes_))
- messageLabels Bad Good Other Terrible
- 0 [Good, Other, Bad] True True True False
- 1 [Bad, Terrible] True False False True
- n = len(df)
- i = np.arange(n)
- l = [*map(len, df.messageLabels)]
- j, u = pd.factorize(np.concatenate(df.messageLabels))
- o = np.zeros((n, len(u)), bool)
- o[i.repeat(l), j] = True
- df.join(pd.DataFrame(o, df.index, u))
- messageLabels Good Other Bad Terrible
- 0 [Good, Other, Bad] True True True False
- 1 [Bad, Terrible] False False True True
- df.join(pd.DataFrame([dict.fromkeys(x, True) for x in df.messageLabels]).notna())
- messageLabels Bad Good Other Terrible
- 0 [Good, Other, Bad] True True True False
- 1 [Bad, Terrible] True False False True
- tmp = pd.DataFrame(df['messageLabels'].tolist())
- pd.get_dummies(tmp, prefix='', prefix_sep='').T.groupby(level=0).max().T.astype(bool)
- Bad Good Other Terrible
- 0 True True True False
- 1 True False False True
- df.join(pd.get_dummies(tmp, prefix='', prefix_sep='')
- .T.groupby(level=0).max().T
- .astype(bool))
- messageLabels Bad Good Other Terrible
- 0 [Good, Other, Bad] True True True False
- 1 [Bad, Terrible] True False False True
- (pd.DataFrame(df['messageLabels'].tolist())
- .stack()
- .reset_index()
- .pivot_table(index='level_0', columns=0, aggfunc='size', fill_value=0)
- .astype(bool))
- 0 Bad Good Other Terrible
- level_0
- 0 True True True False
- 1 True False False True
- In [11]: pd.get_dummies(df.messageLabels.apply(lambda x: pd.Series(1, x)) == 1)
- Out[11]:
- Good Other Bad Terrible
- 0 True True True False
- 1 False False True True
- In [12]: df.messageLabels.apply(lambda x: pd.Series(1, x))
- Out[12]:
- Good Other Bad Terrible
- 0 1.0 1.0 1.0 NaN
- 1 NaN NaN 1.0 1.0
- In [21]: res = pd.get_dummies(df.messageLabels.apply(lambda x: pd.Series(1, x)) == 1)
- In [22]: df[res.columns] = res
- In [23]: df
- Out[23]:
- messageLabels Good Other Bad Terrible
- 0 [Good, Other, Bad] True True True False
- 1 [Bad, Terrible] False False True True
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement