Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import ast
from io import StringIO

import pandas as pd
def unnest(df, col, col2, reset_index=False):
    """Expand a list-valued column into one row per list element.

    Each element of the iterable stored in ``df[col]`` becomes its own row,
    with the other columns of the originating row repeated. After expansion
    the ``'Product code'`` column is rewritten as
    ``"<original code>-<element>"`` and the original code is kept in a new
    ``'item_group_id'`` column.

    Args:
        df: Frame containing ``col`` and a ``'Product code'`` column. It is
            not modified; a new frame is returned.
        col: Name of the column whose values are iterables (e.g. lists) of
            strings.
        col2: Unused. Retained only so existing callers keep working.
        reset_index: If true, replace the (repeated) original index labels
            with a fresh 0..n-1 index.

    Returns:
        A new DataFrame with one row per element of ``df[col]``. Rows whose
        iterable is empty have nothing to merge with and are therefore
        absent from the result (the merge is an inner join).
    """
    # One (original index label, element) pair per list element.
    # ``Series.items()`` replaces ``iteritems()``, which pandas 2.0 removed.
    pairs = [
        (label, element)
        for label, elements in df[col].items()
        for element in list(elements)
    ]
    col_flat = pd.DataFrame(pairs, columns=['I', col]).set_index('I')

    # The keyword form is required: pandas 2.0 no longer accepts ``axis``
    # positionally in ``DataFrame.drop``.
    df = df.drop(columns=col)

    # Joining on the index repeats each original row once per element.
    df = df.merge(col_flat, left_index=True, right_index=True)
    if reset_index:
        df = df.reset_index(drop=True)

    # Build the combined code first, because 'Product code' is overwritten
    # right after being copied into 'item_group_id'.
    merchant_product_code = df['Product code'] + '-' + df[col]
    df['item_group_id'] = df['Product code']
    df['Product code'] = merchant_product_code
    return df
# Sample catalogue: each product has a stringified Python list of size options.
text = '''Product code,Options
COWZGH,"['Newborn (2.5-6kg)', 'Big Baby (6-9kg)']"
PHBC,[]
ORGWBT0,"['Newborn (2.5-6kg)', 'Big Baby (6-9kg)', 'Preemie (1.5-2.5kg)']"
COVWDBC,"['Newborn (2.5-6kg)', 'Big Baby (6-9kg)']"
COVWMV,"['Newborn (2.5-6kg)', 'Big Baby (6-9kg)']"
BZ-168,[]
ORWNG,"['Newborn (2.5-6kg)', 'Big Baby (6-9kg)']"
ORWNB13,"['Newborn (2.5-6kg)', 'Big Baby (6-9kg)']"
HBCDTG,[]'''


def _parse_options(raw):
    """Parse a stringified list literal; map an empty list to ``['']``.

    ``ast.literal_eval`` only accepts Python literals, so unlike ``eval`` it
    cannot execute arbitrary code embedded in the CSV, and the cell is parsed
    once instead of twice.

    Returning ``['']`` for an empty list keeps products without options in
    the unnested result. To drop such products instead, return the parsed
    (empty) list unchanged.
    """
    options = ast.literal_eval(raw)
    return options if options else ['']


# read data from text
fd = StringIO(text)
df = pd.read_csv(fd)

# convert strings to lists
df['Options'] = df['Options'].apply(_parse_options)

# print type and value for every row
for options in df['Options']:
    print(type(options), options)

df2 = unnest(df, 'Options', 'x')
print(df2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement