Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def _get_products(merchant_name):
    """Expand product options and return the normalized product table.

    NOTE(review): ``full_df`` is read on the first statement before anything
    assigns it, so as written this function raises ``UnboundLocalError``.
    The step that loads the merchant's products (presumably keyed by
    ``merchant_name``, which is otherwise unused here) appears to be missing
    from this snippet -- confirm against the full source.
    """
    # Expand the 'Options' column, then normalize the column names.
    full_df = unnest(full_df, 'Options', 'product code')
    normalized = _cleanup_products_df_columns(full_df)
    return normalized
def unnest(df, col, col2, reset_index=False):
    """Explode the list-valued column ``col`` into one row per list element.

    Every row of ``df`` is repeated once for each element of its ``col``
    list. Rows whose list is empty are kept and receive the placeholder
    string ``'[]'``. The original ``'Product code'`` value is copied into a
    new ``'item_group_id'`` column, and ``'Product code'`` itself gets
    ``'-<option>'`` appended (nothing is appended for the placeholder).

    Args:
        df: DataFrame with a ``'Product code'`` column and a list-valued
            column named ``col``.
        col: Name of the list-valued column to expand.
        col2: Accepted for backward compatibility but not used; the product
            code column name is hard-coded as ``'Product code'``.
        reset_index: Accepted for backward compatibility but not used.

    Returns:
        A new DataFrame indexed by the originating row label (labels repeat
        for rows that had several options). Neither ``df`` nor the lists it
        contains are modified.
    """
    # Copy each cell into a fresh list so the caller's list objects are never
    # mutated; an empty list yields the single '[]' placeholder instead.
    pairs = [
        (row_label, option)
        for row_label, options in df[col].items()
        for option in (list(options) or ['[]'])
    ]
    col_flat = pd.DataFrame(pairs, columns=['I', col]).set_index('I')

    # Swap the list column for its flattened counterpart, aligned on index.
    df = df.drop(columns=col)
    df = df.merge(col_flat, left_index=True, right_index=True)

    # Keep the un-suffixed code so variants can be grouped afterwards.
    df['item_group_id'] = df['Product code']
    df['Product code'] += df[col].apply(
        lambda val: '' if val == '[]' else '-' + val
    )
    return df
def _cleanup_products_df_columns(products_df):
    """Return a copy of ``products_df`` reduced to the normalized columns.

    Column labels are lower-cased, columns not present as keys of the
    rename map are discarded, the remaining ones are renamed, the
    ``description`` column is passed through ``clean_description``, rows
    with a repeated ``merchant_product_id`` are dropped (first occurrence
    kept), and finally rows missing ``merchant_product_id`` or ``name`` are
    removed.

    Args:
        products_df: Raw products DataFrame. It is not modified.

    Returns:
        A new DataFrame with the normalized column names.

    Raises:
        KeyError: If ``description``, ``merchant_product_id`` or ``name`` is
            absent after renaming.
    """
    # NOTE(review): merge_dictionary_and_list is defined elsewhere; presumably
    # it extends the dict with an identity mapping for each listed name --
    # verify against its definition.
    columns_map = merge_dictionary_and_list(
        {
            'product code': 'merchant_product_id',
            'product name': 'name',
            'sale price': 'sale_price',
            'product url': 'web_url',
            'item_group_id': 'item_group_id',
        },
        ['category', 'description', 'image_urls', 'price', 'quantity', 'options'],
    )

    # rename() returns a new frame, so the caller's column labels are left
    # untouched (assigning to .columns would relabel the caller's object).
    cleaned = products_df.rename(columns=str.lower)

    known = cleaned.columns.isin(list(columns_map.keys()))
    cleaned = cleaned.loc[:, known].reset_index(drop=True)
    cleaned = cleaned.rename(columns=columns_map)

    cleaned['description'] = cleaned['description'].apply(clean_description)
    cleaned = cleaned.drop_duplicates(subset=['merchant_product_id'], keep='first')
    return cleaned.dropna(subset=['merchant_product_id', 'name'])
def main():
    """Export one ``<merchant name>.csv`` product file per merchant id."""
    merchant_ids = [1, 2]
    for merchant_id in merchant_ids:
        merchant = get_merchant_by_id(merchant_id)
        csv_path = merchant.name + '.csv'
        products = _get_products(merchant.name)
        # index=False keeps the DataFrame index out of the exported file.
        products.to_csv(csv_path, encoding='utf-8', index=False)
Add Comment
Please, Sign In to add comment