Guest User

Untitled

a guest
Jul 24th, 2018
148
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.85 KB | None | 0 0
def _get_products(merchant_name):
    """Return the cleaned product table with the 'Options' column expanded.

    NOTE(review): ``full_df`` is read on the first statement before anything
    in this function assigns it, so as written the call raises
    ``UnboundLocalError``. The step that loads the product table (presumably
    keyed on ``merchant_name``, which is otherwise unused here) appears to be
    missing and must be restored before this function can run.
    """
    # Expand the list-valued 'Options' column into one row per option.
    full_df = unnest(full_df,'Options','product code')
    # Lower-case/rename the columns and drop duplicate or incomplete rows.
    return _cleanup_products_df_columns(full_df)
  4.    
  5. def unnest(df, col, col2, reset_index = False):
  6.    for item in df[col]:
  7.         if len(item) == 0:
  8.             item.append('[]')
  9.  
  10.    col_flat = pd.DataFrame([[i, x]
  11.                       for i, y in df[col].apply(list).iteritems()
  12.                           for x in y ], columns=['I', col]
  13.                            )
  14.    col_flat = col_flat.set_index('I')
  15.    print("col_flat::",col_flat)
  16.    df = df.drop(col, 1)
  17.    df = df.merge(col_flat, left_index=True, right_index=True)
  18.    df['item_group_id'] = df['Product code']
  19.    df['Product code'] += df[col].apply(lambda val: '' if val == "[]" else '-' + val)
  20.  
  21.    return df
  22.    
  23.  def _cleanup_products_df_columns(products_df):
  24.     columns_map = merge_dictionary_and_list({
  25.         'product code': 'merchant_product_id',
  26.         'product name': 'name',
  27.         'sale price': 'sale_price',
  28.         'product url': 'web_url',
  29.         'item_group_id':'item_group_id'
  30.     }, ['category', 'description', 'image_urls', 'price', 'quantity', 'options'])
  31.  
  32.     products_df.columns = map(str.lower, products_df.columns)
  33.     products_df = products_df.loc[:, products_df.columns.isin(list(columns_map.keys()))].reset_index(drop=True)
  34.     products_df = products_df.rename(columns=columns_map)
  35.  
  36.     products_df['description'] = products_df['description'].apply(clean_description)
  37.     products_df.drop_duplicates(subset=['merchant_product_id'], keep='first', inplace=True)
  38.  
  39.     return products_df.dropna(subset=['merchant_product_id', 'name'])
  40.    
  41. def main():
  42.     MERCHANT_IDS = [1, 2]
  43.     for idx in MERCHANT_IDS:
  44.         merchant = get_merchant_by_id(idx)
  45.         products_df = _get_products(merchant.name)
  46.         products_df.to_csv(merchant.name + '.csv', encoding='utf-8', index=False)
Add Comment
Please, Sign In to add comment