Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- __author__='Semyon Teplisky'
- import functools
- import pandas as pd
- import numpy as np
# Sample per-medium rows for a single campaign.
# NOTE: the conversions / impressions / spend values are illustrative
# placeholders.  Every aggregation further down requests those columns, and
# a frame without them makes .agg() fail on the missing columns.
raw_data = [
    {'campaign_id': 1111, 'medium_id': 'msn', 'campaign_name': 'viral #1',
     'clicks': 100, 'ctr': 0.7,
     'conversions': 5, 'impressions': 14000, 'spend': 50.0},
    {'campaign_id': 1111, 'medium_id': 'aol', 'campaign_name': 'viral #1',
     'clicks': 250, 'ctr': 0.4,
     'conversions': 10, 'impressions': 62500, 'spend': 100.0},
    {'campaign_id': 1111, 'medium_id': 'google', 'campaign_name': 'viral #1',
     'clicks': 500, 'ctr': 0.1,
     'conversions': 20, 'impressions': 500000, 'spend': 150.0},
]
data = pd.DataFrame(raw_data)
# Option 1 Advanced with functools
def wavg(val, df, weight):
    """Return the mean of ``val`` weighted by column ``weight`` of ``df``.

    Args:
        val: Series of values to average; its index labels are used to
            select the matching rows of ``df``.
        df: DataFrame that holds the weight column.
        weight: Name of the weight column in ``df``.

    Returns:
        ``(val * w).sum() / w.sum()``, or the unweighted ``val.mean()``
        when the selected weights sum to zero.
    """
    # .loc replaces the DataFrame.ix indexer, which newer pandas no longer has.
    w = df.loc[val.index, weight]
    total_weight = w.sum()
    # Dividing NumPy scalars by zero yields NaN/inf instead of raising
    # ZeroDivisionError, so the zero-weight case is tested explicitly.
    if total_weight == 0:
        return val.mean()
    return (val * w).sum() / total_weight
# Binding df to function with functools
# partial() fixes the frame and the weight column, so .agg() can call fwavg
# with nothing but each group's "ctr" Series.
fwavg = functools.partial(wavg, df=data, weight='clicks')
# String aggregator names are used instead of np.sum: passing the NumPy
# callable to .agg() is deprecated in recent pandas and yields the same sums.
gdata = data.groupby(['campaign_id']).agg({
    "conversions": "sum",
    "impressions": "sum",
    "spend": "sum",
    "clicks": "sum",
    "ctr": fwavg,
}).reset_index()
# Replace any NaN left in the aggregated columns with 0.0.
gdata = gdata.fillna(0.0)
# Only displays something in an interactive session / notebook.
gdata.head()
# Option 2 Math Workaround
# Weighted mean = sum(clicks * ctr) / sum(clicks), computed per campaign.
# The product goes into a temporary column on a copy made by assign(), so
# data['ctr'] keeps its original values for any code that runs afterwards
# (overwriting it in place would feed already-weighted values downstream).
gdata = (
    data.assign(ctr_x_clicks=data['clicks'] * data['ctr'])
    .groupby(['campaign_id'])
    .agg({
        "conversions": "sum",
        "impressions": "sum",
        "spend": "sum",
        "clicks": "sum",
        "ctr_x_clicks": "sum",
    })
    .reset_index()
)
# pop() removes the helper column, leaving "ctr" as the last column.
gdata['ctr'] = gdata.pop('ctr_x_clicks') / gdata['clicks']
# Only displays something in an interactive session / notebook.
gdata.head()
# Option 3 Two DF
def wavg(group, avg_name, weight_name):
    """Return the weighted mean of one column of ``group``.

    Args:
        group: DataFrame (e.g. one groupby group) containing both columns.
        avg_name: Name of the column whose values are averaged.
        weight_name: Name of the column that supplies the weights.

    Returns:
        ``(val * w).sum() / w.sum()``, or the unweighted ``val.mean()``
        when the weights sum to zero.
    """
    val = group[avg_name]
    w = group[weight_name]
    total_weight = w.sum()
    # Dividing NumPy scalars by zero yields NaN/inf instead of raising
    # ZeroDivisionError, so the zero-weight case is tested explicitly.
    if total_weight == 0:
        return val.mean()
    return (val * w).sum() / total_weight
# Group once and reuse the grouping for both the weighted CTR and the sums.
by_campaign = data.groupby(['campaign_id'])
# Selecting the two needed columns keeps the grouping key out of the frames
# handed to wavg.  The resulting Series is named "ctr" because pd.merge
# refuses to merge a Series that has no name.
data_ctr = by_campaign[["ctr", "clicks"]].apply(wavg, "ctr", "clicks").rename("ctr")
data_meta = by_campaign.agg({
    "conversions": "sum",
    "impressions": "sum",
    "spend": "sum",
    "clicks": "sum",
})
# Both pieces are indexed by campaign_id; join on that index, then turn it
# back into a column so the result has the same layout as Options 1 and 2.
gdata = pd.merge(data_meta, data_ctr, left_index=True, right_index=True).reset_index()
# Only displays something in an interactive session / notebook.
gdata.head()
Add Comment
Please, Sign In to add comment