Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def wmean_ungrouped(d, w):
    """Return the weighted mean of ``d`` using the weights ``w``.

    The result is ``sum(d[i] * w[i]) / sum(w[i])``, computed through
    ``d.dot(w)``.

    Args:
        d: Values to average; must provide a ``dot`` method (for example a
            pandas Series or a NumPy array).
        w: Weights, one per value; must provide a ``sum`` method.

    Returns:
        The weighted mean. No guard is applied for weights that sum to
        zero; the division is performed as-is.
    """
    # ``dot`` already reduces two 1-D inputs to a scalar, on which ``.sum()``
    # is a no-op; it is kept so that a non-scalar ``dot`` result is still
    # collapsed to a single number.
    weighted_total = d.dot(w).sum()
    return weighted_total / w.sum()
def wmean_grouped(group, var_name_in, var_name_weight):
    """Return the weighted mean of one column of ``group``.

    Args:
        group: Mapping-like object indexed by column name (for example the
            DataFrame holding the rows of a single group).
        var_name_in: Key of the column holding the values to average.
        var_name_weight: Key of the column holding the weights.

    Returns:
        ``sum(values * weights) / sum(weights)``. No guard is applied for
        weights that sum to zero; the division is performed as-is.
    """
    values = group[var_name_in]
    weights = group[var_name_weight]
    return (values * weights).sum() / weights.sum()
# Maps the name of an unweighted summary statistic to the callable that
# computes it from a column of values.
FUNCS = {
    "mean": np.mean,
    "sum": np.sum,
    # np.size counts every element. np.count_nonzero, used previously,
    # silently left out observations whose value is 0. NaN entries are
    # counted by both, so their handling is unchanged.
    "count": np.size,
}
def my_summary(
    data,
    var_names_in,
    var_names_out,
    var_functions,
    var_name_weight=None,
    var_names_group=None,
):
    """Compute a table of summary statistics, optionally per group.

    The three ``var_*`` sequences are read in lockstep: the i-th statistic
    applies ``var_functions[i]`` to column ``var_names_in[i]`` of ``data``
    and stores the outcome in output column ``var_names_out[i]``.

    Args:
        data: Table to summarise; indexed by column name and, when grouping
            is requested, providing ``groupby``.
        var_names_in: Names of the input columns.
        var_names_out: Names of the output columns.
        var_functions: Statistic names; either ``"wmean"`` (weighted mean)
            or a key of ``FUNCS``.
        var_name_weight: Column holding the weights used by ``"wmean"``.
        var_names_group: Column name(s) to group by, passed to
            ``data.groupby``; ``None`` summarises the whole table.

    Returns:
        A DataFrame with one column per requested statistic. With grouping,
        the rows are whatever the group-wise ``apply`` produces (one per
        group); without grouping, each statistic is wrapped in a
        one-element Series.

    Raises:
        ValueError: If the three ``var_*`` sequences differ in length.
        KeyError: If a statistic name is neither ``"wmean"`` nor a key of
            ``FUNCS``.
    """
    var_names_in = list(var_names_in)
    var_names_out = list(var_names_out)
    var_functions = list(var_functions)
    # zip() would silently drop the surplus entries of the longer lists and
    # hide a mis-specified request, so mismatched lengths are rejected.
    if not len(var_names_in) == len(var_names_out) == len(var_functions):
        raise ValueError(
            "var_names_in, var_names_out and var_functions must have the "
            "same length"
        )

    result = DataFrame()
    grouped = None if var_names_group is None else data.groupby(var_names_group)

    for var_name_in, var_name_out, var_function in zip(
        var_names_in, var_names_out, var_functions
    ):
        is_wmean = var_function == "wmean"
        if grouped is not None:
            if is_wmean:
                # The weighted mean needs two columns of each group, so it is
                # applied to the whole group rather than to a single column.
                # ``col`` is bound as a default so the lambda does not depend
                # on the loop variable's later values.
                column = Series(
                    grouped.apply(
                        lambda group, col=var_name_in: wmean_grouped(
                            group, col, var_name_weight
                        )
                    )
                )
            else:
                column = grouped[var_name_in].apply(FUNCS[var_function])
        elif is_wmean:
            column = Series(
                wmean_ungrouped(data[var_name_in], data[var_name_weight])
            )
        else:
            column = Series(FUNCS[var_function](data[var_name_in]))
        result[var_name_out] = column

    return result
# Example: count, sum, mean and val_1-weighted mean of val_2 for every
# (Region, Category) group of df. The returned table is not assigned here.
my_summary(
    data=df,
    var_names_in=["val_2", "val_2", "val_2", "val_2"],
    var_names_out=["val_2_c", "val_2_s", "val_2_m", "val_2_wmean"],
    var_functions=["count", "sum", "mean", "wmean"],
    var_name_weight="val_1",
    var_names_group=["Region", "Category"],
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement