Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # coding: utf-8
- import os
- import sys
- import math
- from statistics import median
- import numpy as np
- import pandas as pd
# Input CSV; it is expected to provide the `position` and `ctr` columns
# that apply_stats reads.
in_file = 'data.csv'

# Load the data and bucket positions to one decimal place so that rows
# can be grouped by exact equality on `position`.
df = pd.read_csv(in_file).round({'position': 1})
def apply_stats(row, df):
    """Annotate *row* with a modified z-score of its CTR within its position.

    The peer group is every row of *df* whose ``position`` equals the
    row's ``position``.  For that group the function computes the median
    CTR and the median absolute deviation (MAD) of CTR, then scores the
    row as ``0.6745 * (ctr - median) / MAD``, rounded to 3 decimals.

    Args:
        row: A mapping/Series with numeric ``ctr`` and ``position`` entries.
        df: The full DataFrame with ``ctr`` and ``position`` columns.

    Returns:
        The same *row* object with three entries added: ``score``,
        ``mad`` and ``median``.

    Notes:
        * MAD here is the *median* absolute deviation.  The former
          ``Series.mad()`` call computed the mean absolute deviation and
          no longer exists in pandas >= 2.0.
        * When MAD is zero the score is ``+inf``/``-inf`` for a CTR
          above/below the median and NaN for a CTR equal to it.
        * NaN CTR values in the peer group are skipped by the pandas
          reductions; an empty peer group yields NaN statistics.
    """
    ctr = float(row['ctr'])
    pos = float(row['position'])

    # CTR values of all rows sharing this row's position.
    peers = df.loc[df['position'] == pos, 'ctr']

    median_ctr = float(peers.median())
    # Median absolute deviation: median of |x - median(x)|.
    mad_ctr = float((peers - median_ctr).abs().median())

    deviation = ctr - median_ctr
    if mad_ctr != 0:
        # Also taken when mad_ctr is NaN; the division then propagates NaN.
        score = round(0.6745 * deviation / mad_ctr, 3)
    elif deviation > 0:
        score = math.inf
    elif deviation < 0:
        score = -math.inf
    else:
        # 0 / 0 (or a NaN deviation): the score is undefined.
        score = math.nan

    row['score'] = score
    row['mad'] = mad_ctr
    row['median'] = median_ctr
    return row
# Score every row against the rows that share its position.  apply_stats
# adds the `score`, `mad` and `median` columns.  Intended formula:
#   modified z-score = 0.6745 * (CTR - median CTR at that position)
#                      / median absolute deviation of CTR at that position
df = df.apply(func=apply_stats, axis=1, args=(df,))

# Write the annotated table next to the input, prefixed with "out".
df.to_csv(path_or_buf='out' + in_file)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement