Advertisement
jroakes

Modified Z Score

Oct 25th, 2017
252
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.85 KB | None | 0 0
  1. # coding: utf-8
  2.  
  3. import os
  4. import sys
  5. import math
  6. from statistics import median
  7. import numpy as np
  8. import pandas as pd
  9.  
  10. in_file = 'data.csv'
  11.  
  12.  
  13. df = pd.read_csv(in_file)
  14. # Round position to tenths
  15. df = df.round({'position': 1})
  16.  
  17.  
  18. def apply_stats(row, df):
  19.     ctr = float(row['ctr'])
  20.     pos = float(row['position'])
  21.    
  22.     # Median
  23.     median_ctr = median(df.ctr[df.position==pos])
  24.     # Mad
  25.     mad_ctr = df.ctr[df.position==pos].mad()
  26.    
  27.     row['score'] = round(float( (0.6745 * (ctr - median_ctr))/mad_ctr ), 3 )
  28.     row['mad'] = mad_ctr
  29.     row['median'] = median_ctr
  30.    
  31.     return row
  32.  
  33.  
  34. # Modified z-score = (constant of 0.6745 * (individual CTR – median CTR of a given position)) / median absolute deviation for a CTR at a given position
  35.  
  36.  
  37. df = df.apply(apply_stats, args=(df,), axis = 1)
  38.  
  39.  
  40. df.to_csv('out' + in_file)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement