Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
def reject_outliers(data, m=2):
    """Return the elements of *data* within *m* standard deviations of the mean.

    Args:
        data: A re-iterable collection of numbers (e.g. a list or array).
        m: Half-width of the accepted band, expressed in multiples of
            ``np.std(data)``. Defaults to 2.

    Returns:
        A new list holding, in their original order, the elements ``e``
        that satisfy ``mean - m * std < e < mean + m * std``.
    """
    center = np.mean(data)
    half_width = m * np.std(data)
    # Both bounds are strict, so a point sitting exactly on the edge of the
    # band is treated as an outlier.
    return [e for e in data if center - half_width < e < center + half_width]
- >>> d = [2,4,5,1,6,5,40]
- >>> filtered_d = reject_outliers(d)
- >>> print filtered_d
- [2,4,5,1,6,5]
def reject_outliers(data, m=2):
    """Return the entries of *data* within *m* standard deviations of the mean.

    Args:
        data: Numeric values. Objects that already support element-wise
            arithmetic and boolean-mask indexing are used as given; plain
            lists and tuples are converted to a NumPy array first.
        m: Half-width of the accepted band, expressed in multiples of
            ``np.std(data)``. Defaults to 2.

    Returns:
        The subset of *data* selected by the mask
        ``abs(data - mean) < m * std``.
    """
    # Plain Python sequences support neither ``data - scalar`` nor indexing
    # by a boolean mask, so promote them before doing any arithmetic.
    if isinstance(data, (list, tuple)):
        data = np.asarray(data)
    deviation = np.abs(data - np.mean(data))
    return data[deviation < m * np.std(data)]
def reject_outliers(data, m=2.):
    """Return the entries of *data* close to the median, scaled by the MAD.

    Each value's absolute distance from the median is divided by the median
    of those distances (the median absolute deviation); values whose scaled
    distance is below *m* are kept.

    Args:
        data: Numeric values. Plain lists and tuples are converted to a
            NumPy array first; other inputs are used as given.
        m: Threshold on the scaled distance. Defaults to 2.

    Returns:
        The subset of *data* selected by the mask ``scaled_distance < m``.
    """
    # Plain Python sequences support neither ``data - scalar`` nor indexing
    # by a boolean mask, so promote them before doing any arithmetic.
    if isinstance(data, (list, tuple)):
        data = np.asarray(data)
    distance = np.abs(data - np.median(data))
    mdev = np.median(distance)
    if mdev:
        scaled = distance / mdev
    else:
        # A zero median deviation would make the division meaningless.
        # Score every point as 0 instead, but keep an array of the same shape:
        # a scalar here would turn ``scaled < m`` into a single bool, and
        # indexing an array with a bare bool adds a leading axis instead of
        # filtering element-wise.
        scaled = np.zeros(np.shape(distance))
    return data[scaled < m]
def reject_outliers(sr, iq_range=0.5):
    """Keep the entries of *sr* whose distance from the median fits the quantile spread.

    Args:
        sr: Object providing ``dropna()``, ``quantile()``, subtraction,
            ``abs()`` and boolean-mask indexing (presumably a pandas
            Series; confirm against callers).
        iq_range: Central fraction of the data spanned by the two outer
            quantiles. The default of 0.5 uses the 25% and 75% quantiles.

    Returns:
        The entries of *sr* for which ``|value - median| <= upper - lower``,
        where ``lower`` and ``upper`` are the two outer quantiles.
    """
    # Probability mass left outside the central range on each side.
    tail = (1 - iq_range) / 2
    quantiles = sr.dropna().quantile([tail, 0.50, 1 - tail])
    lower, center, upper = quantiles
    spread = upper - lower
    within_spread = (sr - center).abs() <= spread
    return sr[within_spread]
- import numpy as np
def _print_stats(values):
    """Print mean, median, extremes, standard deviation and 90th percentile of *values*."""
    print("Mean= ", np.mean(values))
    print("Median= ", np.median(values))
    print("Max/Min=", values.max(), " ", values.min())
    print("StdDev=", np.std(values))
    print("90th Percentile", np.percentile(values, 90))


# Create some random numbers
x = np.random.normal(5, 2, 1000)

# Calculate the statistics
_print_stats(x)

# Add a few large points
x[10] += 1000
x[20] += 2000
x[30] += 1500

# Recalculate the statistics
print()
_print_stats(x)

# Measure the percentile intervals and then estimate Standard Deviation of
# the distribution, both from median to the 90th percentile and from the
# 10th to 90th percentile
p90 = np.percentile(x, 90)
p10 = np.percentile(x, 10)
p50 = np.median(x)

# p50 to p90 is 1.2815 sigma
rSig = (p90 - p50) / 1.2815
print("Robust Sigma=", rSig)

# p10 to p90 spans twice that interval, hence the factor of 2
rSig = (p90 - p10) / (2 * 1.2815)
print("Robust Sigma=", rSig)
- Mean= 4.99760520022
- Median= 4.95395274981
- Max/Min= 11.1226494654 -2.15388472011
- StdDev= 1.976629928
- 90th Percentile 7.52065379649
- Mean= 9.64760520022
- Median= 4.95667658782
- Max/Min= 2205.43861943 -2.15388472011
- StdDev= 88.6263902244
- 90th Percentile 7.60646688694
- Robust Sigma= 2.06772555531
- Robust Sigma= 1.99878292462
# Keep only the points lying within five robust sigmas of the median
y = x[np.abs(x - p50) < 5 * rSig]

# Print the statistics again, this time for the clipped sample
print("Mean= ", y.mean())
print("Median= ", np.median(y))
print("Max/Min=", np.max(y), " ", np.min(y))
print("StdDev=", y.std())
- Mean= 4.99755359935
- Median= 4.95213030447
- Max/Min= 11.1226494654 -2.15388472011
- StdDev= 1.97692712883
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement