Advertisement
Guest User

Untitled

a guest
Jul 20th, 2017
55
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.28 KB | None | 0 0
  1. import numpy as np
  2.  
  3. def reject_outliers(data):
  4. m = 2
  5. u = np.mean(data)
  6. s = np.std(data)
  7. filtered = [e for e in data if (u - 2 * s < e < u + 2 * s)]
  8. return filtered
  9.  
  10. >>> d = [2,4,5,1,6,5,40]
  11. >>> filtered_d = reject_outliers(d)
  12. >>> print(filtered_d)
  13. [2, 4, 5, 1, 6, 5]
  14.  
  15. def reject_outliers(data, m=2):
  16. return data[abs(data - np.mean(data)) < m * np.std(data)]
  17.  
  18. def reject_outliers(data, m = 2.):
  19. d = np.abs(data - np.median(data))
  20. mdev = np.median(d)
  21. s = d/mdev if mdev else 0.
  22. return data[s<m]
  23.  
  24. def reject_outliers(sr, iq_range=0.5):
  25. pcnt = (1 - iq_range) / 2
  26. qlow, median, qhigh = sr.dropna().quantile([pcnt, 0.50, 1-pcnt])
  27. iqr = qhigh - qlow
  28. return sr[ (sr - median).abs() <= iqr]
  29.  
  30. import numpy as np
  31.  
  32. # Create some random numbers
  33. x = np.random.normal(5, 2, 1000)
  34.  
  35. # Calculate the statistics
  36. print("Mean= ", np.mean(x))
  37. print("Median= ", np.median(x))
  38. print("Max/Min=", x.max(), " ", x.min())
  39. print("StdDev=", np.std(x))
  40. print("90th Percentile", np.percentile(x, 90))
  41.  
  42. # Add a few large points
  43. x[10] += 1000
  44. x[20] += 2000
  45. x[30] += 1500
  46.  
  47. # Recalculate the statistics
  48. print()
  49. print("Mean= ", np.mean(x))
  50. print("Median= ", np.median(x))
  51. print("Max/Min=", x.max(), " ", x.min())
  52. print("StdDev=", np.std(x))
  53. print("90th Percentile", np.percentile(x, 90))
  54.  
  55. # Measure the percentile intervals and then estimate Standard Deviation of the distribution, both from median to the 90th percentile and from the 10th to 90th percentile
  56. p90 = np.percentile(x, 90)
  57. p10 = np.percentile(x, 10)
  58. p50 = np.median(x)
  59. # p50 to p90 is 1.2815 sigma
  60. rSig = (p90-p50)/1.2815
  61. print("Robust Sigma=", rSig)
  62.  
  63. rSig = (p90-p10)/(2*1.2815)
  64. print("Robust Sigma=", rSig)
  65.  
  66. Mean= 4.99760520022
  67. Median= 4.95395274981
  68. Max/Min= 11.1226494654 -2.15388472011
  69. StdDev= 1.976629928
  70. 90th Percentile 7.52065379649
  71.  
  72. Mean= 9.64760520022
  73. Median= 4.95667658782
  74. Max/Min= 2205.43861943 -2.15388472011
  75. StdDev= 88.6263902244
  76. 90th Percentile 7.60646688694
  77.  
  78. Robust Sigma= 2.06772555531
  79. Robust Sigma= 1.99878292462
  80.  
  81. y = x[abs(x - p50) < rSig*5]
  82.  
  83. # Print the statistics again
  84. print("Mean= ", np.mean(y))
  85. print("Median= ", np.median(y))
  86. print("Max/Min=", y.max(), " ", y.min())
  87. print("StdDev=", np.std(y))
  88.  
  89. Mean= 4.99755359935
  90. Median= 4.95213030447
  91. Max/Min= 11.1226494654 -2.15388472011
  92. StdDev= 1.97692712883
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement