Advertisement
Guest User

Untitled

a guest
Jul 16th, 2019
120
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.91 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. #
  4.  
  5. import sys
  6. import os
  7. import numpy as np
  8. import pandas as pd
  9. import scipy.stats as ss
  10.  
  11. def rank_INT(series, c=3.0/8, stochastic=True):
  12. """ Perform rank-based inverse normal transformation on pandas series.
  13. If stochastic is True ties are given rank randomly, otherwise ties will
  14. share the same value. NaN values are ignored.
  15. Args:
  16. param1 (pandas.Series): Series of values to transform
  17. param2 (Optional[float]): Constand parameter (Bloms constant)
  18. param3 (Optional[bool]): Whether to randomise rank of ties
  19.  
  20. Returns:
  21. pandas.Series
  22. """
  23.  
  24. # Check input
  25. assert(isinstance(series, pd.Series))
  26. assert(isinstance(c, float))
  27. assert(isinstance(stochastic, bool))
  28.  
  29. # Set seed
  30. np.random.seed(123)
  31.  
  32. # Take original series indexes
  33. orig_idx = series.index
  34.  
  35. # Drop NaNs
  36. series = series.loc[~pd.isnull(series)]
  37.  
  38. # Get ranks
  39. if stochastic == True:
  40. # Shuffle by index
  41. series = series.loc[np.random.permutation(series.index)]
  42. # Get rank, ties are determined by their position in the series (hence
  43. # why we randomised the series)
  44. rank = ss.rankdata(series, method="ordinal")
  45. else:
  46. # Get rank, ties are averaged
  47. rank = ss.rankdata(series, method="average")
  48.  
  49. # Convert numpy array back to series
  50. rank = pd.Series(rank, index=series.index)
  51.  
  52. # Convert rank to normal distribution
  53. transformed = rank.apply(rank_to_normal, c=c, n=len(rank))
  54.  
  55. return transformed[orig_idx]
  56.  
  57. def rank_to_normal(rank, c, n):
  58. # Standard quantile function
  59. x = (rank - c) / (n - 2*c + 1)
  60. return ss.norm.ppf(x)
  61.  
  62. def test():
  63.  
  64. # Test
  65. s = pd.Series([2, 1, 1, np.nan, 4, 3], index=["a", "b", "c", "d", "e", "f"])
  66. res = rank_INT(s, stochastic=True)
  67. print res
  68.  
  69. return 0
  70.  
  71. if __name__ == '__main__':
  72.  
  73. test()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement