Advertisement
Guest User

Untitled

a guest
Aug 17th, 2017
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.04 KB | None | 0 0
  1. import numpy as np
  2. from scipy import stats
  3. from itertools import combinations
  4. from statsmodels.stats.multitest import multipletests
  5. from statsmodels.stats.libqsturng import psturng
  6. import warnings
  7.  
  8.  
  9. def kw_nemenyi(groups, to_compare=None, alpha=0.05, method='tukey'):
  10. """
  11.  
  12. Kruskal-Wallis 1-way ANOVA with Nemenyi's multiple comparison test
  13.  
  14. Arguments:
  15. ---------------
  16. groups: sequence
  17. arrays corresponding to k mutually independent samples from
  18. continuous populations
  19.  
  20. to_compare: sequence
  21. tuples specifying the indices of pairs of groups to compare, e.g.
  22. [(0, 1), (0, 2)] would compare group 0 with 1 & 2. by default, all
  23. possible pairwise comparisons between groups are performed.
  24.  
  25. alpha: float
  26. family-wise error rate used for correcting for multiple comparisons
  27. (see statsmodels.stats.multitest.multipletests for details)
  28.  
  29. method: string
  30. the null distribution of the test statistic used to determine the
  31. corrected p-values for each pair of groups, can be either "tukey"
  32. (studentized range) or "chisq" (Chi-squared). the "chisq" method will
  33. correct for tied ranks.
  34.  
  35. Returns:
  36. ---------------
  37. H: float
  38. Kruskal-Wallis H-statistic
  39.  
  40. p_omnibus: float
  41. p-value corresponding to the global null hypothesis that the medians of
  42. the groups are all equal
  43.  
  44. p_corrected: float array
  45. corrected p-values for each pairwise comparison, corresponding to the
  46. null hypothesis that the pair of groups has equal medians. note that
  47. these are only meaningful if the global null hypothesis is rejected.
  48.  
  49. reject: bool array
  50. True for pairs where the null hypothesis can be rejected for the given
  51. alpha
  52.  
  53. Reference:
  54. ---------------
  55.  
  56. """
  57.  
  58. # omnibus test (K-W ANOVA)
  59. # -------------------------------------------------------------------------
  60.  
  61. if method is None:
  62. method = 'chisq'
  63. elif method not in ('tukey', 'chisq'):
  64. raise ValueError('method must be either "tukey" or "chisq"')
  65.  
  66. groups = [np.array(gg) for gg in groups]
  67.  
  68. k = len(groups)
  69.  
  70. n = np.array([len(gg) for gg in groups])
  71. if np.any(n < 5):
  72. warnings.warn("Sample sizes < 5 are not recommended (K-W test assumes "
  73. "a chi square distribution)")
  74.  
  75. allgroups = np.concatenate(groups)
  76. N = len(allgroups)
  77. ranked = stats.rankdata(allgroups)
  78.  
  79. # correction factor for ties
  80. T = stats.tiecorrect(ranked)
  81. if T == 0:
  82. raise ValueError('All numbers are identical in kruskal')
  83.  
  84. # sum of ranks for each group
  85. j = np.insert(np.cumsum(n), 0, 0)
  86. R = np.empty(k, dtype=np.float)
  87. for ii in range(k):
  88. R[ii] = ranked[j[ii]:j[ii + 1]].sum()
  89.  
  90. # the Kruskal-Wallis H-statistic
  91. H = (12. / (N * (N + 1.))) * ((R ** 2.) / n).sum() - 3 * (N + 1)
  92.  
  93. # apply correction factor for ties
  94. H /= T
  95.  
  96. df_omnibus = k - 1
  97. p_omnibus = stats.chisqprob(H, df_omnibus)
  98.  
  99. # multiple comparisons
  100. # -------------------------------------------------------------------------
  101.  
  102. # by default we compare every possible pair of groups
  103. if to_compare is None:
  104. to_compare = tuple(combinations(range(k), 2))
  105.  
  106. ncomp = len(to_compare)
  107.  
  108. dif = np.empty(ncomp, dtype=np.float)
  109. B = np.empty(ncomp, dtype=np.float)
  110.  
  111. Rmean = R / n
  112. A = N * (N + 1) / 12.
  113.  
  114. for pp, (ii, jj) in enumerate(to_compare):
  115.  
  116. # absolute difference of mean ranks
  117. dif[pp] = np.abs(Rmean[ii] - Rmean[jj])
  118. B[pp] = (1. / n[ii]) + (1. / n[jj])
  119.  
  120. if method == 'tukey':
  121.  
  122. # p-values obtained from the upper quantiles of the studentized range
  123. # distribution
  124. qval = dif / np.sqrt(A * B)
  125. p_corrected = psturng(qval * np.sqrt(2), k, 1E6)
  126.  
  127. elif method == 'chisq':
  128.  
  129. # p-values obtained from the upper quantiles of the chi-squared
  130. # distribution
  131. chi2 = (dif ** 2.) / (A * B)
  132. p_corrected = stats.chisqprob(chi2 * T, k - 1)
  133.  
  134. reject = p_corrected <= alpha
  135.  
  136. return H, p_omnibus, p_corrected, reject
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement