Advertisement
Guest User

Untitled

a guest
Feb 22nd, 2019
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.51 KB | None | 0 0
  1. import numpy as np
  2. from scipy.stats import wilcoxon
  3. from scipy.stats import rankdata
  4. from scipy.stats import norm
  5. from operator import itemgetter
  6. import math
  7.  
  8. def calculate_mean(dataset1, dataset2):
  9.     smaller_dataset = min(dataset1.size , dataset2.size)
  10.     wilcox_mean = ( np.size(smaller_dataset) * ( np.size(dataset1) + np.size(dataset2) + 1 ) )/2
  11.     return wilcox_mean
  12.  
  13. def calculate_std(dataset1,dataset2):
  14.     dataset1 = np.array(dataset1)
  15.     dataset2 = np.array(dataset2)
  16.     std_w = np.sqrt( (dataset1.size * dataset2.size * (dataset1.size + dataset2.size + 1))/12 )
  17.     return std_w
  18.  
  19. def get_z(mu_w, w_stat,std_w):
  20.     return (w_stat - mu_w)/std_w
  21.  
  22. def apply_randomization(dataset,n_group):
  23.     results = np.zeros(10000,dtype='float32')
  24.     for i in range(np.size(results)):
  25.         np.random.shuffle(dataset)
  26.         results[i] = np.mean(dataset[:n_group]) - np.mean(dataset[-n_group:])
  27.     return results
  28.  
  29. def main():
  30.     asian = np.array([9.84, 9.40, 8.20, 8.24, 9.20, 8.55, 8.52, 8.12])
  31.     caucasian = np.array([8.27, 8.20, 8.25, 8.14, 9.00, 8.10, 7.20, 8.32, 7.70])
  32.  
  33.     asian_vals = []
  34.     for num in asian:
  35.         asian_vals.append( ('a',num) )
  36.    
  37.     caucasian_vals = []
  38.     for num in caucasian:
  39.         caucasian_vals.append( ('c',num) )
  40.  
  41.     full_dataset = asian_vals + caucasian_vals
  42.     full_dataset.sort(key=itemgetter(1))
  43.     print(full_dataset)
  44.  
  45.     nums = []
  46.     for tup in full_dataset:
  47.         nums.append(tup[1])
  48.    
  49.     ranked_nums = rankdata(nums)
  50.  
  51.     asian_w_stat = 0
  52.     caucasian_w_stat = 0
  53.     for rank, tup in zip(ranked_nums, full_dataset):
  54.         if(tup[0] == 'a'):
  55.             asian_w_stat += rank
  56.         elif(tup[0] == 'c'):
  57.             caucasian_w_stat += rank
  58.  
  59.     # We need the smaller w_stat
  60.     small_w = min(asian_w_stat,caucasian_w_stat)
  61.     print(small_w)
  62.  
  63.     # Get the Mean Value of the Wilcox Sum
  64.     mu_w = calculate_mean(asian,caucasian)
  65.     print(mu_w)
  66.  
  67.     # Calculate STD_W
  68.     std_w = calculate_std(asian,caucasian)
  69.     print(std_w)
  70.  
  71.     z_val = get_z(mu_w,small_w,std_w)
  72.     print(z_val)
  73.  
  74.     # -- Apply Randomization --
  75.  
  76.     dataset_nums = []
  77.     for li in full_dataset:
  78.         dataset_nums.append(li[1])
  79.  
  80.     n_group = np.size(asian)
  81.  
  82.     results = apply_randomization(dataset_nums,n_group)
  83.  
  84.     # -- Calculate the P-Value
  85.     diff_means = np.mean(asian) - np.mean(caucasian)
  86.     p_val = np.sum( np.absolute(results) >=  abs(diff_means))/np.size(results)
  87.  
  88.     print(p_val)
  89.  
  90. if __name__ == '__main__':
  91.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement