Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import math
from operator import itemgetter

import numpy as np
from scipy.stats import norm
from scipy.stats import rankdata
from scipy.stats import wilcoxon
def calculate_mean(dataset1, dataset2):
    """Return the expected rank sum of the smaller sample under the null.

    For two samples of sizes ``n1`` and ``n2`` the expected Wilcoxon
    rank-sum of the smaller sample is ``n_small * (n1 + n2 + 1) / 2``.

    Args:
        dataset1: First sample (array or any sequence).
        dataset2: Second sample (array or any sequence).

    Returns:
        The expected rank sum as a float.
    """
    n1 = np.size(dataset1)
    n2 = np.size(dataset2)
    # Use the smaller sample's *size* itself; taking np.size() of that
    # integer would always give 1 and collapse the formula.
    n_small = min(n1, n2)
    return n_small * (n1 + n2 + 1) / 2
def calculate_std(dataset1, dataset2):
    """Return the standard deviation of the rank-sum statistic.

    Computes ``sqrt(n1 * n2 * (n1 + n2 + 1) / 12)`` where ``n1`` and ``n2``
    are the element counts of the two samples.

    Args:
        dataset1: First sample (array or any sequence).
        dataset2: Second sample (array or any sequence).

    Returns:
        The standard deviation as a NumPy float.
    """
    n_first = np.asarray(dataset1).size
    n_second = np.asarray(dataset2).size
    variance = (n_first * n_second * (n_first + n_second + 1)) / 12
    return np.sqrt(variance)
def get_z(mu_w, w_stat, std_w):
    """Standardize a rank-sum statistic.

    Args:
        mu_w: Expected value of the statistic.
        w_stat: Observed statistic.
        std_w: Standard deviation of the statistic.

    Returns:
        ``(w_stat - mu_w) / std_w``.
    """
    deviation = w_stat - mu_w
    return deviation / std_w
def apply_randomization(dataset, n_group, n_iter=10000):
    """Build a randomization distribution of the difference in group means.

    The pooled observations are shuffled ``n_iter`` times. After each
    shuffle the first ``n_group`` values form one group and all remaining
    values form the other; the difference of their means is recorded.

    Args:
        dataset: Pooled observations from both groups. It is not modified.
        n_group: Size of the first group; must leave both groups non-empty.
        n_iter: Number of random shuffles to draw (default 10000).

    Returns:
        A float32 array of length ``n_iter`` with one mean difference per
        shuffle.

    Raises:
        ValueError: If ``n_group`` would leave either group empty.
    """
    # Work on a private copy so the caller's data keeps its order.
    pooled = np.array(dataset, dtype=float)
    if not 0 < n_group < pooled.shape[0]:
        raise ValueError(
            "n_group must be between 1 and len(dataset) - 1 so that "
            "both groups are non-empty"
        )

    # float32 is kept so the returned dtype is unchanged for callers.
    results = np.zeros(n_iter, dtype='float32')
    for i in range(n_iter):
        np.random.shuffle(pooled)
        # The second group is everything that is NOT in the first group;
        # slicing the last n_group items would drop or double-count values
        # whenever the two groups differ in size.
        results[i] = np.mean(pooled[:n_group]) - np.mean(pooled[n_group:])
    return results
def main():
    """Compare two samples with a rank-sum z-score and a randomization test.

    Prints, in order: the pooled observations sorted by value (tagged 'a'
    or 'c'), the rank sum of the smaller sample, its expected value, its
    standard deviation, the resulting z-score, and the two-sided
    randomization p-value for the difference in means.
    """
    asian = np.array([9.84, 9.40, 8.20, 8.24, 9.20, 8.55, 8.52, 8.12])
    caucasian = np.array([8.27, 8.20, 8.25, 8.14, 9.00, 8.10, 7.20, 8.32, 7.70])

    # Tag every observation with its group and order the pooled sample.
    full_dataset = [('a', num) for num in asian] + [('c', num) for num in caucasian]
    full_dataset.sort(key=itemgetter(1))
    print(full_dataset)

    labels = np.array([label for label, _ in full_dataset])
    nums = [value for _, value in full_dataset]
    ranked_nums = rankdata(nums)

    asian_w_stat = ranked_nums[labels == 'a'].sum()
    caucasian_w_stat = ranked_nums[labels == 'c'].sum()

    # The expected value from calculate_mean is defined for the smaller
    # sample, so the statistic must be that sample's rank sum -- not merely
    # the smaller of the two sums, which can belong to the larger sample.
    if np.size(asian) <= np.size(caucasian):
        small_w = asian_w_stat
    else:
        small_w = caucasian_w_stat
    print(small_w)

    # Get the Mean Value of the Wilcox Sum
    mu_w = calculate_mean(asian, caucasian)
    print(mu_w)

    # Calculate STD_W
    std_w = calculate_std(asian, caucasian)
    print(std_w)

    z_val = get_z(mu_w, small_w, std_w)
    print(z_val)

    # -- Apply Randomization --
    n_group = np.size(asian)
    results = apply_randomization(nums, n_group)

    # -- Calculate the P-Value
    # Two-sided: share of shuffles at least as extreme as the observed gap.
    diff_means = np.mean(asian) - np.mean(caucasian)
    p_val = np.sum(np.absolute(results) >= abs(diff_means)) / np.size(results)
    print(p_val)
# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement