Advertisement
Guest User

Untitled

a guest
Oct 21st, 2019
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.76 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. import pprint as pp
  4. import os
  5. import pprint as pp
  6.  
  7. from scipy.stats import kstest, ks_2samp
  8.  
  9. features_dic = {}
  10. results_dic = {}
  11.  
  12. script_dir = os.path.dirname(__file__)
  13. rel_path = "dane_zawaly.xlsx"
  14. abs_file_path = os.path.join(script_dir, rel_path)
  15.  
  16. dataExcel = pd.read_excel(abs_file_path, nrows=901)
  17. df = pd.DataFrame(dataExcel)
  18.  
  19.  
  20. feature_data = df.iloc[:, :-1]
  21. diagnose_classes = np.array(df['Klasa'])
  22.  
  23. for feature, values in feature_data.iteritems():
  24.     #result = ks_2samp(values, diagnose_classes)
  25.     result = kstest(values, 'norm')
  26.     results_dic[feature] = result
  27.  
  28. ranking = sorted([(feature, result) for feature, result in results_dic.items()], key=lambda z: z[1][0], reverse=True)
  29. pp.pprint(ranking)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement