Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Rank dataset features by their one-sample Kolmogorov-Smirnov statistic.

The script loads ``dane_zawaly.xlsx`` from the directory that contains this
file, tests every feature column against a normal distribution and
pretty-prints the features ordered from the largest KS statistic to the
smallest.
"""

import os
import pprint as pp

import numpy as np
import pandas as pd
from scipy.stats import kstest, ks_2samp

# Workbook name, resolved relative to this script's directory.
DATA_FILE_NAME = "dane_zawaly.xlsx"
# Upper bound on the number of data rows read from the workbook.
MAX_ROWS = 901


def rank_features(feature_data):
    """Return ``(feature, kstest_result)`` pairs sorted by KS statistic.

    Args:
        feature_data: DataFrame whose columns are the features to test.

    Returns:
        A list of ``(column_label, result)`` tuples, where ``result`` is what
        ``scipy.stats.kstest(column, 'norm')`` returned, ordered by
        descending test statistic. An empty frame yields an empty list.
    """
    # NOTE: with no extra distribution arguments, 'norm' is the standard
    # normal (loc=0, scale=1); the columns are not standardised beforehand.
    # `DataFrame.items()` replaces `iteritems()`, which pandas 2.0 removed.
    results = {
        feature: kstest(values, 'norm')
        for feature, values in feature_data.items()
    }
    # Index 0 of the result is the KS statistic (index 1 is the p-value).
    return sorted(results.items(), key=lambda item: item[1][0], reverse=True)


def main():
    """Load the workbook, rank its feature columns and print the ranking."""
    script_dir = os.path.dirname(__file__)
    abs_file_path = os.path.join(script_dir, DATA_FILE_NAME)
    df = pd.read_excel(abs_file_path, nrows=MAX_ROWS)
    # Every column except the last one is treated as a feature; the last
    # column is presumably the 'Klasa' class label -- confirm against the data.
    feature_data = df.iloc[:, :-1]
    pp.pprint(rank_features(feature_data))


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement