Advertisement
Guest User

Untitled

a guest
Oct 21st, 2019
107
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.26 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. import pprint as pp
  4. import os
  5. import pprint as pp
  6.  
  7. from sklearn.feature_selection import SelectKBest
  8. from sklearn.feature_selection import f_classif
  9. from scipy.stats import kstest, ks_2samp
  10.  
  11. features_dic = {}
  12. results_dic = {}
  13.  
  14. script_dir = os.path.dirname(__file__)
  15. rel_path = "dane_zawaly.xlsx"
  16. abs_file_path = os.path.join(script_dir, rel_path)
  17.  
  18. dataExcel = pd.read_excel(abs_file_path, nrows=901)
  19. df = pd.DataFrame(dataExcel)
  20.  
  21.  
  22. feature_data = df.iloc[:, :-1]
  23. diagnose_classes = np.array(df['Klasa'])
  24.  
  25. #for feature, values in feature_data.iteritems():
  26.     #result = ks_2samp(values, diagnose_classes)
  27.     #result = kstest(values, 'norm')
  28.     #results_dic[feature] = result
  29.  
  30. #ranking = sorted([(feature, result) for feature, result in results_dic.items()], key=lambda z: z[1][0], reverse=True)
  31. #pp.pprint(ranking)
  32.  
  33. # Create an SelectKBest object to select features with two best ANOVA F-Values
  34. fvalue_selector = SelectKBest(f_classif, k=5)
  35.  
  36. # Apply the SelectKBest object to the features and target
  37. feature_data_kbest = fvalue_selector.fit_transform(feature_data, diagnose_classes)
  38.  
  39. print('Original number of features:', feature_data.shape[1])
  40. print('Reduced number of features:', feature_data_kbest.shape[1])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement