needMoreRAM

Untitled

Apr 27th, 2020
665
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.25 KB | None | 0 0
  1. #Feature selection with Univariate Statistical Tests
  2. from pandas import read_csv
  3. from numpy import set_printoptions
  4. from sklearn.feature_selection import SelectKBest
  5. from sklearn.feature_selection import f_classif
  6.  
  7. # load data
  8. filename = 'UNSW_2018_IoT_Botnet_Full5pc_1.csv'
  9. names= ['pkSeqID', 'stime', 'flgs', 'flgs_number', 'proto', 'proto_number', 'saddr',
  10.     'sport', 'daddr', 'dport', 'pkts', 'bytes', 'state', 'state_number', 'ltime', 'seq',
  11.     'dur', 'mean', 'stddev', 'sum', 'min', 'max', 'spkts', 'dpkts', 'sbytes', 'dbytes',
  12.     'rate', 'srate', 'drate', 'TnBPSrcIP', 'TnBPDstIP', 'TnP_PSrcIP', 'TnP_PDstIP', 'TnP_PerProto',
  13.     'TnP_Per_Dport', 'AR_P_Proto_P_SrcIP', 'AR_P_Proto_P_DstIP', 'N_IN_Conn_P_DstIP', 'N_IN_Conn_P_SrcIP',
  14.     'AR_P_Proto_P_Sport', 'AR_P_Proto_P_Dport', 'Pkts_P_State_P_Protocol_P_DestIP',
  15.     'Pkts_P_State_P_Protocol_P_SrcIP', 'attack', 'category', 'subcategory']
  16.  
  17. dataframe = read_csv(filename, names=names, low_memory=False)
  18. array = dataframe.values
  19. X = array[:,0:45]
  20. Y = array[:,45]
  21.  
  22. # feature extraction
  23. test = SelectKBest(score_func=f_classif, k=4)
  24. fit = test.fit(X, Y)
  25.  
  26. set_printoptions(precision=3)
  27. print(fit.scores_)
  28. features = fit.transform(X)
  29.  
  30. # summarize selected features
  31. print(features[0:5,:])
Advertisement
Add Comment
Please, Sign In to add comment