Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Feature selection with univariate statistical tests.
#
# Loads the UNSW IoT botnet CSV, keeps the columns whose values are all
# numeric, scores each of them against the 'subcategory' label with
# f_classif, and keeps the 4 best-scoring columns.
from pandas import read_csv
from pandas import to_numeric
from numpy import set_printoptions
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif

# load data
filename = 'UNSW_2018_IoT_Botnet_Full5pc_1.csv'
names = [
    'pkSeqID', 'stime', 'flgs', 'flgs_number', 'proto', 'proto_number',
    'saddr', 'sport', 'daddr', 'dport', 'pkts', 'bytes', 'state',
    'state_number', 'ltime', 'seq', 'dur', 'mean', 'stddev', 'sum', 'min',
    'max', 'spkts', 'dpkts', 'sbytes', 'dbytes', 'rate', 'srate', 'drate',
    'TnBPSrcIP', 'TnBPDstIP', 'TnP_PSrcIP', 'TnP_PDstIP', 'TnP_PerProto',
    'TnP_Per_Dport', 'AR_P_Proto_P_SrcIP', 'AR_P_Proto_P_DstIP',
    'N_IN_Conn_P_DstIP', 'N_IN_Conn_P_SrcIP', 'AR_P_Proto_P_Sport',
    'AR_P_Proto_P_Dport', 'Pkts_P_State_P_Protocol_P_DestIP',
    'Pkts_P_State_P_Protocol_P_SrcIP', 'attack', 'category', 'subcategory',
]

# 'subcategory' is the last column (index 45), i.e. the target the original
# positional slicing used. 'attack' and 'category' are presumably coarser
# versions of the same ground truth, so they must not be used as predictors
# (target leakage) -- NOTE(review): confirm against the dataset description.
label_columns = ['attack', 'category', 'subcategory']
target_column = 'subcategory'

dataframe = read_csv(filename, names=names, low_memory=False)

# Because names= is given, pandas treats the first line of the file as data.
# If the file ships with its own header line, that line would show up as a
# row of column names; drop it when present.
if not dataframe.empty and str(dataframe.iloc[0, 0]) == names[0]:
    dataframe = dataframe.iloc[1:]

# f_classif needs a purely numeric matrix. Coerce every candidate column to
# numbers (non-numeric entries become NaN) and keep only the columns that
# converted without a single NaN. This removes text columns such as 'proto'
# or 'saddr', as well as any column that contains missing or non-decimal
# values.
# NOTE(review): identifier/timestamp-like columns ('pkSeqID', 'stime',
# 'ltime', 'seq') are still scored; exclude them here if they should not
# count as features.
candidates = dataframe.drop(columns=label_columns)
candidates = candidates.apply(to_numeric, errors='coerce')
numeric_features = candidates.dropna(axis='columns', how='any')
feature_names = list(numeric_features.columns)

X = numeric_features.values
Y = dataframe[target_column].values

# feature extraction
test = SelectKBest(score_func=f_classif, k=4)
fit = test.fit(X, Y)

set_printoptions(precision=3)
# The score array is positional, so print the matching column names first.
print(feature_names)
print(fit.scores_)
features = fit.transform(X)

# summarize selected features
print(features[0:5, :])
Advertisement
Add Comment
Please sign in to add a comment.