Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- # Train decision tree to detect phishing
- # Dataset: https://archive.ics.uci.edu/ml/datasets/Phishing+Websites#
- import csv
- import arff
- import numpy as np
- from sklearn import *
- from sklearn import tree
- from sklearn.metrics import accuracy_score
- # Put the arff dataset into a csv
def saveDataToCSV(inf, outf):
    """Copy the data rows of an ARFF file into a CSV file.

    Blank lines, ARFF comment lines (starting with ``%``) and ARFF
    declaration lines (starting with ``@``, such as ``@relation``,
    ``@attribute`` and ``@data``) are dropped, so the output holds only the
    comma-separated data rows. A file that has no ARFF header is copied
    through unchanged apart from blank-line removal.

    Args:
        inf: Path of the ARFF (or already header-less) file to read.
        outf: Path of the CSV file to write; overwritten if it exists.
    """
    # newline='' hands line-ending handling to the csv module, and the
    # with-block guarantees both files are flushed and closed on return.
    with open(inf, 'r', newline='') as src, open(outf, 'w', newline='') as dst:
        writer = csv.writer(dst)
        for row in csv.reader(src, delimiter=','):
            first_cell = row[0].strip() if row else ''
            if len(row) <= 1 and not first_cell:
                continue  # blank or whitespace-only line
            if first_cell.startswith(('@', '%')):
                continue  # ARFF declaration or comment, not data
            writer.writerow(row)
- # Train decision tree
def traintree(incsv, train_size=2000):
    """Train a decision tree on a CSV dataset and report hold-out accuracy.

    The CSV is loaded as an int32 array. Every column except the last is
    treated as an input attribute and the last column as the class label.
    The first ``train_size`` rows fit the classifier; the remaining rows are
    used to measure accuracy, which is printed and returned.

    Args:
        incsv: Path of the comma-separated dataset.
        train_size: Number of leading rows used for training. Defaults to
            2000, the split this script has always used.

    Returns:
        Accuracy on the held-out rows, as a percentage (0-100).

    Raises:
        ValueError: If ``train_size`` leaves no rows for training or for
            testing.
    """
    dataset = np.genfromtxt(incsv, delimiter=',', dtype=np.int32)
    features = dataset[:, :-1]  # all attributes except the last one
    labels = dataset[:, -1]     # last attribute is the class label

    # Fail early with a clear message instead of an opaque error raised
    # from inside fit()/predict() when one side of the split is empty.
    n_rows = dataset.shape[0]
    if not 0 < train_size < n_rows:
        raise ValueError(
            "train_size must be between 1 and %d (rows - 1), got %r"
            % (n_rows - 1, train_size)
        )

    # NOTE(review): the split is positional, not shuffled; if the file is
    # ordered by class the accuracy estimate may be biased - confirm.
    training_inputs, testing_inputs = features[:train_size], features[train_size:]
    training_outputs, testing_outputs = labels[:train_size], labels[train_size:]

    classifier = tree.DecisionTreeClassifier()
    classifier.fit(training_inputs, training_outputs)

    predictions = classifier.predict(testing_inputs)
    accuracy = 100.0 * accuracy_score(testing_outputs, predictions)
    print("Accuracy: " + str(accuracy))
    return accuracy
def main():
    """Convert the ARFF dataset to CSV, then train and evaluate the tree."""
    arff_path, csv_path = "dataset.arff", "dataset.csv"
    saveDataToCSV(arff_path, csv_path)
    traintree(csv_path)
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement