Advertisement
Guest User

Untitled

a guest
Dec 25th, 2019
262
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.33 KB | None | 0 0
  1. #!/usr/bin/env python3
  2.  
  3. # Train decision tree to detect phishing
  4. # Dataset: https://archive.ics.uci.edu/ml/datasets/Phishing+Websites#
  5.  
  6. import csv
  7. import arff
  8. import numpy as np
  9. from sklearn import *
  10. from sklearn import tree
  11. from sklearn.metrics import accuracy_score
  12.  
  13.  
  14. # Put the arff dataset into a csv
  15. def saveDataToCSV(inf, outf):
  16.     arffin = csv.reader(open(inf, 'r'), delimiter=',')
  17.     arffout = csv.writer(open(outf, 'w'))
  18.     arffout.writerows(arffin)
  19.  
  20.  
  21.  
  22. # Train decision tree
  23. def traintree(incsv):
  24.     training_data = np.genfromtxt(incsv, delimiter=',', dtype=np.int32) # load the dataset
  25.     inputs = training_data[:, :-1] # all attributes except the last one
  26.     outputs = training_data[:, -1] # last attribute
  27.  
  28.     # Divide dataset into training and testing
  29.     training_inputs = inputs[:2000]
  30.     training_outputs = outputs[:2000]
  31.     testing_inputs = inputs[2000:]
  32.     testing_outputs = outputs[2000:]
  33.  
  34.     classifier = tree.DecisionTreeClassifier()
  35.     classifier.fit(training_inputs, training_outputs)
  36.     predictions = classifier.predict(testing_inputs)
  37.     accuracy = 100.0 * accuracy_score(testing_outputs, predictions)
  38.  
  39.     print("Accuracy: " + str(accuracy))
  40.  
  41. def main():
  42.     saveDataToCSV("dataset.arff", "dataset.csv")
  43.     traintree("dataset.csv")
  44.  
  45. if __name__ == '__main__':
  46.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement