Advertisement
Guest User

Untitled

a guest
Jan 16th, 2018
110
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.73 KB | None | 0 0
  1. #!/usr/bin/env python2
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Tue Jan 16 21:32:56 2018
  5.  
  6. @author: hericsonaraujo
  7. """
  8.  
  9. from sklearn.tree import DecisionTreeClassifier
  10. from sklearn.metrics import accuracy_score, make_scorer
  11. from sklearn.model_selection import train_test_split
  12.  
  13. # Import basic libraries
  14. import numpy as np # linear algebra
  15. import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
  16.  
  17. # import visualization libraries
  18. import seaborn as sns
  19. import matplotlib.pyplot as plt
  20. #from ggplot import *
  21.  
  22. df = pd.read_csv('UCI_Credit_Card.csv')
  23. df.sample(5)
  24.  
  25. df = df.rename(columns={'default.payment.next.month': 'def_pay',
  26.                         'PAY_0': 'PAY_1'})
  27. df.head()
  28.  
  29. df.info()
  30.  
  31. fil = (df.EDUCATION == 5) | (df.EDUCATION == 6) | (df.EDUCATION == 0)
  32. df.loc[fil, 'EDUCATION'] = 4
  33. df.EDUCATION.value_counts()
  34.  
  35. df.loc[df.MARRIAGE == 0, 'MARRIAGE'] = 3
  36. df.MARRIAGE.value_counts()
  37.  
  38. print (float(df.def_pay.sum())/float(len(df.def_pay)))
  39.  
  40.  
  41. fil = (df.PAY_1 == -2) | (df.PAY_1 == -1) | (df.PAY_1 == 0)
  42. df.loc[fil, 'PAY_1'] = 0
  43. fil = (df.PAY_2 == -2) | (df.PAY_2 == -1) | (df.PAY_2 == 0)
  44. df.loc[fil, 'PAY_2'] = 0
  45. fil = (df.PAY_3 == -2) | (df.PAY_3 == -1) | (df.PAY_3 == 0)
  46. df.loc[fil, 'PAY_3'] = 0
  47. fil = (df.PAY_4 == -2) | (df.PAY_4 == -1) | (df.PAY_4 == 0)
  48. df.loc[fil, 'PAY_4'] = 0
  49. fil = (df.PAY_5 == -2) | (df.PAY_5 == -1) | (df.PAY_5 == 0)
  50. df.loc[fil, 'PAY_5'] = 0
  51. fil = (df.PAY_6 == -2) | (df.PAY_6 == -1) | (df.PAY_6 == 0)
  52. df.loc[fil, 'PAY_6'] = 0
  53. late = df[['PAY_1','PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6']]
  54. #draw_histograms(late, late.columns, 2, 3, 10)
  55.  
  56. y = df['def_pay'].copy()
  57. y.sample(5)
  58.  
  59. features = ['LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_1', 'PAY_2',
  60.        'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2',
  61.        'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1',
  62.        'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6']
  63. X = df[features].copy()
  64. X.columns
  65.  
  66. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
  67.  
  68. print(df.def_pay.describe())
  69. print("---------------------------")
  70. print(y_train.describe())
  71. print("---------------------------")
  72. print(y_test.describe())
  73.  
  74. #create the classifier
  75. classifier = DecisionTreeClassifier(max_depth=10, random_state=14)
  76. # training the classifier
  77. classifier.fit(X_train, y_train)
  78. # do our predictions on the test
  79. predictions = classifier.predict(X_test)
  80. # see how good we did on the test
  81. print(accuracy_score(y_true = y_test, y_pred = predictions))
  82.  
  83. classifier = DecisionTreeClassifier(max_depth=100, random_state=14)
  84. classifier.fit(X_train, y_train)
  85. predictions = classifier.predict(X_test)
  86. print(accuracy_score(y_true = y_test, y_pred = predictions))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement