mikolajmki

si_lab04

Nov 3rd, 2022
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier as kNN
from sklearn.svm import SVC as SVM
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
# Binary-encode the two-valued columns: the value listed in col_values
# maps to 0, anything else maps to 1.
col_names = ['Gender', 'Married', 'Education', 'Self_Employed', 'Loan_Status']
col_values = ['Male', 'Yes', 'Graduate', 'Yes', 'Y']
data = pd.read_excel('loan_data.xlsx')
for name, value in zip(col_names, col_values):
    mask = data[name].values == value
    # assign via .loc to avoid pandas chained-assignment pitfalls
    data.loc[mask, name] = 0
    data.loc[~mask, name] = 1
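# The same encoding can be done without masks, one vectorised line per
# column; a sketch equivalent to the loop above (listed value -> 0, rest -> 1):
# for name, value in zip(col_names, col_values):
#     data[name] = (data[name] != value).astype(int)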
columns = list(data.columns)
# mask = data['Gender'].values == 'Female'
# data['Gender'][mask] = 1
# data['Gender'][~mask] = 0
# One-hot encode the multi-category Property_Area column and replace it
# with the dummy columns.
cat_feature = pd.Categorical(data.Property_Area)
one_hot = pd.get_dummies(cat_feature)
data = pd.concat([data, one_hot], axis=1)
data = data.drop(columns=['Property_Area'])
features = data.columns
# The concat appends the dummy columns after Loan_Status, so the label is
# no longer the last column; take it by name instead of by position.
y = data['Loan_Status'].values.astype(np.float64)
X = data.drop(columns=['Loan_Status']).values.astype(np.float64)
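# Note: pandas can also do the dummy expansion in a single call on the
# whole frame; a sketch equivalent to the three one-hot lines above:
# data = pd.get_dummies(data, columns=['Property_Area'])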
models = [kNN(), SVM()]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
for model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(confusion_matrix(y_test, y_pred))
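# Raw confusion matrices are hard to compare across models; a minimal
# sketch (not part of the original listing) that also prints accuracy,
# using sklearn's accuracy_score on the already-fitted models:
from sklearn.metrics import accuracy_score
for model in models:
    print(type(model).__name__, 'accuracy:',
          accuracy_score(y_test, model.predict(X_test)))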
from sklearn.tree import DecisionTreeClassifier as DT
from sklearn.tree import plot_tree
model = DT(max_depth=3)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
from matplotlib import pyplot as plt
plt.figure(figsize=(20, 10))
# After the encoding above, class 0 is 'Y' (loan granted) and class 1 is
# 'N', so the class names are listed in that order; the feature names
# exclude the label column.
tree_vis = plot_tree(model,
                     feature_names=list(data.drop(columns=['Loan_Status']).columns),
                     class_names=['Y', 'N'], fontsize=20)
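# The fitted tree can also be dumped as plain-text rules, which stays
# readable at larger depths; a sketch using sklearn's export_text with the
# same feature list as plot_tree:
from sklearn.tree import export_text
print(export_text(model,
                  feature_names=[str(c) for c in data.drop(columns=['Loan_Status']).columns]))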
from matplotlib import rcParams
from sklearn.decomposition import PCA
# Toy PCA demo: an elongated Gaussian cloud rotated by 45 degrees, which
# PCA should rotate back onto its principal axes.
example = np.random.randn(500, 2)
example[:, 1] *= 0.4
rot_matrix = np.array([[1/2**0.5, 1/2**0.5],
                       [1/2**0.5, -1/2**0.5]])
example = np.dot(example, rot_matrix)
example_PCAed = PCA(2).fit_transform(example)
rcParams['font.size'] = 32
rcParams['font.family'] = 'Times New Roman'
fig, ax = plt.subplots(1, 2, figsize=(20, 10))
ax[0].scatter(example[:, 0], example[:, 1])
ax[1].scatter(example_PCAed[:, 0], example_PCAed[:, 1])
ax[0].set_xlim([-3, 3])
ax[0].set_ylim([-3, 3])
ax[1].set_xlim([-3, 3])
ax[1].set_ylim([-3, 3])
ax[0].set_xlabel('x')
ax[0].set_ylabel('y')
ax[1].set_xlabel('PC 1')
ax[1].set_ylabel('PC 2')
ax[0].set_title('Original data')
ax[1].set_title('Data after PCA')
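# Sanity check (a sketch, not in the original listing): the fitted
# principal axes should recover the 45-degree rotation up to sign.
print(PCA(2).fit(example).components_)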

# Cumulative explained variance on the training features; open a fresh
# figure so the curve does not land on the subplots above.
pca_transform = PCA()
pca_transform.fit(X_train)
variances = pca_transform.explained_variance_ratio_
cumulated_variances = variances.cumsum()
plt.figure()
plt.scatter(np.arange(variances.shape[0]), cumulated_variances)
plt.yticks(np.arange(0, 1.1, 0.1))
# Number of components needed to reach 95% of the variance: count those
# below the threshold, plus one to cross it.
PC_num = (cumulated_variances < 0.95).sum() + 1
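# sklearn can choose the count itself: a float n_components in (0, 1)
# keeps just enough components to reach that variance fraction; a sketch
# matching the intent of PC_num above:
pca_95 = PCA(n_components=0.95).fit(X_train)
print('components for 95% variance:', pca_95.n_components_)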

# Scatter of the training set on the first two PCs, coloured by the class
# label y (Loan_Status after encoding: 0 = 'Y', 1 = 'N').
X_pcaed = PCA(2).fit_transform(X_train)
fig, ax = plt.subplots(1, 1)
rejected = y_train == 1
ax.scatter(X_pcaed[rejected, 0], X_pcaed[rejected, 1], label='N')
ax.scatter(X_pcaed[~rejected, 0], X_pcaed[~rejected, 1], label='Y')
ax.legend()
plt.show()
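# A quick follow-up sketch (not in the original lab): refit kNN on
# PCA-compressed features to see what the dimensionality reduction costs,
# using the same 95%-variance criterion as above.
pca = PCA(n_components=0.95).fit(X_train)
knn_pca = kNN().fit(pca.transform(X_train), y_train)
print(confusion_matrix(y_test, knn_pca.predict(pca.transform(X_test))))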