Advertisement
Not a member of Pastebin yet?
Sign up — it unlocks many cool features!
# Imports for the loan-classification exercise.
# FIX: pandas and numpy were each imported twice; duplicates removed and
# imports regrouped (stdlib / scientific stack / scikit-learn).
# NOTE(review): `os` and `scipy.io.wavfile` are never used in this script —
# presumably left over from another exercise; kept so nothing external breaks.
import os

import numpy as np
import pandas as pd
from scipy.io import wavfile

from sklearn.neighbors import KNeighborsClassifier as kNN
from sklearn.svm import SVC as SVM
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
# --- Load the loan dataset and encode its categorical columns ---------------
# For each binary categorical column, the value listed in `col_values` is
# encoded as 0 and every other value as 1 (kept identical to the original
# encoding so downstream class labels still line up).
col_names = ['Gender', 'Married', 'Education', 'Self_Employed', 'Loan_Status']
col_values = ['Male', 'Yes', 'Graduate', 'Yes', 'Y']

data = pd.read_excel('loan_data.xlsx')

# FIX: whole-column vectorized assignment instead of the original
# `data[col][mask] = …` chained-assignment writes — no SettingWithCopy
# hazard, so the global `pd.options.mode.chained_assignment = None`
# suppression is no longer needed and has been dropped.
for col, positive in zip(col_names, col_values):
    data[col] = (data[col].values != positive).astype(int)

columns = list(data.columns)

# One-hot encode the multi-valued 'Property_Area' column and drop the source.
cat_feature = pd.Categorical(data.Property_Area)
one_hot = pd.get_dummies(cat_feature)
data = pd.concat([data, one_hot], axis=1)
data = data.drop(columns=['Property_Area'])

# BUG FIX: after the concat above, the one-hot columns sit at the END of the
# frame, so `vals[:, -1]` would silently use the last Property_Area dummy as
# the label. Downstream code (class_names=['N', 'Y']) clearly intends
# 'Loan_Status' to be the target — move it back to the last position.
data = data[[c for c in data.columns if c != 'Loan_Status'] + ['Loan_Status']]

features = data.columns
vals = data.values.astype(np.float64)
X = vals[:, :-1]   # every column except the target
y = vals[:, -1]    # target: Loan_Status ('Y' -> 0, otherwise 1)
# Train two baseline classifiers (k-NN and SVM) on an 80/20 split and print
# each one's confusion matrix on the held-out test set.
models = [kNN(), SVM()]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42,
)
for model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(confusion_matrix(y_test, y_pred))
from sklearn.tree import DecisionTreeClassifier as DT
from sklearn.tree import plot_tree

# Fit a shallow tree (depth 3) so the rendered diagram stays readable,
# then report its confusion matrix on the same held-out split.
model = DT(max_depth=3)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)

from matplotlib import pyplot as plt

# Large canvas and font so the node text in the tree plot is legible.
plt.figure(figsize=(20, 10))
tree_vis = plot_tree(
    model,
    feature_names=data.columns[:-1],
    class_names=['N', 'Y'],
    fontsize=20,
)
import numpy as np
from matplotlib import rcParams
from matplotlib import pyplot as plt
from sklearn.decomposition import PCA

# Synthetic 2-D demo: an anisotropic Gaussian cloud (second axis squeezed
# to 0.4), rotated 45 degrees so its principal axes no longer align with
# x/y. PCA should then recover the unrotated axes.
example = np.random.randn(500, 2)
example[:, 1] *= 0.4

inv_sqrt2 = 1 / 2 ** 0.5
rot_matrix = np.array([[inv_sqrt2, inv_sqrt2],
                       [inv_sqrt2, -inv_sqrt2]])
example = example @ rot_matrix

example_PCAed = PCA(2).fit_transform(example)

rcParams['font.size'] = 32
rcParams['font.family'] = 'Times New Roman'

# Side-by-side scatter: raw rotated cloud vs. its PCA projection,
# on identical axis limits so the de-rotation is obvious.
fig, ax = plt.subplots(1, 2, figsize=(20, 10))
ax[0].scatter(example[:, 0], example[:, 1])
ax[1].scatter(example_PCAed[:, 0], example_PCAed[:, 1])
for panel in ax:
    panel.set_xlim([-3, 3])
    panel.set_ylim([-3, 3])
ax[0].set_xlabel('x')
ax[0].set_ylabel('y')
ax[1].set_xlabel('PC 1')
ax[1].set_ylabel('PC 2')
ax[0].set_title('Dane pierwotne')  # Polish: "Original data"
ax[1].set_title('Dane po PCA')     # Polish: "Data after PCA"
# --- PCA on the loan training data ------------------------------------------
pca_transform = PCA()
pca_transform.fit(X_train)
variances = pca_transform.explained_variance_ratio_
cumulated_variances = variances.cumsum()

# Scree plot: cumulative explained variance per component index.
plt.scatter(np.arange(variances.shape[0]), cumulated_variances)
plt.yticks(np.arange(0, 1.1, 0.1))

# Components needed to retain at least 95% of the variance.
# BUG FIX: `(cumsum < 0.95).sum()` counts the components that are NOT yet
# sufficient; one more component is required to actually cross the
# threshold, hence the `+ 1`.
PC_num = (cumulated_variances < 0.95).sum() + 1

# Project onto the first two PCs and colour points by class.
X_pcaed = PCA(2).fit_transform(X_train)
fig, ax = plt.subplots(1, 1)

# BUG FIX: y_train encodes Loan_Status ('Y' -> 0, anything else -> 1 per the
# encoding loop above) — the original legend labels 'female'/'males' were
# copy-pasted from a different dataset and mislabelled the figure.
rejected = y_train == 1
ax.scatter(X_pcaed[rejected, 0], X_pcaed[rejected, 1],
           label='Loan_Status != Y')
ax.scatter(X_pcaed[~rejected, 0], X_pcaed[~rejected, 1],
           label='Loan_Status = Y')
ax.legend()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement