Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " member_id funded_amnt term int_rate emp_length \\\n",
- "count 42535.000000 42535.000000 42535.000000 42535.000000 42535.000000 \n",
- "mean 21268.000000 10821.585753 42.207218 0.121650 2.612789 \n",
- "std 12278.941187 7146.914675 10.509342 0.037079 15.223510 \n",
- "min 1.000000 500.000000 36.000000 0.054200 -88.000000 \n",
- "25% 10634.500000 5000.000000 36.000000 0.096300 2.000000 \n",
- "50% 21268.000000 9600.000000 36.000000 0.119900 4.000000 \n",
- "75% 31901.500000 15000.000000 60.000000 0.147200 9.000000 \n",
- "max 42535.000000 35000.000000 60.000000 0.245900 10.000000 \n",
- "\n",
- " income loan_status grade_num home_ownership_num \n",
- "count 4.253500e+04 42535.000000 42535.000000 42535.000000 \n",
- "mean 6.913005e+04 0.848807 5.329141 1.600623 \n",
- "std 6.409685e+04 0.358241 1.438428 0.626047 \n",
- "min -8.800000e+01 0.000000 1.000000 1.000000 \n",
- "25% 4.000000e+04 1.000000 4.000000 1.000000 \n",
- "50% 5.900000e+04 1.000000 6.000000 2.000000 \n",
- "75% 8.250000e+04 1.000000 6.000000 2.000000 \n",
- "max 6.000000e+06 1.000000 7.000000 3.000000 \n",
- "level_1 25% 50% 75% count max mean min \\\n",
- "loan_status \n",
- "0 5200.0 10000.0 15650.0 6431.0 35000.0 11492.190173 500.0 \n",
- "1 5000.0 9350.0 15000.0 36104.0 35000.0 10702.134805 500.0 \n",
- "\n",
- "level_1 std \n",
- "loan_status \n",
- "0 7655.264857 \n",
- "1 7045.939943 \n",
- "level_1 25% 50% 75% count max mean \\\n",
- "loan_status \n",
- "0 37000.00 53000.0 75000.0 6431.0 1250000.0 63366.870316 \n",
- "1 41155.75 60000.0 84000.0 36104.0 6000000.0 70156.608354 \n",
- "\n",
- "level_1 min std \n",
- "loan_status \n",
- "0 2000.0 49684.103617 \n",
- "1 -88.0 66284.371098 \n",
- "level_1 25% 50% 75% count max mean min std\n",
- "loan_status \n",
- "0 4.0 5.0 6.0 6431.0 7.0 4.643135 1.0 1.502201\n",
- "1 5.0 6.0 7.0 36104.0 7.0 5.451335 1.0 1.391751\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "<matplotlib.text.Text at 0xa875d50>"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "import pandas as pd\n",
- "import numpy as np \n",
- "import matplotlib.pyplot as plt\n",
- "from sklearn.linear_model import LogisticRegression\n",
- "from sklearn.model_selection import train_test_split\n",
- "import seaborn as sns\n",
- "\n",
- "\n",
- "# Read CSV file\n",
- "# The default path below is specific to one machine; set the LOAN_CSV\n",
- "# environment variable to read the file from somewhere else.\n",
- "import os\n",
- "\n",
- "LOAN_CSV = os.environ.get('LOAN_CSV', 'C:/Users/ERice.ERP/Desktop/Building Knowledge/Python_Projects/Loan.csv')\n",
- "df = pd.read_csv(LOAN_CSV)\n",
- "\n",
- "\n",
- "# Look for missing data: total number of null cells across the whole frame.\n",
- "# The count is printed because a bare expression in the middle of a cell is never displayed.\n",
- "missing_cells = int(df.isnull().sum().sum())\n",
- "print('Missing cells:', missing_cells)\n",
- "\n",
- "\n",
- "#Creating numeric grade column\n",
- "\n",
- "# Letter grade -> numeric code: 'A' -> 7 down to 'F' -> 2.\n",
- "GRADE_CODES = {'A': 7, 'B': 6, 'C': 5, 'D': 4, 'E': 3, 'F': 2}\n",
- "\n",
- "# Any value that is not one of A-F gets the code 1.\n",
- "grade_num = [GRADE_CODES.get(grade, 1) for grade in df['grade']]\n",
- "\n",
- "df['grade_num'] = grade_num\n",
- "\n",
- "\n",
- "#Creating numeric home_ownership column\n",
- "\n",
- "# Known ownership statuses and their numeric codes.\n",
- "HOME_OWNERSHIP_CODES = {'OWN': 3, 'MORTGAGE': 2, 'RENT': 1}\n",
- "\n",
- "# 'NONE' and every other unlisted status get the sentinel -88.\n",
- "home_ownership_num = [HOME_OWNERSHIP_CODES.get(status, -88) for status in df['home_ownership']]\n",
- "\n",
- "df['home_ownership_num'] = home_ownership_num\n",
- "\n",
- " \n",
- "# Mean imputation for missing home ownership\n",
- "# sklearn.preprocessing.Imputer was deprecated in scikit-learn 0.20 and removed in 0.22.\n",
- "# sklearn.impute.SimpleImputer replaces it and always works column-wise (the old axis=0).\n",
- "from sklearn.impute import SimpleImputer\n",
- "\n",
- "# Columns are selected by name so the step does not depend on column order.\n",
- "# NOTE(review): this replaces a positional iloc[:, 10:12] slice that was described as\n",
- "# 'the last two columns'; confirm those are these two columns for Loan.csv.\n",
- "impute_cols = ['grade_num', 'home_ownership_num']\n",
- "\n",
- "# -88 is the sentinel written for unknown home ownership; it is replaced by the column mean.\n",
- "imputer = SimpleImputer(missing_values=-88, strategy='mean')\n",
- "df[impute_cols] = imputer.fit_transform(df[impute_cols])\n",
- "\n",
- " \n",
- "#Making loan status numeric\n",
- "\n",
- "\n",
- "def func(row):\n",
- "    \"\"\"Return 1 when the row's loan_status is 'Fully Paid', otherwise 0.\"\"\"\n",
- "    return 1 if row['loan_status'] == 'Fully Paid' else 0\n",
- "\n",
- "\n",
- "# Vectorised equivalent of df.apply(func, axis=1).\n",
- "# The dtype guard keeps the cell safe to re-run: once the column is numeric,\n",
- "# comparing it with 'Fully Paid' again would turn every row into 0.\n",
- "if not pd.api.types.is_numeric_dtype(df['loan_status']):\n",
- "    df['loan_status'] = (df['loan_status'] == 'Fully Paid').astype('int64')\n",
- "\n",
- " \n",
- "# Descriptive Stats\n",
- "\n",
- "summary_stats = df.describe()\n",
- "print(summary_stats)\n",
- "\n",
- "\n",
- "def describe_by_status(column):\n",
- "    \"\"\"Return describe() statistics of `column`, one row per loan_status value.\"\"\"\n",
- "    # Since pandas 0.20 groupby().describe() puts the statistics in the columns, so\n",
- "    # reset_index() no longer yields a 'level_1' column and pivoting on it raises KeyError.\n",
- "    # Describing only the requested column also avoids summarising every column each time.\n",
- "    return df.groupby('loan_status')[column].describe()\n",
- "\n",
- "\n",
- "summary_funded_amnt_by_status = describe_by_status('funded_amnt')\n",
- "print(summary_funded_amnt_by_status)\n",
- "\n",
- "status_by_income = describe_by_status('income')\n",
- "print(status_by_income)\n",
- "\n",
- "status_by_grade = describe_by_status('grade_num')\n",
- "print(status_by_grade)\n",
- "\n",
- "#Graphic Descriptives\n",
- "\n",
- "# Series.plot() without ax= draws on the current axes, so two calls in one cell\n",
- "# put the second bar chart on top of the first. Each chart gets its own axes here.\n",
- "fig, (rate_ax, income_ax) = plt.subplots(1, 2, figsize=(12, 5))\n",
- "\n",
- "#Bar Chart for Interest Rate by Credit Grade\n",
- "means = df['int_rate'].groupby(df['grade']).mean()\n",
- "means.plot(kind='bar', ax=rate_ax, title='Interest Rate by Credit Grade')\n",
- "rate_ax.set_xlabel('Credit Grade')\n",
- "rate_ax.set_ylabel('Mean Interest Rate')\n",
- "\n",
- "#Bar Chart for Income by home ownership status\n",
- "income_median_by_home = df['income'].groupby(df['home_ownership']).median()\n",
- "my_plot = income_median_by_home.plot(kind='bar', ax=income_ax, title='Median Income by Home Ownership Status')\n",
- "income_ax.set_xlabel('Status')\n",
- "income_ax.set_ylabel('Median Income')\n",
- "\n",
- "# tight_layout keeps the rotated tick labels from overlapping; show() renders the\n",
- "# figure without leaving a matplotlib Text repr as the cell result.\n",
- "fig.tight_layout()\n",
- "plt.show()\n"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.0"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Add Comment
Please, Sign In to add comment