Guest User

Untitled

a guest
Mar 21st, 2018
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.68 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 3,
  6. "metadata": {
  7. "collapsed": false
  8. },
  9. "outputs": [
  10. {
  11. "name": "stdout",
  12. "output_type": "stream",
  13. "text": [
  14. " member_id funded_amnt term int_rate emp_length \\\n",
  15. "count 42535.000000 42535.000000 42535.000000 42535.000000 42535.000000 \n",
  16. "mean 21268.000000 10821.585753 42.207218 0.121650 2.612789 \n",
  17. "std 12278.941187 7146.914675 10.509342 0.037079 15.223510 \n",
  18. "min 1.000000 500.000000 36.000000 0.054200 -88.000000 \n",
  19. "25% 10634.500000 5000.000000 36.000000 0.096300 2.000000 \n",
  20. "50% 21268.000000 9600.000000 36.000000 0.119900 4.000000 \n",
  21. "75% 31901.500000 15000.000000 60.000000 0.147200 9.000000 \n",
  22. "max 42535.000000 35000.000000 60.000000 0.245900 10.000000 \n",
  23. "\n",
  24. " income loan_status grade_num home_ownership_num \n",
  25. "count 4.253500e+04 42535.000000 42535.000000 42535.000000 \n",
  26. "mean 6.913005e+04 0.848807 5.329141 1.600623 \n",
  27. "std 6.409685e+04 0.358241 1.438428 0.626047 \n",
  28. "min -8.800000e+01 0.000000 1.000000 1.000000 \n",
  29. "25% 4.000000e+04 1.000000 4.000000 1.000000 \n",
  30. "50% 5.900000e+04 1.000000 6.000000 2.000000 \n",
  31. "75% 8.250000e+04 1.000000 6.000000 2.000000 \n",
  32. "max 6.000000e+06 1.000000 7.000000 3.000000 \n",
  33. "level_1 25% 50% 75% count max mean min \\\n",
  34. "loan_status \n",
  35. "0 5200.0 10000.0 15650.0 6431.0 35000.0 11492.190173 500.0 \n",
  36. "1 5000.0 9350.0 15000.0 36104.0 35000.0 10702.134805 500.0 \n",
  37. "\n",
  38. "level_1 std \n",
  39. "loan_status \n",
  40. "0 7655.264857 \n",
  41. "1 7045.939943 \n",
  42. "level_1 25% 50% 75% count max mean \\\n",
  43. "loan_status \n",
  44. "0 37000.00 53000.0 75000.0 6431.0 1250000.0 63366.870316 \n",
  45. "1 41155.75 60000.0 84000.0 36104.0 6000000.0 70156.608354 \n",
  46. "\n",
  47. "level_1 min std \n",
  48. "loan_status \n",
  49. "0 2000.0 49684.103617 \n",
  50. "1 -88.0 66284.371098 \n",
  51. "level_1 25% 50% 75% count max mean min std\n",
  52. "loan_status \n",
  53. "0 4.0 5.0 6.0 6431.0 7.0 4.643135 1.0 1.502201\n",
  54. "1 5.0 6.0 7.0 36104.0 7.0 5.451335 1.0 1.391751\n"
  55. ]
  56. },
  57. {
  58. "data": {
  59. "text/plain": [
  60. "<matplotlib.text.Text at 0xa875d50>"
  61. ]
  62. },
  63. "execution_count": 3,
  64. "metadata": {},
  65. "output_type": "execute_result"
  66. }
  67. ],
  68. "source": [
  69. "import pandas as pd\n",
  70. "import numpy as np \n",
  71. "import matplotlib.pyplot as plt\n",
  72. "from sklearn.linear_model import LogisticRegression\n",
  73. "from sklearn.model_selection import train_test_split\n",
  74. "import seaborn as sns\n",
  75. "\n",
  76. "\n",
  77. "#Read CSV file\n",
  78. "df = pd.read_csv(\"C:/Users/ERice.ERP/Desktop/Building Knowledge/Python_Projects/Loan.csv\")\n",
  79. " \n",
  80. "\n",
  81. "#Look for missing data\n",
  82. "sum(df.isnull().values.ravel())\n",
  83. "\n",
  84. "\n",
  85. "#Creating numeric grade column\n",
  86. "\n",
  87. "grade_num = []\n",
  88. "\n",
  89. "for row in df['grade']:\n",
  90. " if row == 'A':\n",
  91. " grade_num.append(7)\n",
  92. " elif row == 'B':\n",
  93. " grade_num.append(6)\n",
  94. " elif row == 'C':\n",
  95. " grade_num.append(5)\n",
  96. " elif row == 'D':\n",
  97. " grade_num.append(4)\n",
  98. " elif row == 'E':\n",
  99. " grade_num.append(3)\n",
  100. " elif row == 'F':\n",
  101. " grade_num.append(2)\n",
  102. " else:\n",
  103. " grade_num.append(1)\n",
  104. " \n",
  105. "df['grade_num'] = grade_num\n",
  106. "\n",
  107. "\n",
  108. "#Creating numeric home_ownership column\n",
  109. "\n",
  110. "home_ownership_num = []\n",
  111. "\n",
  112. "for row in df['home_ownership']:\n",
  113. " if row == 'OWN':\n",
  114. " home_ownership_num.append(3)\n",
  115. " elif row == 'MORTGAGE':\n",
  116. " home_ownership_num.append(2)\n",
  117. " elif row == 'RENT':\n",
  118. " home_ownership_num.append(1)\n",
  119. " elif row == 'NONE':\n",
  120. " home_ownership_num.append(-88)\n",
  121. " else:\n",
  122. " home_ownership_num.append(-88)\n",
  123. " \n",
  124. "df['home_ownership_num'] = home_ownership_num\n",
  125. "\n",
  126. " \n",
  127. "# Mean imputation for missing home ownership \n",
  128. "from sklearn.preprocessing import Imputer # Imputer class from the sklearn.preprocessing module\n",
  129. "imputer = Imputer(missing_values = -88, strategy = 'mean', axis = 0) # recognize -88 as the missing-value marker; impute with the mean of the column\n",
  130. "imputer = imputer.fit(df.iloc[:,10:12]) #Upper bound is not included, so we get all rows, and the last two columns\n",
  131. "df.iloc[:,10:12] = imputer.transform(df.iloc[:,10:12]) # apply the fitted imputer and write the result back into the same columns\n",
  132. "\n",
  133. " \n",
  134. "#Making loan status numeric\n",
  135. "\n",
  136. "\n",
  137. "def func(row):\n",
  138. " if row['loan_status'] == 'Fully Paid':\n",
  139. " return 1\n",
  140. " else:\n",
  141. " return 0\n",
  142. "\n",
  143. "df['loan_status'] = df.apply(func, axis=1)\n",
  144. "\n",
  145. " \n",
  146. "# Descriptive Stats \n",
  147. "\n",
  148. "summary_stats = df.describe()\n",
  149. "print(summary_stats)\n",
  150. "\n",
  151. "summary_funded_amnt_by_status = df.groupby('loan_status').describe().reset_index().pivot(index='loan_status', values='funded_amnt', columns='level_1')\n",
  152. "print(summary_funded_amnt_by_status)\n",
  153. "\n",
  154. "status_by_income = df.groupby('loan_status').describe().reset_index().pivot(index='loan_status', values='income', columns='level_1')\n",
  155. "print(status_by_income)\n",
  156. "\n",
  157. "status_by_grade = df.groupby('loan_status').describe().reset_index().pivot(index='loan_status', values='grade_num', columns='level_1')\n",
  158. "print(status_by_grade)\n",
  159. "\n",
  160. "#Graphic Descriptives\n",
  161. "\n",
  162. "\n",
  163. "#Bar Chart for Interest Rate by Credit Grade\n",
  164. "means = df['int_rate'].groupby(df['grade']).mean()\n",
  165. "my_plot = means.plot(kind='bar',legend=None,title=\"Interest Rate by Credit Grade\")\n",
  166. "my_plot.set_xlabel(\"Credit Grade\")\n",
  167. "my_plot.set_ylabel(\"Mean Interest Rate\")\n",
  168. "\n",
  169. "#Bar Chart for Income by home ownership status\n",
  170. "income_median_by_home = df['income'].groupby(df['home_ownership']).median()\n",
  171. "my_plot = income_median_by_home.plot(kind='bar',legend=None,title=\"Median Income by Home Ownership Status\")\n",
  172. "my_plot.set_xlabel(\"Status\")\n",
  173. "my_plot.set_ylabel(\"Median Income\")\n"
  174. ]
  175. }
  176. ],
  177. "metadata": {
  178. "kernelspec": {
  179. "display_name": "Python 3",
  180. "language": "python",
  181. "name": "python3"
  182. },
  183. "language_info": {
  184. "codemirror_mode": {
  185. "name": "ipython",
  186. "version": 3
  187. },
  188. "file_extension": ".py",
  189. "mimetype": "text/x-python",
  190. "name": "python",
  191. "nbconvert_exporter": "python",
  192. "pygments_lexer": "ipython3",
  193. "version": "3.6.0"
  194. }
  195. },
  196. "nbformat": 4,
  197. "nbformat_minor": 2
  198. }
Add Comment
Please, Sign In to add comment