Advertisement
Guest User

Untitled

a guest
Aug 18th, 2019
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.36 KB | None | 0 0
  1. {
  2. "nbformat_minor": 1,
  3. "cells": [
  4. {
  5. "execution_count": null,
  6. "cell_type": "code",
  7. "metadata": {},
  8. "outputs": [],
  9. "source": "from sklearn.model_selection import train_test_split\nfrom sklearn.ensemble import ExtraTreesClassifier,GradientBoostingClassifier\nfrom sklearn.svm import SVC\nfrom sklearn.ensemble import VotingClassifier\nfrom sklearn.metrics import accuracy_score\nimport numpy as np\nimport pandas as pd"
  10. },
  11. {
  12. "execution_count": null,
  13. "cell_type": "code",
  14. "metadata": {
  15. "scrolled": true
  16. },
  17. "outputs": [],
  18. "source": "# The code was removed by Watson Studio for sharing."
  19. },
  20. {
  21. "execution_count": null,
  22. "cell_type": "code",
  23. "metadata": {},
  24. "outputs": [],
  25. "source": "df_data_1['address']=df_data_1['addr_state'].astype(str)+df_data_1['zip_code'].astype(str)"
  26. },
  27. {
  28. "execution_count": null,
  29. "cell_type": "code",
  30. "metadata": {},
  31. "outputs": [],
  32. "source": "removal_cols=['mths_since_last_major_derog',\n 'zip_code',\n 'application_type',\n 'mths_since_last_record',\n 'addr_state',\n 'mths_since_last_delinq']\npreserve_cols=[\"emp_title\",\"emp_length\",\"address\",\"last_week_pay\"]\nstring_cols=[\"term\",\"batch_enrolled\",\"grade\",\"sub_grade\"]"
  33. },
  34. {
  35. "execution_count": null,
  36. "cell_type": "code",
  37. "metadata": {},
  38. "outputs": [],
  39. "source": "df_final=df_data_1[preserve_cols].copy(deep=True)\nremoval_cols+=preserve_cols"
  40. },
  41. {
  42. "execution_count": null,
  43. "cell_type": "code",
  44. "metadata": {},
  45. "outputs": [],
  46. "source": "df_data_1.drop(removal_cols,axis=1,inplace=True)"
  47. },
  48. {
  49. "execution_count": null,
  50. "cell_type": "code",
  51. "metadata": {},
  52. "outputs": [],
  53. "source": "df_data_1.head()"
  54. },
  55. {
  56. "execution_count": null,
  57. "cell_type": "code",
  58. "metadata": {},
  59. "outputs": [],
  60. "source": "def replace_missing_values(df):\n \"\"\"Return a new frame built from df with missing values filled.\n\n Columns whose dtype kind is one of 'biufc' (bool, int, uint, float, complex)\n get 0; every other column gets a single space ' '.\n \"\"\"\n # Use the argument rather than the global df_data_1 so the helper works on any frame.\n return df.apply(lambda x: x.fillna(0) if x.dtype.kind in 'biufc' else x.fillna(' '))"
  61. },
  62. {
  63. "execution_count": null,
  64. "cell_type": "code",
  65. "metadata": {},
  66. "outputs": [],
  67. "source": "df_data_1=df_data_1[df_data_1['batch_enrolled']!=' ']"
  68. },
  69. {
  70. "execution_count": null,
  71. "cell_type": "code",
  72. "metadata": {},
  73. "outputs": [],
  74. "source": "df=replace_missing_values(df_data_1)"
  75. },
  76. {
  77. "execution_count": null,
  78. "cell_type": "code",
  79. "metadata": {},
  80. "outputs": [],
  81. "source": "df['no_funded_amnt']=df['loan_amnt']==df['funded_amnt']"
  82. },
  83. {
  84. "execution_count": null,
  85. "cell_type": "code",
  86. "metadata": {},
  87. "outputs": [],
  88. "source": "df=pd.get_dummies(df,columns=string_cols)"
  89. },
  90. {
  91. "execution_count": null,
  92. "cell_type": "code",
  93. "metadata": {},
  94. "outputs": [],
  95. "source": "target_col=df['home_ownership']\ndf.drop('home_ownership',axis=1,inplace=True)"
  96. },
  97. {
  98. "execution_count": null,
  99. "cell_type": "code",
  100. "metadata": {},
  101. "outputs": [],
  102. "source": "xtrain,xtest,ytrain,ytest=train_test_split(df,target_col,test_size=0.20, random_state=42)"
  103. },
  104. {
  105. "execution_count": null,
  106. "cell_type": "code",
  107. "metadata": {},
  108. "outputs": [],
  109. "source": "# Seed the randomized tree ensembles (same seed as the train/test split) so repeated runs build the same models.\nxg_clf=ExtraTreesClassifier(n_estimators=20,random_state=42)\ng_clf=GradientBoostingClassifier(n_estimators=40,learning_rate=0.001,random_state=42)\nsvm_clf = SVC(kernel=\"rbf\")\n# Hard voting: the ensemble predicts the majority class label of the three members.\nv_clf=VotingClassifier(estimators=[('xg',xg_clf),('gb',g_clf),('svc',svm_clf)],voting='hard')"
  110. },
  111. {
  112. "execution_count": null,
  113. "cell_type": "code",
  114. "metadata": {
  115. "scrolled": true
  116. },
  117. "outputs": [],
  118. "source": "v_clf.fit(xtrain,ytrain)"
  119. },
  120. {
  121. "execution_count": null,
  122. "cell_type": "code",
  123. "metadata": {
  124. "scrolled": true
  125. },
  126. "outputs": [],
  127. "source": "for clf in (xg_clf, g_clf, svm_clf, v_clf):\n clf.fit(xtrain, ytrain)\n ypred = clf.predict(xtest)\n print(clf.__class__.__name__, accuracy_score(ytest, ypred))"
  128. },
  129. {
  130. "execution_count": null,
  131. "cell_type": "code",
  132. "metadata": {},
  133. "outputs": [],
  134. "source": ""
  135. },
  136. {
  137. "execution_count": null,
  138. "cell_type": "code",
  139. "metadata": {},
  140. "outputs": [],
  141. "source": ""
  142. },
  143. {
  144. "execution_count": null,
  145. "cell_type": "code",
  146. "metadata": {},
  147. "outputs": [],
  148. "source": ""
  149. },
  150. {
  151. "execution_count": null,
  152. "cell_type": "code",
  153. "metadata": {},
  154. "outputs": [],
  155. "source": ""
  156. },
  157. {
  158. "execution_count": null,
  159. "cell_type": "code",
  160. "metadata": {},
  161. "outputs": [],
  162. "source": ""
  163. },
  164. {
  165. "execution_count": null,
  166. "cell_type": "code",
  167. "metadata": {},
  168. "outputs": [],
  169. "source": ""
  170. },
  171. {
  172. "execution_count": null,
  173. "cell_type": "code",
  174. "metadata": {},
  175. "outputs": [],
  176. "source": ""
  177. },
  178. {
  179. "execution_count": null,
  180. "cell_type": "code",
  181. "metadata": {},
  182. "outputs": [],
  183. "source": ""
  184. },
  185. {
  186. "execution_count": null,
  187. "cell_type": "code",
  188. "metadata": {},
  189. "outputs": [],
  190. "source": ""
  191. },
  192. {
  193. "execution_count": null,
  194. "cell_type": "code",
  195. "metadata": {},
  196. "outputs": [],
  197. "source": ""
  198. },
  199. {
  200. "execution_count": null,
  201. "cell_type": "code",
  202. "metadata": {},
  203. "outputs": [],
  204. "source": ""
  205. },
  206. {
  207. "execution_count": null,
  208. "cell_type": "code",
  209. "metadata": {},
  210. "outputs": [],
  211. "source": ""
  212. },
  213. {
  214. "execution_count": null,
  215. "cell_type": "code",
  216. "metadata": {},
  217. "outputs": [],
  218. "source": ""
  219. },
  220. {
  221. "execution_count": null,
  222. "cell_type": "code",
  223. "metadata": {},
  224. "outputs": [],
  225. "source": ""
  226. },
  227. {
  228. "execution_count": null,
  229. "cell_type": "code",
  230. "metadata": {},
  231. "outputs": [],
  232. "source": ""
  233. },
  234. {
  235. "execution_count": null,
  236. "cell_type": "code",
  237. "metadata": {},
  238. "outputs": [],
  239. "source": ""
  240. },
  241. {
  242. "execution_count": null,
  243. "cell_type": "code",
  244. "metadata": {},
  245. "outputs": [],
  246. "source": ""
  247. },
  248. {
  249. "execution_count": null,
  250. "cell_type": "code",
  251. "metadata": {},
  252. "outputs": [],
  253. "source": ""
  254. },
  255. {
  256. "execution_count": null,
  257. "cell_type": "code",
  258. "metadata": {},
  259. "outputs": [],
  260. "source": ""
  261. },
  262. {
  263. "execution_count": null,
  264. "cell_type": "code",
  265. "metadata": {},
  266. "outputs": [],
  267. "source": ""
  268. },
  269. {
  270. "execution_count": null,
  271. "cell_type": "code",
  272. "metadata": {},
  273. "outputs": [],
  274. "source": ""
  275. },
  276. {
  277. "execution_count": null,
  278. "cell_type": "code",
  279. "metadata": {},
  280. "outputs": [],
  281. "source": ""
  282. },
  283. {
  284. "execution_count": null,
  285. "cell_type": "code",
  286. "metadata": {},
  287. "outputs": [],
  288. "source": ""
  289. }
  290. ],
  291. "metadata": {
  292. "kernelspec": {
  293. "display_name": "Python 3.6",
  294. "name": "python3",
  295. "language": "python"
  296. },
  297. "language_info": {
  298. "mimetype": "text/x-python",
  299. "nbconvert_exporter": "python",
  300. "version": "3.6.8",
  301. "name": "python",
  302. "file_extension": ".py",
  303. "pygments_lexer": "ipython3",
  304. "codemirror_mode": {
  305. "version": 3,
  306. "name": "ipython"
  307. }
  308. }
  309. },
  310. "nbformat": 4
  311. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement