Advertisement
Guest User

Untitled

a guest
Apr 25th, 2019
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 16.33 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 185,
  6. "metadata": {},
  7. "outputs": [
  8. {
  9. "name": "stdout",
  10. "output_type": "stream",
  11. "text": [
  12. "Libraries imported.\n"
  13. ]
  14. }
  15. ],
  16. "source": [
  17. "import numpy as np\n",
  18. "import pandas as pd\n",
  19. "pd.set_option('display.max_columns', None)\n",
  20. "pd.set_option('display.max_rows', None)\n",
  21. "import json\n",
  22. "from geopy.geocoders import Nominatim\n",
  23. "import requests\n",
  24. "from pandas.io.json import json_normalize\n",
  25. "\n",
  26. "import matplotlib.cm as cm\n",
  27. "import matplotlib.colors as colors\n",
  28. "\n",
  29. "from sklearn.cluster import KMeans\n",
  30. "import folium\n",
  31. "\n",
  32. "print('Libraries imported.')"
  33. ]
  34. },
  35. {
  36. "cell_type": "code",
  37. "execution_count": 186,
  38. "metadata": {},
  39. "outputs": [
  40. {
  41. "data": {
  42. "text/html": [
  43. "<div>\n",
  44. "<style scoped>\n",
  45. " .dataframe tbody tr th:only-of-type {\n",
  46. " vertical-align: middle;\n",
  47. " }\n",
  48. "\n",
  49. " .dataframe tbody tr th {\n",
  50. " vertical-align: top;\n",
  51. " }\n",
  52. "\n",
  53. " .dataframe thead th {\n",
  54. " text-align: right;\n",
  55. " }\n",
  56. "</style>\n",
  57. "<table border=\"1\" class=\"dataframe\">\n",
  58. " <thead>\n",
  59. " <tr style=\"text-align: right;\">\n",
  60. " <th></th>\n",
  61. " <th>postcode</th>\n",
  62. " <th>borough</th>\n",
  63. " <th>neighborhood</th>\n",
  64. " </tr>\n",
  65. " </thead>\n",
  66. " <tbody>\n",
  67. " <tr>\n",
  68. " <th>0</th>\n",
  69. " <td>M1A</td>\n",
  70. " <td>Not assigned</td>\n",
  71. " <td>Not assigned</td>\n",
  72. " </tr>\n",
  73. " <tr>\n",
  74. " <th>1</th>\n",
  75. " <td>M2A</td>\n",
  76. " <td>Not assigned</td>\n",
  77. " <td>Not assigned</td>\n",
  78. " </tr>\n",
  79. " <tr>\n",
  80. " <th>2</th>\n",
  81. " <td>M3A</td>\n",
  82. " <td>North York</td>\n",
  83. " <td>Parkwoods</td>\n",
  84. " </tr>\n",
  85. " <tr>\n",
  86. " <th>3</th>\n",
  87. " <td>M4A</td>\n",
  88. " <td>North York</td>\n",
  89. " <td>Victoria Village</td>\n",
  90. " </tr>\n",
  91. " <tr>\n",
  92. " <th>4</th>\n",
  93. " <td>M5A</td>\n",
  94. " <td>Downtown Toronto</td>\n",
  95. " <td>Harbourfront</td>\n",
  96. " </tr>\n",
  97. " <tr>\n",
  98. " <th>5</th>\n",
  99. " <td>M5A</td>\n",
  100. " <td>Downtown Toronto</td>\n",
  101. " <td>Regent Park</td>\n",
  102. " </tr>\n",
  103. " <tr>\n",
  104. " <th>6</th>\n",
  105. " <td>M6A</td>\n",
  106. " <td>North York</td>\n",
  107. " <td>Lawrence Heights</td>\n",
  108. " </tr>\n",
  109. " <tr>\n",
  110. " <th>7</th>\n",
  111. " <td>M6A</td>\n",
  112. " <td>North York</td>\n",
  113. " <td>Lawrence Manor</td>\n",
  114. " </tr>\n",
  115. " <tr>\n",
  116. " <th>8</th>\n",
  117. " <td>M7A</td>\n",
  118. " <td>Queen's Park</td>\n",
  119. " <td>Not assigned</td>\n",
  120. " </tr>\n",
  121. " <tr>\n",
  122. " <th>9</th>\n",
  123. " <td>M8A</td>\n",
  124. " <td>Not assigned</td>\n",
  125. " <td>Not assigned</td>\n",
  126. " </tr>\n",
  127. " </tbody>\n",
  128. "</table>\n",
  129. "</div>"
  130. ],
  131. "text/plain": [
  132. " postcode borough neighborhood\n",
  133. "0 M1A Not assigned Not assigned\n",
  134. "1 M2A Not assigned Not assigned\n",
  135. "2 M3A North York Parkwoods\n",
  136. "3 M4A North York Victoria Village\n",
  137. "4 M5A Downtown Toronto Harbourfront\n",
  138. "5 M5A Downtown Toronto Regent Park\n",
  139. "6 M6A North York Lawrence Heights\n",
  140. "7 M6A North York Lawrence Manor\n",
  141. "8 M7A Queen's Park Not assigned\n",
  142. "9 M8A Not assigned Not assigned"
  143. ]
  144. },
  145. "execution_count": 186,
  146. "metadata": {},
  147. "output_type": "execute_result"
  148. }
  149. ],
  150. "source": [
  151. "# importing data from csv file...\n",
  152. "# generating head of dataframe...\n",
  153. "df = pd.read_csv('postalcodes_data.csv')\n",
  154. "df.head(10)"
  155. ]
  156. },
  157. {
  158. "cell_type": "code",
  159. "execution_count": 187,
  160. "metadata": {},
  161. "outputs": [
  162. {
  163. "data": {
  164. "text/html": [
  165. "<div>\n",
  166. "<style scoped>\n",
  167. " .dataframe tbody tr th:only-of-type {\n",
  168. " vertical-align: middle;\n",
  169. " }\n",
  170. "\n",
  171. " .dataframe tbody tr th {\n",
  172. " vertical-align: top;\n",
  173. " }\n",
  174. "\n",
  175. " .dataframe thead th {\n",
  176. " text-align: right;\n",
  177. " }\n",
  178. "</style>\n",
  179. "<table border=\"1\" class=\"dataframe\">\n",
  180. " <thead>\n",
  181. " <tr style=\"text-align: right;\">\n",
  182. " <th></th>\n",
  183. " <th>postcode</th>\n",
  184. " <th>borough</th>\n",
  185. " <th>neighborhood</th>\n",
  186. " </tr>\n",
  187. " </thead>\n",
  188. " <tbody>\n",
  189. " <tr>\n",
  190. " <th>0</th>\n",
  191. " <td>M1A</td>\n",
  192. " <td>Not assigned</td>\n",
  193. " <td>Not assigned</td>\n",
  194. " </tr>\n",
  195. " <tr>\n",
  196. " <th>12</th>\n",
  197. " <td>M1B</td>\n",
  198. " <td>Scarborough</td>\n",
  199. " <td>Malvern</td>\n",
  200. " </tr>\n",
  201. " <tr>\n",
  202. " <th>11</th>\n",
  203. " <td>M1B</td>\n",
  204. " <td>Scarborough</td>\n",
  205. " <td>Rouge</td>\n",
  206. " </tr>\n",
  207. " <tr>\n",
  208. " <th>27</th>\n",
  209. " <td>M1C</td>\n",
  210. " <td>Scarborough</td>\n",
  211. " <td>Highland Creek</td>\n",
  212. " </tr>\n",
  213. " <tr>\n",
  214. " <th>28</th>\n",
  215. " <td>M1C</td>\n",
  216. " <td>Scarborough</td>\n",
  217. " <td>Rouge Hill</td>\n",
  218. " </tr>\n",
  219. " <tr>\n",
  220. " <th>29</th>\n",
  221. " <td>M1C</td>\n",
  222. " <td>Scarborough</td>\n",
  223. " <td>Port Union</td>\n",
  224. " </tr>\n",
  225. " <tr>\n",
  226. " <th>44</th>\n",
  227. " <td>M1E</td>\n",
  228. " <td>Scarborough</td>\n",
  229. " <td>West Hill</td>\n",
  230. " </tr>\n",
  231. " <tr>\n",
  232. " <th>43</th>\n",
  233. " <td>M1E</td>\n",
  234. " <td>Scarborough</td>\n",
  235. " <td>Morningside</td>\n",
  236. " </tr>\n",
  237. " <tr>\n",
  238. " <th>42</th>\n",
  239. " <td>M1E</td>\n",
  240. " <td>Scarborough</td>\n",
  241. " <td>Guildwood</td>\n",
  242. " </tr>\n",
  243. " <tr>\n",
  244. " <th>53</th>\n",
  245. " <td>M1G</td>\n",
  246. " <td>Scarborough</td>\n",
  247. " <td>Woburn</td>\n",
  248. " </tr>\n",
  249. " </tbody>\n",
  250. "</table>\n",
  251. "</div>"
  252. ],
  253. "text/plain": [
  254. " postcode borough neighborhood\n",
  255. "0 M1A Not assigned Not assigned\n",
  256. "12 M1B Scarborough Malvern\n",
  257. "11 M1B Scarborough Rouge\n",
  258. "27 M1C Scarborough Highland Creek\n",
  259. "28 M1C Scarborough Rouge Hill\n",
  260. "29 M1C Scarborough Port Union\n",
  261. "44 M1E Scarborough West Hill\n",
  262. "43 M1E Scarborough Morningside\n",
  263. "42 M1E Scarborough Guildwood\n",
  264. "53 M1G Scarborough Woburn"
  265. ]
  266. },
  267. "execution_count": 187,
  268. "metadata": {},
  269. "output_type": "execute_result"
  270. }
  271. ],
  272. "source": [
  273. "# ascending order by postcode column...\n",
  274. "df.index.name = None\n",
  275. "df.sort_values(['postcode'], ascending=True, axis=0, inplace=True)\n",
  276. "df.head(10)"
  277. ]
  278. },
  279. {
  280. "cell_type": "code",
  281. "execution_count": 188,
  282. "metadata": {},
  283. "outputs": [
  284. {
  285. "data": {
  286. "text/plain": [
  287. "True"
  288. ]
  289. },
  290. "execution_count": 188,
  291. "metadata": {},
  292. "output_type": "execute_result"
  293. }
  294. ],
  295. "source": [
  296. "# ensuring data type consistency...\n",
  297. "all(isinstance(column, str) for column in df.columns)"
  298. ]
  299. },
  300. {
  301. "cell_type": "code",
  302. "execution_count": 189,
  303. "metadata": {},
  304. "outputs": [
  305. {
  306. "data": {
  307. "text/html": [
  308. "<div>\n",
  309. "<style scoped>\n",
  310. " .dataframe tbody tr th:only-of-type {\n",
  311. " vertical-align: middle;\n",
  312. " }\n",
  313. "\n",
  314. " .dataframe tbody tr th {\n",
  315. " vertical-align: top;\n",
  316. " }\n",
  317. "\n",
  318. " .dataframe thead th {\n",
  319. " text-align: right;\n",
  320. " }\n",
  321. "</style>\n",
  322. "<table border=\"1\" class=\"dataframe\">\n",
  323. " <thead>\n",
  324. " <tr style=\"text-align: right;\">\n",
  325. " <th></th>\n",
  326. " <th></th>\n",
  327. " <th></th>\n",
  328. " </tr>\n",
  329. " <tr>\n",
  330. " <th>postcode</th>\n",
  331. " <th>borough</th>\n",
  332. " <th>neighborhood</th>\n",
  333. " </tr>\n",
  334. " </thead>\n",
  335. " <tbody>\n",
  336. " <tr>\n",
  337. " <th>M1A</th>\n",
  338. " <th>Not assigned</th>\n",
  339. " <th>Not assigned</th>\n",
  340. " </tr>\n",
  341. " <tr>\n",
  342. " <th rowspan=\"2\" valign=\"top\">M1B</th>\n",
  343. " <th rowspan=\"2\" valign=\"top\">Scarborough</th>\n",
  344. " <th>Malvern</th>\n",
  345. " </tr>\n",
  346. " <tr>\n",
  347. " <th>Rouge</th>\n",
  348. " </tr>\n",
  349. " <tr>\n",
  350. " <th rowspan=\"3\" valign=\"top\">M1C</th>\n",
  351. " <th rowspan=\"3\" valign=\"top\">Scarborough</th>\n",
  352. " <th>Highland Creek</th>\n",
  353. " </tr>\n",
  354. " <tr>\n",
  355. " <th>Port Union</th>\n",
  356. " </tr>\n",
  357. " <tr>\n",
  358. " <th>Rouge Hill</th>\n",
  359. " </tr>\n",
  360. " <tr>\n",
  361. " <th rowspan=\"3\" valign=\"top\">M1E</th>\n",
  362. " <th rowspan=\"3\" valign=\"top\">Scarborough</th>\n",
  363. " <th>Guildwood</th>\n",
  364. " </tr>\n",
  365. " <tr>\n",
  366. " <th>Morningside</th>\n",
  367. " </tr>\n",
  368. " <tr>\n",
  369. " <th>West Hill</th>\n",
  370. " </tr>\n",
  371. " <tr>\n",
  372. " <th>M1G</th>\n",
  373. " <th>Scarborough</th>\n",
  374. " <th>Woburn</th>\n",
  375. " </tr>\n",
  376. " <tr>\n",
  377. " <th>M1H</th>\n",
  378. " <th>Scarborough</th>\n",
  379. " <th>Cedarbrae</th>\n",
  380. " </tr>\n",
  381. " <tr>\n",
  382. " <th>M1J</th>\n",
  383. " <th>Scarborough</th>\n",
  384. " <th>Scarborough Village</th>\n",
  385. " </tr>\n",
  386. " <tr>\n",
  387. " <th rowspan=\"3\" valign=\"top\">M1K</th>\n",
  388. " <th rowspan=\"3\" valign=\"top\">Scarborough</th>\n",
  389. " <th>East Birchmount Park</th>\n",
  390. " </tr>\n",
  391. " <tr>\n",
  392. " <th>Ionview</th>\n",
  393. " </tr>\n",
  394. " <tr>\n",
  395. " <th>Kennedy Park</th>\n",
  396. " </tr>\n",
  397. " <tr>\n",
  398. " <th rowspan=\"3\" valign=\"top\">M1L</th>\n",
  399. " <th rowspan=\"3\" valign=\"top\">Scarborough</th>\n",
  400. " <th>Clairlea</th>\n",
  401. " </tr>\n",
  402. " <tr>\n",
  403. " <th>Golden Mile</th>\n",
  404. " </tr>\n",
  405. " <tr>\n",
  406. " <th>Oakridge</th>\n",
  407. " </tr>\n",
  408. " <tr>\n",
  409. " <th rowspan=\"2\" valign=\"top\">M1M</th>\n",
  410. " <th rowspan=\"2\" valign=\"top\">Scarborough</th>\n",
  411. " <th>Cliffcrest</th>\n",
  412. " </tr>\n",
  413. " <tr>\n",
  414. " <th>Cliffside</th>\n",
  415. " </tr>\n",
  416. " </tbody>\n",
  417. "</table>\n",
  418. "</div>"
  419. ],
  420. "text/plain": [
  421. "Empty DataFrame\n",
  422. "Columns: []\n",
  423. "Index: [(M1A, Not assigned, Not assigned), (M1B, Scarborough, Malvern), (M1B, Scarborough, Rouge), (M1C, Scarborough, Highland Creek), (M1C, Scarborough, Port Union), (M1C, Scarborough, Rouge Hill), (M1E, Scarborough, Guildwood), (M1E, Scarborough, Morningside), (M1E, Scarborough, West Hill), (M1G, Scarborough, Woburn), (M1H, Scarborough, Cedarbrae), (M1J, Scarborough, Scarborough Village), (M1K, Scarborough, East Birchmount Park), (M1K, Scarborough, Ionview), (M1K, Scarborough, Kennedy Park), (M1L, Scarborough, Clairlea), (M1L, Scarborough, Golden Mile), (M1L, Scarborough, Oakridge), (M1M, Scarborough, Cliffcrest), (M1M, Scarborough, Cliffside)]"
  424. ]
  425. },
  426. "execution_count": 189,
  427. "metadata": {},
  428. "output_type": "execute_result"
  429. }
  430. ],
  431. "source": [
  432. "# merging and combining rows and grouping by all columns...\n",
  433. "df = df.groupby(['postcode', 'borough', 'neighborhood']).sum()\n",
  434. "df.head(20)"
  435. ]
  436. },
  437. {
  438. "cell_type": "code",
  439. "execution_count": 195,
  440. "metadata": {},
  441. "outputs": [
  442. {
  443. "data": {
  444. "text/html": [
  445. "<div>\n",
  446. "<style scoped>\n",
  447. " .dataframe tbody tr th:only-of-type {\n",
  448. " vertical-align: middle;\n",
  449. " }\n",
  450. "\n",
  451. " .dataframe tbody tr th {\n",
  452. " vertical-align: top;\n",
  453. " }\n",
  454. "\n",
  455. " .dataframe thead th {\n",
  456. " text-align: right;\n",
  457. " }\n",
  458. "</style>\n",
  459. "<table border=\"1\" class=\"dataframe\">\n",
  460. " <thead>\n",
  461. " <tr style=\"text-align: right;\">\n",
  462. " <th></th>\n",
  463. " </tr>\n",
  464. " <tr>\n",
  465. " <th>postcode</th>\n",
  466. " </tr>\n",
  467. " </thead>\n",
  468. " <tbody>\n",
  469. " <tr>\n",
  470. " <th>M1A</th>\n",
  471. " </tr>\n",
  472. " <tr>\n",
  473. " <th>M1B</th>\n",
  474. " </tr>\n",
  475. " <tr>\n",
  476. " <th>M1C</th>\n",
  477. " </tr>\n",
  478. " <tr>\n",
  479. " <th>M1E</th>\n",
  480. " </tr>\n",
  481. " <tr>\n",
  482. " <th>M1G</th>\n",
  483. " </tr>\n",
  484. " </tbody>\n",
  485. "</table>\n",
  486. "</div>"
  487. ],
  488. "text/plain": [
  489. "Empty DataFrame\n",
  490. "Columns: []\n",
  491. "Index: [M1A, M1B, M1C, M1E, M1G]"
  492. ]
  493. },
  494. "execution_count": 195,
  495. "metadata": {},
  496. "output_type": "execute_result"
  497. }
  498. ],
  499. "source": [
  500. "df = df.groupby(['postcode']).sum()\n",
  501. "df.head()"
  502. ]
  503. },
  504. {
  505. "cell_type": "code",
  506. "execution_count": 194,
  507. "metadata": {},
  508. "outputs": [
  509. {
  510. "name": "stdout",
  511. "output_type": "stream",
  512. "text": [
  513. "data dimensions: (180, 0)\n"
  514. ]
  515. }
  516. ],
  517. "source": [
  518. "# printing the number of rows of dataframe based on merged postcode column...\n",
  519. "print ('data dimensions:', df.shape)"
  520. ]
  521. },
  522. {
  523. "cell_type": "code",
  524. "execution_count": null,
  525. "metadata": {},
  526. "outputs": [],
  527. "source": []
  528. },
  529. {
  530. "cell_type": "code",
  531. "execution_count": null,
  532. "metadata": {},
  533. "outputs": [],
  534. "source": []
  535. },
  536. {
  537. "cell_type": "code",
  538. "execution_count": null,
  539. "metadata": {},
  540. "outputs": [],
  541. "source": []
  542. },
  543. {
  544. "cell_type": "code",
  545. "execution_count": null,
  546. "metadata": {},
  547. "outputs": [],
  548. "source": []
  549. },
  550. {
  551. "cell_type": "code",
  552. "execution_count": null,
  553. "metadata": {},
  554. "outputs": [],
  555. "source": []
  556. },
  557. {
  558. "cell_type": "code",
  559. "execution_count": null,
  560. "metadata": {},
  561. "outputs": [],
  562. "source": []
  563. },
  564. {
  565. "cell_type": "code",
  566. "execution_count": null,
  567. "metadata": {},
  568. "outputs": [],
  569. "source": []
  570. },
  571. {
  572. "cell_type": "code",
  573. "execution_count": null,
  574. "metadata": {},
  575. "outputs": [],
  576. "source": []
  577. },
  578. {
  579. "cell_type": "code",
  580. "execution_count": null,
  581. "metadata": {},
  582. "outputs": [],
  583. "source": []
  584. }
  585. ],
  586. "metadata": {
  587. "kernelspec": {
  588. "display_name": "Python 3",
  589. "language": "python",
  590. "name": "python3"
  591. },
  592. "language_info": {
  593. "codemirror_mode": {
  594. "name": "ipython",
  595. "version": 3
  596. },
  597. "file_extension": ".py",
  598. "mimetype": "text/x-python",
  599. "name": "python",
  600. "nbconvert_exporter": "python",
  601. "pygments_lexer": "ipython3",
  602. "version": "3.6.8"
  603. }
  604. },
  605. "nbformat": 4,
  606. "nbformat_minor": 2
  607. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement