Untitled

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 185,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Libraries imported.\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "pd.set_option('display.max_columns', None)\n",
    "pd.set_option('display.max_rows', None)\n",
    "import json\n",
    "from geopy.geocoders import Nominatim\n",
    "import requests\n",
    "from pandas.io.json import json_normalize\n",
    "\n",
    "import matplotlib.cm as cm\n",
    "import matplotlib.colors as colors\n",
    "\n",
    "from sklearn.cluster import KMeans\n",
    "import folium\n",
    "\n",
    "print('Libraries imported.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 186,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>postcode</th>\n",
       "      <th>borough</th>\n",
       "      <th>neighborhood</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M1A</td>\n",
       "      <td>Not assigned</td>\n",
       "      <td>Not assigned</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>M2A</td>\n",
       "      <td>Not assigned</td>\n",
       "      <td>Not assigned</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M3A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Parkwoods</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>M4A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Victoria Village</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>M5A</td>\n",
       "      <td>Downtown Toronto</td>\n",
       "      <td>Harbourfront</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>M5A</td>\n",
       "      <td>Downtown Toronto</td>\n",
       "      <td>Regent Park</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>M6A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Lawrence Heights</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>M6A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Lawrence Manor</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>M7A</td>\n",
       "      <td>Queen's Park</td>\n",
       "      <td>Not assigned</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>M8A</td>\n",
       "      <td>Not assigned</td>\n",
       "      <td>Not assigned</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  postcode           borough      neighborhood\n",
       "0      M1A      Not assigned      Not assigned\n",
       "1      M2A      Not assigned      Not assigned\n",
       "2      M3A        North York         Parkwoods\n",
       "3      M4A        North York  Victoria Village\n",
       "4      M5A  Downtown Toronto      Harbourfront\n",
       "5      M5A  Downtown Toronto       Regent Park\n",
       "6      M6A        North York  Lawrence Heights\n",
       "7      M6A        North York    Lawrence Manor\n",
       "8      M7A      Queen's Park      Not assigned\n",
       "9      M8A      Not assigned      Not assigned"
      ]
     },
     "execution_count": 186,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# importing data from csv file...\n",
    "# generating head of dataframe...\n",
    "df = pd.read_csv('postalcodes_data.csv')\n",
    "df.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 187,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>postcode</th>\n",
       "      <th>borough</th>\n",
       "      <th>neighborhood</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M1A</td>\n",
       "      <td>Not assigned</td>\n",
       "      <td>Not assigned</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>M1B</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Malvern</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>M1B</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Rouge</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>M1C</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Highland Creek</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>M1C</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Rouge Hill</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>M1C</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Port Union</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>M1E</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>West Hill</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>M1E</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Morningside</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>M1E</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Guildwood</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>M1G</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Woburn</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   postcode       borough    neighborhood\n",
       "0       M1A  Not assigned    Not assigned\n",
       "12      M1B   Scarborough         Malvern\n",
       "11      M1B   Scarborough           Rouge\n",
       "27      M1C   Scarborough  Highland Creek\n",
       "28      M1C   Scarborough      Rouge Hill\n",
       "29      M1C   Scarborough      Port Union\n",
       "44      M1E   Scarborough       West Hill\n",
       "43      M1E   Scarborough     Morningside\n",
       "42      M1E   Scarborough       Guildwood\n",
       "53      M1G   Scarborough          Woburn"
      ]
     },
     "execution_count": 187,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# ascending order by postcode column...\n",
    "df.index.name = None\n",
    "df.sort_values(['postcode'], ascending=True, axis=0, inplace=True)\n",
    "df.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 188,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 188,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# ensuring data type consistency...\n",
    "all(isinstance(column, str) for column in df.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>postcode</th>\n",
       "      <th>borough</th>\n",
       "      <th>neighborhood</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>M1A</th>\n",
       "      <th>Not assigned</th>\n",
       "      <th>Not assigned</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">M1B</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">Scarborough</th>\n",
       "      <th>Malvern</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Rouge</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">M1C</th>\n",
       "      <th rowspan=\"3\" valign=\"top\">Scarborough</th>\n",
       "      <th>Highland Creek</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Port Union</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Rouge Hill</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">M1E</th>\n",
       "      <th rowspan=\"3\" valign=\"top\">Scarborough</th>\n",
       "      <th>Guildwood</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Morningside</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>West Hill</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>M1G</th>\n",
       "      <th>Scarborough</th>\n",
       "      <th>Woburn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>M1H</th>\n",
       "      <th>Scarborough</th>\n",
       "      <th>Cedarbrae</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>M1J</th>\n",
       "      <th>Scarborough</th>\n",
       "      <th>Scarborough Village</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">M1K</th>\n",
       "      <th rowspan=\"3\" valign=\"top\">Scarborough</th>\n",
       "      <th>East Birchmount Park</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Ionview</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Kennedy Park</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">M1L</th>\n",
       "      <th rowspan=\"3\" valign=\"top\">Scarborough</th>\n",
       "      <th>Clairlea</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Golden Mile</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Oakridge</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">M1M</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">Scarborough</th>\n",
       "      <th>Cliffcrest</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cliffside</th>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: []\n",
       "Index: [(M1A, Not assigned, Not assigned), (M1B, Scarborough, Malvern), (M1B, Scarborough, Rouge), (M1C, Scarborough, Highland Creek), (M1C, Scarborough, Port Union), (M1C, Scarborough, Rouge Hill), (M1E, Scarborough, Guildwood), (M1E, Scarborough, Morningside), (M1E, Scarborough, West Hill), (M1G, Scarborough, Woburn), (M1H, Scarborough, Cedarbrae), (M1J, Scarborough, Scarborough Village), (M1K, Scarborough, East Birchmount Park), (M1K, Scarborough, Ionview), (M1K, Scarborough, Kennedy Park), (M1L, Scarborough, Clairlea), (M1L, Scarborough, Golden Mile), (M1L, Scarborough, Oakridge), (M1M, Scarborough, Cliffcrest), (M1M, Scarborough, Cliffside)]"
      ]
     },
     "execution_count": 189,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# merging and combining rows and grouping by all columns...\n",
    "df = df.groupby(['postcode', 'borough', 'neighborhood']).sum()\n",
    "df.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 195,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>postcode</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>M1A</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>M1B</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>M1C</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>M1E</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>M1G</th>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: []\n",
       "Index: [M1A, M1B, M1C, M1E, M1G]"
      ]
     },
     "execution_count": 195,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = df.groupby(['postcode']).sum()\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 194,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "data dimensions: (180, 0)\n"
     ]
    }
   ],
   "source": [
    "# printing the number of rows of dataframe based on merged postcode column...\n",
    "print ('data dimensions:', df.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}