Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Installing packages"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 347,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: beautifulsoup4 in /home/jupyterlab/conda/lib/python3.6/site-packages (4.7.1)\n",
- "Requirement already satisfied: soupsieve>=1.2 in /home/jupyterlab/conda/lib/python3.6/site-packages (from beautifulsoup4) (1.8)\n",
- "Note: you may need to restart the kernel to use updated packages.\n"
- ]
- }
- ],
- "source": [
- "%pip install beautifulsoup4"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 348,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: lxml in /home/jupyterlab/conda/lib/python3.6/site-packages (4.3.0)\n",
- "Note: you may need to restart the kernel to use updated packages.\n"
- ]
- }
- ],
- "source": [
- "%pip install lxml"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 349,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: html5lib in /home/jupyterlab/conda/lib/python3.6/site-packages (0.9999999)\n",
- "Note: you may need to restart the kernel to use updated packages.\n"
- ]
- }
- ],
- "source": [
- "%pip install html5lib"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 350,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: requests in /home/jupyterlab/conda/lib/python3.6/site-packages (2.21.0)\n",
- "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /home/jupyterlab/conda/lib/python3.6/site-packages (from requests) (3.0.4)\n",
- "Requirement already satisfied: urllib3<1.25,>=1.21.1 in /home/jupyterlab/conda/lib/python3.6/site-packages (from requests) (1.24.1)\n",
- "Requirement already satisfied: certifi>=2017.4.17 in /home/jupyterlab/conda/lib/python3.6/site-packages (from requests) (2019.3.9)\n",
- "Requirement already satisfied: idna<2.9,>=2.5 in /home/jupyterlab/conda/lib/python3.6/site-packages (from requests) (2.8)\n",
- "Note: you may need to restart the kernel to use updated packages.\n"
- ]
- }
- ],
- "source": [
- "%pip install requests"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 351,
- "metadata": {},
- "outputs": [],
- "source": [
- "from bs4 import BeautifulSoup"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 352,
- "metadata": {},
- "outputs": [],
- "source": [
- "import requests\n",
- "import pandas as pd"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Open the HTML file"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 353,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Read the saved page as bytes so BeautifulSoup detects the encoding from the\n",
- "# document itself instead of relying on the platform's default text encoding.\n",
- "path = 'Wikipedia.html'\n",
- "with open(path, 'rb') as html_file:\n",
- "    soup = BeautifulSoup(html_file, 'lxml')\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Handling the data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 407,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Postcode</th>\n",
- " <th>Borough</th>\n",
- " <th>Neighbourhood</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>M1B</td>\n",
- " <td>Scarborough</td>\n",
- " <td>Malvern,Rouge</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>M1C</td>\n",
- " <td>Scarborough</td>\n",
- " <td>Highland Creek,Port Union,Rouge Hill</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>M1E</td>\n",
- " <td>Scarborough</td>\n",
- " <td>Guildwood,Morningside,West Hill</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>M1G</td>\n",
- " <td>Scarborough</td>\n",
- " <td>Woburn</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>M1H</td>\n",
- " <td>Scarborough</td>\n",
- " <td>Cedarbrae</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>5</th>\n",
- " <td>M1J</td>\n",
- " <td>Scarborough</td>\n",
- " <td>Scarborough Village</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>6</th>\n",
- " <td>M1K</td>\n",
- " <td>Scarborough</td>\n",
- " <td>East Birchmount Park,Ionview,Kennedy Park</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>7</th>\n",
- " <td>M1L</td>\n",
- " <td>Scarborough</td>\n",
- " <td>Clairlea,Golden Mile,Oakridge</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>8</th>\n",
- " <td>M1M</td>\n",
- " <td>Scarborough</td>\n",
- " <td>Cliffcrest,Cliffside,Scarborough Village West</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>9</th>\n",
- " <td>M1N</td>\n",
- " <td>Scarborough</td>\n",
- " <td>Birch Cliff,Cliffside West</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>10</th>\n",
- " <td>M1P</td>\n",
- " <td>Scarborough</td>\n",
- " <td>Dorset Park,Scarborough Town Centre,Wexford He...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>11</th>\n",
- " <td>M1R</td>\n",
- " <td>Scarborough</td>\n",
- " <td>Maryvale,Wexford</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>12</th>\n",
- " <td>M1S</td>\n",
- " <td>Scarborough</td>\n",
- " <td>Agincourt</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>13</th>\n",
- " <td>M1T</td>\n",
- " <td>Scarborough</td>\n",
- " <td>Clarks Corners,Sullivan,Tam O'Shanter</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>14</th>\n",
- " <td>M1V</td>\n",
- " <td>Scarborough</td>\n",
- " <td>Agincourt North,L'Amoreaux East,Milliken,Steel...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>15</th>\n",
- " <td>M1W</td>\n",
- " <td>Scarborough</td>\n",
- " <td>L'Amoreaux West</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>16</th>\n",
- " <td>M1X</td>\n",
- " <td>Scarborough</td>\n",
- " <td>Upper Rouge</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>17</th>\n",
- " <td>M2H</td>\n",
- " <td>North York</td>\n",
- " <td>Hillcrest Village</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>18</th>\n",
- " <td>M2J</td>\n",
- " <td>North York</td>\n",
- " <td>Fairview,Henry Farm,Oriole</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>19</th>\n",
- " <td>M2K</td>\n",
- " <td>North York</td>\n",
- " <td>Bayview Village</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>20</th>\n",
- " <td>M2L</td>\n",
- " <td>North York</td>\n",
- " <td>Silver Hills,York Mills</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>21</th>\n",
- " <td>M2M</td>\n",
- " <td>North York</td>\n",
- " <td>Newtonbrook,Willowdale</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>22</th>\n",
- " <td>M2N</td>\n",
- " <td>North York</td>\n",
- " <td>Willowdale South</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>23</th>\n",
- " <td>M2P</td>\n",
- " <td>North York</td>\n",
- " <td>York Mills West</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>24</th>\n",
- " <td>M2R</td>\n",
- " <td>North York</td>\n",
- " <td>Willowdale West</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>25</th>\n",
- " <td>M3A</td>\n",
- " <td>North York</td>\n",
- " <td>Parkwoods</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>26</th>\n",
- " <td>M3B</td>\n",
- " <td>North York</td>\n",
- " <td>Don Mills North</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>27</th>\n",
- " <td>M3C</td>\n",
- " <td>North York</td>\n",
- " <td>Don Mills South,Flemingdon Park</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>28</th>\n",
- " <td>M3H</td>\n",
- " <td>North York</td>\n",
- " <td>Bathurst Manor,Downsview North,Wilson Heights</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>29</th>\n",
- " <td>M3J</td>\n",
- " <td>North York</td>\n",
- " <td>Northwood Park,York University</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>73</th>\n",
- " <td>M6C</td>\n",
- " <td>York</td>\n",
- " <td>Humewood-Cedarvale</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>74</th>\n",
- " <td>M6E</td>\n",
- " <td>York</td>\n",
- " <td>Caledonia-Fairbanks</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>75</th>\n",
- " <td>M6G</td>\n",
- " <td>Downtown Toronto</td>\n",
- " <td>Christie</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>76</th>\n",
- " <td>M6H</td>\n",
- " <td>West Toronto</td>\n",
- " <td>Dovercourt Village,Dufferin</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>77</th>\n",
- " <td>M6J</td>\n",
- " <td>West Toronto</td>\n",
- " <td>Little Portugal,Trinity</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>78</th>\n",
- " <td>M6K</td>\n",
- " <td>West Toronto</td>\n",
- " <td>Brockton,Exhibition Place,Parkdale Village</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>79</th>\n",
- " <td>M6L</td>\n",
- " <td>North York</td>\n",
- " <td>Downsview,North Park,Upwood Park</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>80</th>\n",
- " <td>M6M</td>\n",
- " <td>York</td>\n",
- " <td>Del Ray,Keelesdale,Mount Dennis,Silverthorn</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>81</th>\n",
- " <td>M6N</td>\n",
- " <td>York</td>\n",
- " <td>Runnymede,The Junction North</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>82</th>\n",
- " <td>M6P</td>\n",
- " <td>West Toronto</td>\n",
- " <td>High Park,The Junction South</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>83</th>\n",
- " <td>M6R</td>\n",
- " <td>West Toronto</td>\n",
- " <td>Parkdale,Roncesvalles</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>84</th>\n",
- " <td>M6S</td>\n",
- " <td>West Toronto</td>\n",
- " <td>Runnymede,Swansea</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>85</th>\n",
- " <td>M7A</td>\n",
- " <td>Queen's Park</td>\n",
- " <td>Queen's Park</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>86</th>\n",
- " <td>M7R</td>\n",
- " <td>Mississauga</td>\n",
- " <td>Canada Post Gateway Processing Centre</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>87</th>\n",
- " <td>M7Y</td>\n",
- " <td>East Toronto</td>\n",
- " <td>Business Reply Mail Processing Centre 969 Eastern</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>88</th>\n",
- " <td>M8V</td>\n",
- " <td>Etobicoke</td>\n",
- " <td>Humber Bay Shores,Mimico South,New Toronto</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>89</th>\n",
- " <td>M8W</td>\n",
- " <td>Etobicoke</td>\n",
- " <td>Alderwood,Long Branch</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>90</th>\n",
- " <td>M8X</td>\n",
- " <td>Etobicoke</td>\n",
- " <td>Montgomery Road,Old Mill North,The Kingsway</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>91</th>\n",
- " <td>M8Y</td>\n",
- " <td>Etobicoke</td>\n",
- " <td>Humber Bay,King's Mill Park,Kingsway Park Sout...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>92</th>\n",
- " <td>M8Z</td>\n",
- " <td>Etobicoke</td>\n",
- " <td>Kingsway Park South West,Mimico NW,Royal York ...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>93</th>\n",
- " <td>M9A</td>\n",
- " <td>Etobicoke</td>\n",
- " <td>Islington Avenue</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>94</th>\n",
- " <td>M9B</td>\n",
- " <td>Etobicoke</td>\n",
- " <td>Cloverdale,Islington,Martin Grove,Princess Gar...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>95</th>\n",
- " <td>M9C</td>\n",
- " <td>Etobicoke</td>\n",
- " <td>Bloordale Gardens,Eringate,Markland Wood,Old B...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>96</th>\n",
- " <td>M9L</td>\n",
- " <td>North York</td>\n",
- " <td>Humber Summit</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>97</th>\n",
- " <td>M9M</td>\n",
- " <td>North York</td>\n",
- " <td>Emery,Humberlea</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>98</th>\n",
- " <td>M9N</td>\n",
- " <td>York</td>\n",
- " <td>Weston</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>99</th>\n",
- " <td>M9P</td>\n",
- " <td>Etobicoke</td>\n",
- " <td>Westmount</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>100</th>\n",
- " <td>M9R</td>\n",
- " <td>Etobicoke</td>\n",
- " <td>Kingsview Village,Martin Grove Gardens,Richvie...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>101</th>\n",
- " <td>M9V</td>\n",
- " <td>Etobicoke</td>\n",
- " <td>Albion Gardens,Beaumond Heights,Humbergate,Jam...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>102</th>\n",
- " <td>M9W</td>\n",
- " <td>Etobicoke</td>\n",
- " <td>Northwest</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>103 rows × 3 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " Postcode Borough \\\n",
- "0 M1B Scarborough \n",
- "1 M1C Scarborough \n",
- "2 M1E Scarborough \n",
- "3 M1G Scarborough \n",
- "4 M1H Scarborough \n",
- "5 M1J Scarborough \n",
- "6 M1K Scarborough \n",
- "7 M1L Scarborough \n",
- "8 M1M Scarborough \n",
- "9 M1N Scarborough \n",
- "10 M1P Scarborough \n",
- "11 M1R Scarborough \n",
- "12 M1S Scarborough \n",
- "13 M1T Scarborough \n",
- "14 M1V Scarborough \n",
- "15 M1W Scarborough \n",
- "16 M1X Scarborough \n",
- "17 M2H North York \n",
- "18 M2J North York \n",
- "19 M2K North York \n",
- "20 M2L North York \n",
- "21 M2M North York \n",
- "22 M2N North York \n",
- "23 M2P North York \n",
- "24 M2R North York \n",
- "25 M3A North York \n",
- "26 M3B North York \n",
- "27 M3C North York \n",
- "28 M3H North York \n",
- "29 M3J North York \n",
- ".. ... ... \n",
- "73 M6C York \n",
- "74 M6E York \n",
- "75 M6G Downtown Toronto \n",
- "76 M6H West Toronto \n",
- "77 M6J West Toronto \n",
- "78 M6K West Toronto \n",
- "79 M6L North York \n",
- "80 M6M York \n",
- "81 M6N York \n",
- "82 M6P West Toronto \n",
- "83 M6R West Toronto \n",
- "84 M6S West Toronto \n",
- "85 M7A Queen's Park \n",
- "86 M7R Mississauga \n",
- "87 M7Y East Toronto \n",
- "88 M8V Etobicoke \n",
- "89 M8W Etobicoke \n",
- "90 M8X Etobicoke \n",
- "91 M8Y Etobicoke \n",
- "92 M8Z Etobicoke \n",
- "93 M9A Etobicoke \n",
- "94 M9B Etobicoke \n",
- "95 M9C Etobicoke \n",
- "96 M9L North York \n",
- "97 M9M North York \n",
- "98 M9N York \n",
- "99 M9P Etobicoke \n",
- "100 M9R Etobicoke \n",
- "101 M9V Etobicoke \n",
- "102 M9W Etobicoke \n",
- "\n",
- " Neighbourhood \n",
- "0 Malvern,Rouge \n",
- "1 Highland Creek,Port Union,Rouge Hill \n",
- "2 Guildwood,Morningside,West Hill \n",
- "3 Woburn \n",
- "4 Cedarbrae \n",
- "5 Scarborough Village \n",
- "6 East Birchmount Park,Ionview,Kennedy Park \n",
- "7 Clairlea,Golden Mile,Oakridge \n",
- "8 Cliffcrest,Cliffside,Scarborough Village West \n",
- "9 Birch Cliff,Cliffside West \n",
- "10 Dorset Park,Scarborough Town Centre,Wexford He... \n",
- "11 Maryvale,Wexford \n",
- "12 Agincourt \n",
- "13 Clarks Corners,Sullivan,Tam O'Shanter \n",
- "14 Agincourt North,L'Amoreaux East,Milliken,Steel... \n",
- "15 L'Amoreaux West \n",
- "16 Upper Rouge \n",
- "17 Hillcrest Village \n",
- "18 Fairview,Henry Farm,Oriole \n",
- "19 Bayview Village \n",
- "20 Silver Hills,York Mills \n",
- "21 Newtonbrook,Willowdale \n",
- "22 Willowdale South \n",
- "23 York Mills West \n",
- "24 Willowdale West \n",
- "25 Parkwoods \n",
- "26 Don Mills North \n",
- "27 Don Mills South,Flemingdon Park \n",
- "28 Bathurst Manor,Downsview North,Wilson Heights \n",
- "29 Northwood Park,York University \n",
- ".. ... \n",
- "73 Humewood-Cedarvale \n",
- "74 Caledonia-Fairbanks \n",
- "75 Christie \n",
- "76 Dovercourt Village,Dufferin \n",
- "77 Little Portugal,Trinity \n",
- "78 Brockton,Exhibition Place,Parkdale Village \n",
- "79 Downsview,North Park,Upwood Park \n",
- "80 Del Ray,Keelesdale,Mount Dennis,Silverthorn \n",
- "81 Runnymede,The Junction North \n",
- "82 High Park,The Junction South \n",
- "83 Parkdale,Roncesvalles \n",
- "84 Runnymede,Swansea \n",
- "85 Queen's Park \n",
- "86 Canada Post Gateway Processing Centre \n",
- "87 Business Reply Mail Processing Centre 969 Eastern \n",
- "88 Humber Bay Shores,Mimico South,New Toronto \n",
- "89 Alderwood,Long Branch \n",
- "90 Montgomery Road,Old Mill North,The Kingsway \n",
- "91 Humber Bay,King's Mill Park,Kingsway Park Sout... \n",
- "92 Kingsway Park South West,Mimico NW,Royal York ... \n",
- "93 Islington Avenue \n",
- "94 Cloverdale,Islington,Martin Grove,Princess Gar... \n",
- "95 Bloordale Gardens,Eringate,Markland Wood,Old B... \n",
- "96 Humber Summit \n",
- "97 Emery,Humberlea \n",
- "98 Weston \n",
- "99 Westmount \n",
- "100 Kingsview Village,Martin Grove Gardens,Richvie... \n",
- "101 Albion Gardens,Beaumond Heights,Humbergate,Jam... \n",
- "102 Northwest \n",
- "\n",
- "[103 rows x 3 columns]"
- ]
- },
- "execution_count": 407,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Build one row per postcode from the first <table> of the saved page.\n",
- "# The table is expected to have three columns: postcode, borough, neighbourhood.\n",
- "table = soup.table\n",
- "\n",
- "# Column names come from the first three header cells, whitespace-trimmed.\n",
- "headers = [th.text.strip() for th in table.find_all('th')][:3]\n",
- "postcode_col, borough_col, neighbourhood_col = headers\n",
- "\n",
- "# Collect one record per table row. Keeping each row's cells together means an\n",
- "# irregular row cannot shift later values into the wrong column (as it would if\n",
- "# all cells were flattened into one list and re-split by threes).\n",
- "records = []\n",
- "for tr in table.find_all('tr'):\n",
- "    cells = [td.text.strip() for td in tr.find_all('td')]\n",
- "    # The header row has no <td> cells; rows of any other width are skipped.\n",
- "    if len(cells) == len(headers):\n",
- "        records.append(cells)\n",
- "\n",
- "postcodes_raw = pd.DataFrame(records, columns=headers)\n",
- "\n",
- "# Drop rows where the borough is 'Not assigned'; .copy() so the column\n",
- "# assignment below writes to an independent frame, not a view of postcodes_raw.\n",
- "postcodes_assigned = postcodes_raw[postcodes_raw[borough_col] != 'Not assigned'].copy()\n",
- "\n",
- "# If a neighbourhood is 'Not assigned', fall back to the borough name.\n",
- "postcodes_assigned[neighbourhood_col] = postcodes_assigned[neighbourhood_col].mask(\n",
- "    postcodes_assigned[neighbourhood_col] == 'Not assigned',\n",
- "    postcodes_assigned[borough_col],\n",
- ")\n",
- "\n",
- "# Group by postcode: keep the first borough and comma-join the neighbourhoods.\n",
- "# df2 is the name the following cell reads, so it is kept for the result.\n",
- "df2 = (\n",
- "    postcodes_assigned\n",
- "    .groupby(postcode_col)\n",
- "    .agg({borough_col: 'first', neighbourhood_col: ','.join})\n",
- "    .reset_index()\n",
- "    .loc[:, headers]\n",
- ")\n",
- "df2"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 401,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(103, 3)"
- ]
- },
- "execution_count": 401,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df2.shape"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.8"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement