Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "import partridge as ptg"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Read the zip using `raw_feed`, which does not parse or prune the files."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Source GTFS archive, loaded through `raw_feed`\n",
- "inpath = 'scratch/cmbc-translink.zip'\n",
- "feed = ptg.raw_feed(inpath)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Inspect the original stops file"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style>\n",
- " .dataframe thead tr:only-child th {\n",
- " text-align: right;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: left;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>stop_id</th>\n",
- " <th>stop_code</th>\n",
- " <th>stop_name</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>5351</th>\n",
- " <td>1329</td>\n",
- " <td>51318</td>\n",
- " <td>22 TERMINUS UNDER KNIGHT ST BRIDGE</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>888</th>\n",
- " <td>10915</td>\n",
- " <td>58173</td>\n",
- " <td>22ND STREET STN BAY 1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>7941</th>\n",
- " <td>10423</td>\n",
- " <td>52165</td>\n",
- " <td>22ND STREET STN BAY 2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2111</th>\n",
- " <td>3535</td>\n",
- " <td>53497</td>\n",
- " <td>22ND STREET STN BAY 3</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>5822</th>\n",
- " <td>2252</td>\n",
- " <td>52230</td>\n",
- " <td>22ND STREET STN BAY 4</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " stop_id stop_code stop_name\n",
- "5351 1329 51318 22 TERMINUS UNDER KNIGHT ST BRIDGE\n",
- "888 10915 58173 22ND STREET STN BAY 1\n",
- "7941 10423 52165 22ND STREET STN BAY 2\n",
- "2111 3535 53497 22ND STREET STN BAY 3\n",
- "5822 2252 52230 22ND STREET STN BAY 4"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Preview the first stops in name order, showing both identifiers next to the name\n",
- "preview_columns = ['stop_id', 'stop_code', 'stop_name']\n",
- "feed.stops.sort_values('stop_name')[preview_columns].head()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Replace empty `stop_code` values with the corresponding `stop_id`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Rows with a missing stop_code fall back to the stop_id of the same row\n",
- "feed.stops['stop_code'] = feed.stops['stop_code'].fillna(feed.stops['stop_id'])"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Ensure `stop_id` and `stop_code` are unique"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Both columns must be duplicate-free: stop_code is about to become the new stop_id,\n",
- "# so a repeated value would merge two distinct stops.\n",
- "assert feed.stops.stop_id.is_unique, 'stop_id contains duplicate values'\n",
- "assert feed.stops.stop_code.is_unique, 'stop_code contains duplicate values'"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Create a dictionary mapping `stop_id` to `stop_code`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Pair the two columns row by row. `DataFrame.as_matrix()` is deprecated (and removed\n",
- "# in pandas 1.0); zipping the columns also keeps each value's own type.\n",
- "mapping = dict(zip(feed.stops.stop_id, feed.stops.stop_code))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Remove `stop_code` column"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [],
- "source": [
- "# stop_code is now captured in `mapping`, so remove the column from the table in place\n",
- "feed.stops.drop(labels='stop_code', axis='columns', inplace=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Replace every reference to an old `stop_id` with its new value (the former `stop_code`)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "def swap_id(stop_id):\n",
- "    \"\"\"Return the replacement for `stop_id` from `mapping`; unknown IDs raise KeyError.\"\"\"\n",
- "    return mapping[stop_id]\n",
- "\n",
- "# Required references: every value must have an entry in the mapping\n",
- "feed.stops['stop_id'] = feed.stops['stop_id'].apply(swap_id)\n",
- "feed.stop_times['stop_id'] = feed.stop_times['stop_id'].apply(swap_id)\n",
- "\n",
- "if not feed.transfers.empty:\n",
- "    for column in ('from_stop_id', 'to_stop_id'):\n",
- "        feed.transfers[column] = feed.transfers[column].apply(swap_id)\n",
- "\n",
- "if 'parent_station' in feed.stops.columns:\n",
- "    # optional field: anything without an entry in `mapping` (e.g. a blank parent) becomes NaN\n",
- "    feed.stops['parent_station'] = feed.stops['parent_station'].apply(\n",
- "        lambda parent: mapping.get(parent, np.nan)\n",
- "    )"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Create a new GTFS file"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'/Users/drw/Code/partridge/scratch/cmbc-translink-swapped.zip'"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "import os\n",
- "import shutil\n",
- "import tempfile\n",
- "\n",
- "outpath = 'scratch/cmbc-translink-swapped.zip'\n",
- "\n",
- "# Stage every non-empty table as a CSV file in a scratch directory, then zip it.\n",
- "# The context manager creates the directory before the block runs and always removes\n",
- "# it afterwards, so cleanup can never touch an undefined or stale `tmpdir`.\n",
- "with tempfile.TemporaryDirectory() as tmpdir:\n",
- "    for node in ptg.writers.DEFAULT_NODES:\n",
- "        df = feed.get(node)\n",
- "        if not df.empty:\n",
- "            df.to_csv(os.path.join(tmpdir, node), index=False)\n",
- "    # make_archive appends the '.zip' suffix itself, so pass the path without it\n",
- "    archive_path = shutil.make_archive(os.path.splitext(outpath)[0], 'zip', tmpdir)\n",
- "\n",
- "# Display the path of the archive that was written\n",
- "archive_path"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Inspect the result"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style>\n",
- " .dataframe thead tr:only-child th {\n",
- " text-align: right;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: left;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>stop_id</th>\n",
- " <th>stop_name</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>5351</th>\n",
- " <td>51318</td>\n",
- " <td>22 TERMINUS UNDER KNIGHT ST BRIDGE</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>888</th>\n",
- " <td>58173</td>\n",
- " <td>22ND STREET STN BAY 1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>7941</th>\n",
- " <td>52165</td>\n",
- " <td>22ND STREET STN BAY 2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2111</th>\n",
- " <td>53497</td>\n",
- " <td>22ND STREET STN BAY 3</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>5822</th>\n",
- " <td>52230</td>\n",
- " <td>22ND STREET STN BAY 4</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " stop_id stop_name\n",
- "5351 51318 22 TERMINUS UNDER KNIGHT ST BRIDGE\n",
- "888 58173 22ND STREET STN BAY 1\n",
- "7941 52165 22ND STREET STN BAY 2\n",
- "2111 53497 22ND STREET STN BAY 3\n",
- "5822 52230 22ND STREET STN BAY 4"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Load the archive at `outpath` with `ptg.feed` and preview its stops in name order\n",
- "swapped_feed = ptg.feed(outpath)\n",
- "swapped_feed.stops.sort_values('stop_name')[['stop_id', 'stop_name']].head()"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.2"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Add Comment
Please, Sign In to add comment