{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import partridge as ptg"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read the zip using `raw_feed`, which does not parse or prune the files."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"feed = ptg.raw_feed('scratch/cmbc-translink.zip')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Inspect the original stops file"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>stop_id</th>\n",
"      <th>stop_code</th>\n",
"      <th>stop_name</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>5351</th>\n",
"      <td>1329</td>\n",
"      <td>51318</td>\n",
"      <td>22 TERMINUS UNDER KNIGHT ST BRIDGE</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>888</th>\n",
"      <td>10915</td>\n",
"      <td>58173</td>\n",
"      <td>22ND STREET STN BAY 1</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>7941</th>\n",
"      <td>10423</td>\n",
"      <td>52165</td>\n",
"      <td>22ND STREET STN BAY 2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2111</th>\n",
"      <td>3535</td>\n",
"      <td>53497</td>\n",
"      <td>22ND STREET STN BAY 3</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>5822</th>\n",
"      <td>2252</td>\n",
"      <td>52230</td>\n",
"      <td>22ND STREET STN BAY 4</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stop_id stop_code stop_name\n",
"5351 1329 51318 22 TERMINUS UNDER KNIGHT ST BRIDGE\n",
"888 10915 58173 22ND STREET STN BAY 1\n",
"7941 10423 52165 22ND STREET STN BAY 2\n",
"2111 3535 53497 22ND STREET STN BAY 3\n",
"5822 2252 52230 22ND STREET STN BAY 4"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows of the stops table, ordered by name, limited to the id columns.\n",
"(\n",
"    feed.stops\n",
"    .sort_values('stop_name')\n",
"    .loc[:, ['stop_id', 'stop_code', 'stop_name']]\n",
"    .head()\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Replace empty `stop_code` values with the corresponding `stop_id`"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Rows with a missing stop_code fall back to their own stop_id.\n",
"feed.stops['stop_code'] = feed.stops['stop_code'].fillna(feed.stops['stop_id'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ensure `stop_id` and `stop_code` are unique"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# `Series.is_unique` states the intent directly; the messages identify\n",
"# which column broke the one-to-one requirement of the id swap.\n",
"assert feed.stops.stop_id.is_unique, 'stop_id values must be unique'\n",
"assert feed.stops.stop_code.is_unique, 'stop_code values must be unique'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a dictionary mapping `stop_id` to `stop_code`"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Pair the two columns row by row. `DataFrame.as_matrix()` was deprecated and\n",
"# later removed from pandas, so the lookup table is built without it.\n",
"mapping = dict(zip(feed.stops.stop_id, feed.stops.stop_code))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Remove `stop_code` column"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Remove the column from the stops table in place.\n",
"del feed.stops['stop_code']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Replace every reference to an old `stop_id` with its new value"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def swap_id(stop_id):\n",
"    \"\"\"Return the replacement id stored in `mapping`; raises KeyError for an unknown `stop_id`.\"\"\"\n",
"    return mapping[stop_id]\n",
"\n",
"# Required references: every value is looked up strictly through swap_id.\n",
"feed.stops['stop_id'] = feed.stops['stop_id'].apply(swap_id)\n",
"feed.stop_times['stop_id'] = feed.stop_times['stop_id'].apply(swap_id)\n",
"\n",
"if not feed.transfers.empty:\n",
"    feed.transfers['from_stop_id'] = feed.transfers['from_stop_id'].apply(swap_id)\n",
"    feed.transfers['to_stop_id'] = feed.transfers['to_stop_id'].apply(swap_id)\n",
"\n",
"if 'parent_station' in feed.stops.columns:\n",
"    # Optional field: values with no entry in `mapping` (missing ones included) come back as NaN.\n",
"    feed.stops['parent_station'] = feed.stops['parent_station'].apply(\n",
"        lambda station: mapping.get(station, np.nan)\n",
"    )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a new GTFS file"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/Users/drw/Code/partridge/scratch/cmbc-translink-swapped.zip'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"import shutil\n",
"import tempfile\n",
"\n",
"outpath = 'scratch/cmbc-translink-swapped.zip'\n",
"\n",
"# TemporaryDirectory removes the staging directory on exit, even when a write\n",
"# fails, and never touches `tmpdir` during cleanup if creating it raised.\n",
"with tempfile.TemporaryDirectory() as tmpdir:\n",
"    for node in ptg.writers.DEFAULT_NODES:\n",
"        df = feed.get(node)\n",
"        # Tables without rows are skipped, so no empty files land in the archive.\n",
"        if not df.empty:\n",
"            df.to_csv(os.path.join(tmpdir, node), index=False)\n",
"    # make_archive appends '.zip' itself, so pass the path without its extension.\n",
"    shutil.make_archive(os.path.splitext(outpath)[0], 'zip', tmpdir)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Inspect the result"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>stop_id</th>\n",
"      <th>stop_name</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>5351</th>\n",
"      <td>51318</td>\n",
"      <td>22 TERMINUS UNDER KNIGHT ST BRIDGE</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>888</th>\n",
"      <td>58173</td>\n",
"      <td>22ND STREET STN BAY 1</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>7941</th>\n",
"      <td>52165</td>\n",
"      <td>22ND STREET STN BAY 2</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2111</th>\n",
"      <td>53497</td>\n",
"      <td>22ND STREET STN BAY 3</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>5822</th>\n",
"      <td>52230</td>\n",
"      <td>22ND STREET STN BAY 4</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" stop_id stop_name\n",
"5351 51318 22 TERMINUS UNDER KNIGHT ST BRIDGE\n",
"888 58173 22ND STREET STN BAY 1\n",
"7941 52165 22ND STREET STN BAY 2\n",
"2111 53497 22ND STREET STN BAY 3\n",
"5822 52230 22ND STREET STN BAY 4"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Reload the archive that was just written and preview its stops, ordered by name.\n",
"(\n",
"    ptg.feed(outpath)\n",
"    .stops\n",
"    .sort_values('stop_name')\n",
"    .loc[:, ['stop_id', 'stop_name']]\n",
"    .head()\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}