Guest User

Untitled

a guest
Feb 19th, 2018
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.91 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {},
  7. "outputs": [],
  8. "source": [
  9. "import numpy as np\n",
  10. "import partridge as ptg"
  11. ]
  12. },
  13. {
  14. "cell_type": "markdown",
  15. "metadata": {},
  16. "source": [
  17. "Read the zip using `raw_feed`, which does not parse or prune the files"
  18. ]
  19. },
  20. {
  21. "cell_type": "code",
  22. "execution_count": 2,
  23. "metadata": {},
  24. "outputs": [],
  25. "source": [
  26. "feed = ptg.raw_feed('scratch/cmbc-translink.zip')"
  27. ]
  28. },
  29. {
  30. "cell_type": "markdown",
  31. "metadata": {},
  32. "source": [
  33. "Inspect the original stops file"
  34. ]
  35. },
  36. {
  37. "cell_type": "code",
  38. "execution_count": 3,
  39. "metadata": {},
  40. "outputs": [
  41. {
  42. "data": {
  43. "text/html": [
  44. "<div>\n",
  45. "<style>\n",
  46. " .dataframe thead tr:only-child th {\n",
  47. " text-align: right;\n",
  48. " }\n",
  49. "\n",
  50. " .dataframe thead th {\n",
  51. " text-align: left;\n",
  52. " }\n",
  53. "\n",
  54. " .dataframe tbody tr th {\n",
  55. " vertical-align: top;\n",
  56. " }\n",
  57. "</style>\n",
  58. "<table border=\"1\" class=\"dataframe\">\n",
  59. " <thead>\n",
  60. " <tr style=\"text-align: right;\">\n",
  61. " <th></th>\n",
  62. " <th>stop_id</th>\n",
  63. " <th>stop_code</th>\n",
  64. " <th>stop_name</th>\n",
  65. " </tr>\n",
  66. " </thead>\n",
  67. " <tbody>\n",
  68. " <tr>\n",
  69. " <th>5351</th>\n",
  70. " <td>1329</td>\n",
  71. " <td>51318</td>\n",
  72. " <td>22 TERMINUS UNDER KNIGHT ST BRIDGE</td>\n",
  73. " </tr>\n",
  74. " <tr>\n",
  75. " <th>888</th>\n",
  76. " <td>10915</td>\n",
  77. " <td>58173</td>\n",
  78. " <td>22ND STREET STN BAY 1</td>\n",
  79. " </tr>\n",
  80. " <tr>\n",
  81. " <th>7941</th>\n",
  82. " <td>10423</td>\n",
  83. " <td>52165</td>\n",
  84. " <td>22ND STREET STN BAY 2</td>\n",
  85. " </tr>\n",
  86. " <tr>\n",
  87. " <th>2111</th>\n",
  88. " <td>3535</td>\n",
  89. " <td>53497</td>\n",
  90. " <td>22ND STREET STN BAY 3</td>\n",
  91. " </tr>\n",
  92. " <tr>\n",
  93. " <th>5822</th>\n",
  94. " <td>2252</td>\n",
  95. " <td>52230</td>\n",
  96. " <td>22ND STREET STN BAY 4</td>\n",
  97. " </tr>\n",
  98. " </tbody>\n",
  99. "</table>\n",
  100. "</div>"
  101. ],
  102. "text/plain": [
  103. " stop_id stop_code stop_name\n",
  104. "5351 1329 51318 22 TERMINUS UNDER KNIGHT ST BRIDGE\n",
  105. "888 10915 58173 22ND STREET STN BAY 1\n",
  106. "7941 10423 52165 22ND STREET STN BAY 2\n",
  107. "2111 3535 53497 22ND STREET STN BAY 3\n",
  108. "5822 2252 52230 22ND STREET STN BAY 4"
  109. ]
  110. },
  111. "execution_count": 3,
  112. "metadata": {},
  113. "output_type": "execute_result"
  114. }
  115. ],
  116. "source": [
  117. "feed.stops.sort_values('stop_name')[['stop_id', 'stop_code', 'stop_name']].head()"
  118. ]
  119. },
  120. {
  121. "cell_type": "markdown",
  122. "metadata": {},
  123. "source": [
  124. "Replace empty `stop_code` values with the corresponding `stop_id`"
  125. ]
  126. },
  127. {
  128. "cell_type": "code",
  129. "execution_count": 4,
  130. "metadata": {},
  131. "outputs": [],
  132. "source": [
  133. "feed.stops.stop_code = feed.stops.stop_code.fillna(feed.stops.stop_id)"
  134. ]
  135. },
  136. {
  137. "cell_type": "markdown",
  138. "metadata": {},
  139. "source": [
  140. "Ensure `stop_id` and `stop_code` are unique"
  141. ]
  142. },
  143. {
  144. "cell_type": "code",
  145. "execution_count": 5,
  146. "metadata": {},
  147. "outputs": [],
  148. "source": [
  149. "assert len(set(feed.stops.stop_id)) == len(feed.stops)\n",
  150. "assert len(set(feed.stops.stop_code)) == len(set(feed.stops.stop_id))"
  151. ]
  152. },
  153. {
  154. "cell_type": "markdown",
  155. "metadata": {},
  156. "source": [
  157. "Create a dictionary mapping `stop_id` to `stop_code`"
  158. ]
  159. },
  160. {
  161. "cell_type": "code",
  162. "execution_count": 6,
  163. "metadata": {},
  164. "outputs": [],
  165. "source": [
  166. "mapping = dict(feed.stops[['stop_id', 'stop_code']].as_matrix())"
  167. ]
  168. },
  169. {
  170. "cell_type": "markdown",
  171. "metadata": {},
  172. "source": [
  173. "Remove `stop_code` column"
  174. ]
  175. },
  176. {
  177. "cell_type": "code",
  178. "execution_count": 7,
  179. "metadata": {},
  180. "outputs": [],
  181. "source": [
  182. "feed.stops.drop('stop_code', axis=1, inplace=True)"
  183. ]
  184. },
  185. {
  186. "cell_type": "markdown",
  187. "metadata": {},
  188. "source": [
  189. "Replace every reference to an old `stop_id` with its new value (the former `stop_code`)"
  190. ]
  191. },
  192. {
  193. "cell_type": "code",
  194. "execution_count": 8,
  195. "metadata": {},
  196. "outputs": [],
  197. "source": [
  198. "def swap_id(stop_id):\n",
  199. " return mapping[stop_id]\n",
  200. "\n",
  201. "feed.stops.stop_id = feed.stops.stop_id.apply(swap_id)\n",
  202. "feed.stop_times.stop_id = feed.stop_times.stop_id.apply(swap_id)\n",
  203. "\n",
  204. "if not feed.transfers.empty:\n",
  205. " feed.transfers.from_stop_id = feed.transfers.from_stop_id.apply(swap_id)\n",
  206. " feed.transfers.to_stop_id = feed.transfers.to_stop_id.apply(swap_id)\n",
  207. "\n",
  208. "if 'parent_station' in feed.stops.columns:\n",
  209. " # optional field, preserve nan's\n",
  210. " feed.stops.parent_station = feed.stops.parent_station.apply(mapping.get, args=(np.nan,))"
  211. ]
  212. },
  213. {
  214. "cell_type": "markdown",
  215. "metadata": {},
  216. "source": [
  217. "Create a new GTFS file"
  218. ]
  219. },
  220. {
  221. "cell_type": "code",
  222. "execution_count": 9,
  223. "metadata": {},
  224. "outputs": [
  225. {
  226. "data": {
  227. "text/plain": [
  228. "'/Users/drw/Code/partridge/scratch/cmbc-translink-swapped.zip'"
  229. ]
  230. },
  231. "execution_count": 9,
  232. "metadata": {},
  233. "output_type": "execute_result"
  234. }
  235. ],
  236. "source": [
  237. "import os\n",
  238. "import shutil\n",
  239. "import tempfile\n",
  240. "\n",
  241. "outpath = 'scratch/cmbc-translink-swapped.zip'\n",
  242. "\n",
  243. "try:\n",
  244. " tmpdir = tempfile.mkdtemp()\n",
  245. " for node in ptg.writers.DEFAULT_NODES:\n",
  246. " df = feed.get(node)\n",
  247. " if not df.empty:\n",
  248. " df.to_csv(os.path.join(tmpdir, node), index=False)\n",
  249. " shutil.make_archive(os.path.splitext(outpath)[0], 'zip', tmpdir)\n",
  250. "finally:\n",
  251. " shutil.rmtree(tmpdir)"
  252. ]
  253. },
  254. {
  255. "cell_type": "markdown",
  256. "metadata": {},
  257. "source": [
  258. "Inspect the result"
  259. ]
  260. },
  261. {
  262. "cell_type": "code",
  263. "execution_count": 10,
  264. "metadata": {},
  265. "outputs": [
  266. {
  267. "data": {
  268. "text/html": [
  269. "<div>\n",
  270. "<style>\n",
  271. " .dataframe thead tr:only-child th {\n",
  272. " text-align: right;\n",
  273. " }\n",
  274. "\n",
  275. " .dataframe thead th {\n",
  276. " text-align: left;\n",
  277. " }\n",
  278. "\n",
  279. " .dataframe tbody tr th {\n",
  280. " vertical-align: top;\n",
  281. " }\n",
  282. "</style>\n",
  283. "<table border=\"1\" class=\"dataframe\">\n",
  284. " <thead>\n",
  285. " <tr style=\"text-align: right;\">\n",
  286. " <th></th>\n",
  287. " <th>stop_id</th>\n",
  288. " <th>stop_name</th>\n",
  289. " </tr>\n",
  290. " </thead>\n",
  291. " <tbody>\n",
  292. " <tr>\n",
  293. " <th>5351</th>\n",
  294. " <td>51318</td>\n",
  295. " <td>22 TERMINUS UNDER KNIGHT ST BRIDGE</td>\n",
  296. " </tr>\n",
  297. " <tr>\n",
  298. " <th>888</th>\n",
  299. " <td>58173</td>\n",
  300. " <td>22ND STREET STN BAY 1</td>\n",
  301. " </tr>\n",
  302. " <tr>\n",
  303. " <th>7941</th>\n",
  304. " <td>52165</td>\n",
  305. " <td>22ND STREET STN BAY 2</td>\n",
  306. " </tr>\n",
  307. " <tr>\n",
  308. " <th>2111</th>\n",
  309. " <td>53497</td>\n",
  310. " <td>22ND STREET STN BAY 3</td>\n",
  311. " </tr>\n",
  312. " <tr>\n",
  313. " <th>5822</th>\n",
  314. " <td>52230</td>\n",
  315. " <td>22ND STREET STN BAY 4</td>\n",
  316. " </tr>\n",
  317. " </tbody>\n",
  318. "</table>\n",
  319. "</div>"
  320. ],
  321. "text/plain": [
  322. " stop_id stop_name\n",
  323. "5351 51318 22 TERMINUS UNDER KNIGHT ST BRIDGE\n",
  324. "888 58173 22ND STREET STN BAY 1\n",
  325. "7941 52165 22ND STREET STN BAY 2\n",
  326. "2111 53497 22ND STREET STN BAY 3\n",
  327. "5822 52230 22ND STREET STN BAY 4"
  328. ]
  329. },
  330. "execution_count": 10,
  331. "metadata": {},
  332. "output_type": "execute_result"
  333. }
  334. ],
  335. "source": [
  336. "ptg.feed(outpath).stops.sort_values('stop_name')[['stop_id', 'stop_name']].head()"
  337. ]
  338. }
  339. ],
  340. "metadata": {
  341. "kernelspec": {
  342. "display_name": "Python 3",
  343. "language": "python",
  344. "name": "python3"
  345. },
  346. "language_info": {
  347. "codemirror_mode": {
  348. "name": "ipython",
  349. "version": 3
  350. },
  351. "file_extension": ".py",
  352. "mimetype": "text/x-python",
  353. "name": "python",
  354. "nbconvert_exporter": "python",
  355. "pygments_lexer": "ipython3",
  356. "version": "3.6.2"
  357. }
  358. },
  359. "nbformat": 4,
  360. "nbformat_minor": 2
  361. }
Add Comment
Please, Sign In to add comment