Advertisement
Guest User

Untitled

a guest
Aug 28th, 2016
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.99 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 20,
  6. "metadata": {
  7. "collapsed": false
  8. },
  9. "outputs": [],
  10. "source": [
  11. "import pandas as pd\n",
  12. "from geopy.geocoders import GoogleV3\n",
  13. "import json\n",
  14. "import googlemaps"
  15. ]
  16. },
  17. {
  18. "cell_type": "code",
  19. "execution_count": 105,
  20. "metadata": {
  21. "collapsed": false,
  22. "scrolled": true
  23. },
  24. "outputs": [],
  25. "source": [
  26. "# Add the data using pandas and check that it's working correctly\n",
  27. "data = pd.read_excel('/Users/tylerdahlberg/projects/profdev/chantal_geocoding/Mod TRACKSmapping Allentown.xlsx',sheetname='Location Information')"
  28. ]
  29. },
  30. {
  31. "cell_type": "code",
  32. "execution_count": 106,
  33. "metadata": {
  34. "collapsed": false,
  35. "scrolled": true
  36. },
  37. "outputs": [
  38. {
  39. "name": "stdout",
  40. "output_type": "stream",
  41. "text": [
  42. "(366, 14)\n"
  43. ]
  44. },
  45. {
  46. "data": {
  47. "text/plain": [
  48. "0 NaN\n",
  49. "1 NaN\n",
  50. "2 NaN\n",
  51. "3 5th & Linden, Allentown, PA\n",
  52. "4 NaN\n",
  53. "Name: gcaddress, dtype: object"
  54. ]
  55. },
  56. "execution_count": 106,
  57. "metadata": {},
  58. "output_type": "execute_result"
  59. }
  60. ],
  61. "source": [
  62. "# Create a merged address field\n",
  63. "data['gcaddress'] = data['CrossStreet A'] + \" & \" + data['CrossStreet B'] + \", \" + data['City'] + \", \" + data['State (eg PA)']\n",
  64. "print(data.shape)\n",
  65. "data['gcaddress'].head()"
  66. ]
  67. },
  68. {
  69. "cell_type": "code",
  70. "execution_count": 12,
  71. "metadata": {
  72. "collapsed": false
  73. },
  74. "outputs": [
  75. {
  76. "data": {
  77. "text/plain": [
  78. "(108, 14)"
  79. ]
  80. },
  81. "execution_count": 12,
  82. "metadata": {},
  83. "output_type": "execute_result"
  84. }
  85. ],
  86. "source": [
  87. "'''# Drop rows with missing addresse values\n",
  88. "cleandata = data.dropna(subset=['gcaddress'])\n",
  89. "lessdata = cleandata.groupby(['gcaddress']).head(1)\n",
  90. "lessdata.shape'''"
  91. ]
  92. },
  93. {
  94. "cell_type": "markdown",
  95. "metadata": {},
  96. "source": [
  97. "### Geocode using googlemaps"
  98. ]
  99. },
  100. {
  101. "cell_type": "code",
  102. "execution_count": 107,
  103. "metadata": {
  104. "collapsed": false
  105. },
  106. "outputs": [],
  107. "source": [
  108. "#### Test using google maps\n",
  109. "#gmaps.geocode('1716 Mount Vernon St Philadelphia PA')[0]['geometry']['location']['lat']\n",
  110. "#[0]['']['lat']\n",
  111. "gmaps = googlemaps.Client(key='',queries_per_second=50)"
  112. ]
  113. },
  114. {
  115. "cell_type": "code",
  116. "execution_count": 108,
  117. "metadata": {
  118. "collapsed": false
  119. },
  120. "outputs": [],
  121. "source": [
  122. "data['lat'] = ''\n",
  123. "data['lon'] = ''"
  124. ]
  125. },
  126. {
  127. "cell_type": "code",
  128. "execution_count": 109,
  129. "metadata": {
  130. "collapsed": false
  131. },
  132. "outputs": [
  133. {
  134. "name": "stdout",
  135. "output_type": "stream",
  136. "text": [
  137. "Geocoding complete\n"
  138. ]
  139. }
  140. ],
  141. "source": [
  142. "data['lat'] = data['gcaddress'].apply(gmaps.geocode).apply(lambda x: (x[0]['geometry']['location']['lat']))\n",
  143. "data['lon'] = data['gcaddress'].apply(gmaps.geocode).apply(lambda x: (x[0]['geometry']['location']['lng']))\n",
  144. "print('Geocoding complete')"
  145. ]
  146. },
  147. {
  148. "cell_type": "code",
  149. "execution_count": 110,
  150. "metadata": {
  151. "collapsed": false
  152. },
  153. "outputs": [
  154. {
  155. "data": {
  156. "text/html": [
  157. "<div>\n",
  158. "<table border=\"1\" class=\"dataframe\">\n",
  159. " <thead>\n",
  160. " <tr style=\"text-align: right;\">\n",
  161. " <th></th>\n",
  162. " <th>Item_Number</th>\n",
  163. " <th>CrossStreet A Question Name</th>\n",
  164. " <th>CrossStreet A</th>\n",
  165. " <th>CrossStreet B Question Name</th>\n",
  166. " <th>CrossStreet B</th>\n",
  167. " <th>Distance Descriptor</th>\n",
  168. " <th>City</th>\n",
  169. " <th>State (eg PA)</th>\n",
  170. " <th>Zipcode</th>\n",
  171. " <th>Item 1D (Participant ID)</th>\n",
  172. " <th>Optional: Exact Street Address</th>\n",
  173. " <th>Community</th>\n",
  174. " <th>Notes</th>\n",
  175. " <th>gcaddress</th>\n",
  176. " <th>lat</th>\n",
  177. " <th>lon</th>\n",
  178. " </tr>\n",
  179. " </thead>\n",
  180. " <tbody>\n",
  181. " <tr>\n",
  182. " <th>0</th>\n",
  183. " <td>Item_6</td>\n",
  184. " <td>NaN</td>\n",
  185. " <td>NaN</td>\n",
  186. " <td>NaN</td>\n",
  187. " <td>NaN</td>\n",
  188. " <td>0.5 to 1</td>\n",
  189. " <td>Allentown</td>\n",
  190. " <td>PA</td>\n",
  191. " <td>18102</td>\n",
  192. " <td>Allen316c2</td>\n",
  193. " <td>101 Tilghman St</td>\n",
  194. " <td>Allentown</td>\n",
  195. " <td>Elias Market \"Syrian place\"?</td>\n",
  196. " <td>NaN</td>\n",
  197. " <td>18.775632</td>\n",
  198. " <td>100.773042</td>\n",
  199. " </tr>\n",
  200. " <tr>\n",
  201. " <th>1</th>\n",
  202. " <td>Item_4</td>\n",
  203. " <td>NaN</td>\n",
  204. " <td>NaN</td>\n",
  205. " <td>NaN</td>\n",
  206. " <td>NaN</td>\n",
  207. " <td>1 to 2</td>\n",
  208. " <td>Allentown</td>\n",
  209. " <td>PA</td>\n",
  210. " <td>18102</td>\n",
  211. " <td>Allen324s7</td>\n",
  212. " <td>106 N 17th Street</td>\n",
  213. " <td>Allentown</td>\n",
  214. " <td>\"William\" school (William Allen HS?)</td>\n",
  215. " <td>NaN</td>\n",
  216. " <td>18.775632</td>\n",
  217. " <td>100.773042</td>\n",
  218. " </tr>\n",
  219. " <tr>\n",
  220. " <th>2</th>\n",
  221. " <td>Item_3</td>\n",
  222. " <td>NaN</td>\n",
  223. " <td>NaN</td>\n",
  224. " <td>NaN</td>\n",
  225. " <td>NaN</td>\n",
  226. " <td>1 to 2</td>\n",
  227. " <td>Allentown</td>\n",
  228. " <td>PA</td>\n",
  229. " <td>18102</td>\n",
  230. " <td>Allen316c5</td>\n",
  231. " <td>106 N 17th Street</td>\n",
  232. " <td>Allentown</td>\n",
  233. " <td>William Allen High School</td>\n",
  234. " <td>NaN</td>\n",
  235. " <td>18.775632</td>\n",
  236. " <td>100.773042</td>\n",
  237. " </tr>\n",
  238. " <tr>\n",
  239. " <th>3</th>\n",
  240. " <td>Item_11</td>\n",
  241. " <td>Pantry_1a</td>\n",
  242. " <td>5th</td>\n",
  243. " <td>Pantry_1b</td>\n",
  244. " <td>Linden</td>\n",
  245. " <td>0.5 to 1</td>\n",
  246. " <td>Allentown</td>\n",
  247. " <td>PA</td>\n",
  248. " <td>18102</td>\n",
  249. " <td>Allen324c4</td>\n",
  250. " <td>108 N 5th St</td>\n",
  251. " <td>Allentown</td>\n",
  252. " <td>Grace Episcopal Church</td>\n",
  253. " <td>5th & Linden, Allentown, PA</td>\n",
  254. " <td>40.604685</td>\n",
  255. " <td>-75.468577</td>\n",
  256. " </tr>\n",
  257. " <tr>\n",
  258. " <th>4</th>\n",
  259. " <td>Item_11</td>\n",
  260. " <td>NaN</td>\n",
  261. " <td>NaN</td>\n",
  262. " <td>NaN</td>\n",
  263. " <td>NaN</td>\n",
  264. " <td>0.5 to 1</td>\n",
  265. " <td>Allentown</td>\n",
  266. " <td>PA</td>\n",
  267. " <td>18102</td>\n",
  268. " <td>Allen324s7</td>\n",
  269. " <td>112 N 5th Street</td>\n",
  270. " <td>Allentown</td>\n",
  271. " <td>New Bethany Ministries</td>\n",
  272. " <td>NaN</td>\n",
  273. " <td>18.775632</td>\n",
  274. " <td>100.773042</td>\n",
  275. " </tr>\n",
  276. " </tbody>\n",
  277. "</table>\n",
  278. "</div>"
  279. ],
  280. "text/plain": [
  281. " Item_Number CrossStreet A Question Name CrossStreet A \\\n",
  282. "0 Item_6 NaN NaN \n",
  283. "1 Item_4 NaN NaN \n",
  284. "2 Item_3 NaN NaN \n",
  285. "3 Item_11 Pantry_1a 5th \n",
  286. "4 Item_11 NaN NaN \n",
  287. "\n",
  288. " CrossStreet B Question Name CrossStreet B Distance Descriptor City \\\n",
  289. "0 NaN NaN 0.5 to 1 Allentown \n",
  290. "1 NaN NaN 1 to 2 Allentown \n",
  291. "2 NaN NaN 1 to 2 Allentown \n",
  292. "3 Pantry_1b Linden 0.5 to 1 Allentown \n",
  293. "4 NaN NaN 0.5 to 1 Allentown \n",
  294. "\n",
  295. " State (eg PA) Zipcode Item 1D (Participant ID) \\\n",
  296. "0 PA 18102 Allen316c2 \n",
  297. "1 PA 18102 Allen324s7 \n",
  298. "2 PA 18102 Allen316c5 \n",
  299. "3 PA 18102 Allen324c4 \n",
  300. "4 PA 18102 Allen324s7 \n",
  301. "\n",
  302. " Optional: Exact Street Address Community \\\n",
  303. "0 101 Tilghman St Allentown \n",
  304. "1 106 N 17th Street Allentown \n",
  305. "2 106 N 17th Street Allentown \n",
  306. "3 108 N 5th St Allentown \n",
  307. "4 112 N 5th Street Allentown \n",
  308. "\n",
  309. " Notes gcaddress \\\n",
  310. "0 Elias Market \"Syrian place\"? NaN \n",
  311. "1 \"William\" school (William Allen HS?) NaN \n",
  312. "2 William Allen High School NaN \n",
  313. "3 Grace Episcopal Church 5th & Linden, Allentown, PA \n",
  314. "4 New Bethany Ministries NaN \n",
  315. "\n",
  316. " lat lon \n",
  317. "0 18.775632 100.773042 \n",
  318. "1 18.775632 100.773042 \n",
  319. "2 18.775632 100.773042 \n",
  320. "3 40.604685 -75.468577 \n",
  321. "4 18.775632 100.773042 "
  322. ]
  323. },
  324. "execution_count": 110,
  325. "metadata": {},
  326. "output_type": "execute_result"
  327. }
  328. ],
  329. "source": [
  330. "data.head()"
  331. ]
  332. },
  333. {
  334. "cell_type": "code",
  335. "execution_count": 111,
  336. "metadata": {
  337. "collapsed": true
  338. },
  339. "outputs": [],
  340. "source": [
  341. "data.to_csv('/Users/tylerdahlberg/projects/profdev/chantal_geocoding/geocoded/geocoded_Mod TRACKSmapping Allentown.csv')"
  342. ]
  343. },
  344. {
  345. "cell_type": "code",
  346. "execution_count": null,
  347. "metadata": {
  348. "collapsed": true
  349. },
  350. "outputs": [],
  351. "source": []
  352. }
  353. ],
  354. "metadata": {
  355. "kernelspec": {
  356. "display_name": "Python [Root]",
  357. "language": "python",
  358. "name": "Python [Root]"
  359. },
  360. "language_info": {
  361. "codemirror_mode": {
  362. "name": "ipython",
  363. "version": 3
  364. },
  365. "file_extension": ".py",
  366. "mimetype": "text/x-python",
  367. "name": "python",
  368. "nbconvert_exporter": "python",
  369. "pygments_lexer": "ipython3",
  370. "version": "3.5.2"
  371. }
  372. },
  373. "nbformat": 4,
  374. "nbformat_minor": 0
  375. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement