Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "from geopy.geocoders import GoogleV3\n",
- "import json\n",
- "import googlemaps"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 105,
- "metadata": {
- "collapsed": false,
- "scrolled": true
- },
- "outputs": [],
- "source": [
- "# Add the data using pandas and check that it's working correctly\n",
- "data = pd.read_excel('/Users/tylerdahlberg/projects/profdev/chantal_geocoding/Mod TRACKSmapping Allentown.xlsx',sheetname='Location Information')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 106,
- "metadata": {
- "collapsed": false,
- "scrolled": true
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(366, 14)\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "0 NaN\n",
- "1 NaN\n",
- "2 NaN\n",
- "3 5th & Linden, Allentown, PA\n",
- "4 NaN\n",
- "Name: gcaddress, dtype: object"
- ]
- },
- "execution_count": 106,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Create a merged address field\n",
- "data['gcaddress'] = data['CrossStreet A'] + \" & \" + data['CrossStreet B'] + \", \" + data['City'] + \", \" + data['State (eg PA)']\n",
- "print(data.shape)\n",
- "data['gcaddress'].head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(108, 14)"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "'''# Drop rows with missing addresse values\n",
- "cleandata = data.dropna(subset=['gcaddress'])\n",
- "lessdata = cleandata.groupby(['gcaddress']).head(1)\n",
- "lessdata.shape'''"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Geocode using googlemaps"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 107,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "#### Test using google maps\n",
- "#gmaps.geocode('1716 Mount Vernon St Philadelphia PA')[0]['geometry']['location']['lat']\n",
- "#[0]['']['lat']\n",
- "gmaps = googlemaps.Client(key='',queries_per_second=50)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 108,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "data['lat'] = ''\n",
- "data['lon'] = ''"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 109,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Geocoding complete\n"
- ]
- }
- ],
- "source": [
- "data['lat'] = data['gcaddress'].apply(gmaps.geocode).apply(lambda x: (x[0]['geometry']['location']['lat']))\n",
- "data['lon'] = data['gcaddress'].apply(gmaps.geocode).apply(lambda x: (x[0]['geometry']['location']['lng']))\n",
- "print('Geocoding complete')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 110,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Item_Number</th>\n",
- " <th>CrossStreet A Question Name</th>\n",
- " <th>CrossStreet A</th>\n",
- " <th>CrossStreet B Question Name</th>\n",
- " <th>CrossStreet B</th>\n",
- " <th>Distance Descriptor</th>\n",
- " <th>City</th>\n",
- " <th>State (eg PA)</th>\n",
- " <th>Zipcode</th>\n",
- " <th>Item 1D (Participant ID)</th>\n",
- " <th>Optional: Exact Street Address</th>\n",
- " <th>Community</th>\n",
- " <th>Notes</th>\n",
- " <th>gcaddress</th>\n",
- " <th>lat</th>\n",
- " <th>lon</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>Item_6</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>0.5 to 1</td>\n",
- " <td>Allentown</td>\n",
- " <td>PA</td>\n",
- " <td>18102</td>\n",
- " <td>Allen316c2</td>\n",
- " <td>101 Tilghman St</td>\n",
- " <td>Allentown</td>\n",
- " <td>Elias Market \"Syrian place\"?</td>\n",
- " <td>NaN</td>\n",
- " <td>18.775632</td>\n",
- " <td>100.773042</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>Item_4</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>1 to 2</td>\n",
- " <td>Allentown</td>\n",
- " <td>PA</td>\n",
- " <td>18102</td>\n",
- " <td>Allen324s7</td>\n",
- " <td>106 N 17th Street</td>\n",
- " <td>Allentown</td>\n",
- " <td>\"William\" school (William Allen HS?)</td>\n",
- " <td>NaN</td>\n",
- " <td>18.775632</td>\n",
- " <td>100.773042</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>Item_3</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>1 to 2</td>\n",
- " <td>Allentown</td>\n",
- " <td>PA</td>\n",
- " <td>18102</td>\n",
- " <td>Allen316c5</td>\n",
- " <td>106 N 17th Street</td>\n",
- " <td>Allentown</td>\n",
- " <td>William Allen High School</td>\n",
- " <td>NaN</td>\n",
- " <td>18.775632</td>\n",
- " <td>100.773042</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>Item_11</td>\n",
- " <td>Pantry_1a</td>\n",
- " <td>5th</td>\n",
- " <td>Pantry_1b</td>\n",
- " <td>Linden</td>\n",
- " <td>0.5 to 1</td>\n",
- " <td>Allentown</td>\n",
- " <td>PA</td>\n",
- " <td>18102</td>\n",
- " <td>Allen324c4</td>\n",
- " <td>108 N 5th St</td>\n",
- " <td>Allentown</td>\n",
- " <td>Grace Episcopal Church</td>\n",
- " <td>5th & Linden, Allentown, PA</td>\n",
- " <td>40.604685</td>\n",
- " <td>-75.468577</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>Item_11</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>0.5 to 1</td>\n",
- " <td>Allentown</td>\n",
- " <td>PA</td>\n",
- " <td>18102</td>\n",
- " <td>Allen324s7</td>\n",
- " <td>112 N 5th Street</td>\n",
- " <td>Allentown</td>\n",
- " <td>New Bethany Ministries</td>\n",
- " <td>NaN</td>\n",
- " <td>18.775632</td>\n",
- " <td>100.773042</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " Item_Number CrossStreet A Question Name CrossStreet A \\\n",
- "0 Item_6 NaN NaN \n",
- "1 Item_4 NaN NaN \n",
- "2 Item_3 NaN NaN \n",
- "3 Item_11 Pantry_1a 5th \n",
- "4 Item_11 NaN NaN \n",
- "\n",
- " CrossStreet B Question Name CrossStreet B Distance Descriptor City \\\n",
- "0 NaN NaN 0.5 to 1 Allentown \n",
- "1 NaN NaN 1 to 2 Allentown \n",
- "2 NaN NaN 1 to 2 Allentown \n",
- "3 Pantry_1b Linden 0.5 to 1 Allentown \n",
- "4 NaN NaN 0.5 to 1 Allentown \n",
- "\n",
- " State (eg PA) Zipcode Item 1D (Participant ID) \\\n",
- "0 PA 18102 Allen316c2 \n",
- "1 PA 18102 Allen324s7 \n",
- "2 PA 18102 Allen316c5 \n",
- "3 PA 18102 Allen324c4 \n",
- "4 PA 18102 Allen324s7 \n",
- "\n",
- " Optional: Exact Street Address Community \\\n",
- "0 101 Tilghman St Allentown \n",
- "1 106 N 17th Street Allentown \n",
- "2 106 N 17th Street Allentown \n",
- "3 108 N 5th St Allentown \n",
- "4 112 N 5th Street Allentown \n",
- "\n",
- " Notes gcaddress \\\n",
- "0 Elias Market \"Syrian place\"? NaN \n",
- "1 \"William\" school (William Allen HS?) NaN \n",
- "2 William Allen High School NaN \n",
- "3 Grace Episcopal Church 5th & Linden, Allentown, PA \n",
- "4 New Bethany Ministries NaN \n",
- "\n",
- " lat lon \n",
- "0 18.775632 100.773042 \n",
- "1 18.775632 100.773042 \n",
- "2 18.775632 100.773042 \n",
- "3 40.604685 -75.468577 \n",
- "4 18.775632 100.773042 "
- ]
- },
- "execution_count": 110,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "data.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 111,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "data.to_csv('/Users/tylerdahlberg/projects/profdev/chantal_geocoding/geocoded/geocoded_Mod TRACKSmapping Allentown.csv')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python [Root]",
- "language": "python",
- "name": "Python [Root]"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.5.2"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement