Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "import pandas as pd"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Raw listing of schools in Córdoba; some rows lack coordinates.\n",
- "data_cba = pd.read_csv(\"cordoba.csv\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style>\n",
- " .dataframe thead tr:only-child th {\n",
- " text-align: right;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: left;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Unnamed: 0</th>\n",
- " <th>codigo_postal</th>\n",
- " <th>departamento</th>\n",
- " <th>domicilio</th>\n",
- " <th>email</th>\n",
- " <th>jurisdiccion</th>\n",
- " <th>lat</th>\n",
- " <th>localidad</th>\n",
- " <th>lon</th>\n",
- " <th>nombre</th>\n",
- " <th>sector</th>\n",
- " <th>tags</th>\n",
- " <th>telefono</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>361</th>\n",
- " <td>419</td>\n",
- " <td>X2664</td>\n",
- " <td>UNION</td>\n",
- " <td>AVENIDA FUERZA AEREA ARGENTINA</td>\n",
- " <td>NaN</td>\n",
- " <td>Córdoba</td>\n",
- " <td>NaN</td>\n",
- " <td>BENJAMIN GOULD</td>\n",
- " <td>NaN</td>\n",
- " <td>C.E.N.M.A. Nº 191 ALEJO LEDESMA ANEXO EXTENSIO...</td>\n",
- " <td>Estatal</td>\n",
- " <td>Educación de Jóvenes y Adultos, Secundaria:</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>376</th>\n",
- " <td>434</td>\n",
- " <td>X2568</td>\n",
- " <td>UNION</td>\n",
- " <td>CALLE PUBLICA MORRISON</td>\n",
- " <td>madretransito@gmail.com</td>\n",
- " <td>Córdoba</td>\n",
- " <td>NaN</td>\n",
- " <td>MORRISON</td>\n",
- " <td>NaN</td>\n",
- " <td>INSTITUTO PRIVADO DIOCESANO MADRE TRANSITO</td>\n",
- " <td>Privado</td>\n",
- " <td>Educación Común, Jardín de infantes, Primaria</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>689</th>\n",
- " <td>4066</td>\n",
- " <td>NaN</td>\n",
- " <td>RIO SEGUNDO</td>\n",
- " <td>CALLE PUBLICA</td>\n",
- " <td>NaN</td>\n",
- " <td>Córdoba</td>\n",
- " <td>NaN</td>\n",
- " <td>CALCHIN OESTE</td>\n",
- " <td>NaN</td>\n",
- " <td>C.E.N.P.A. CALCHIN OESTE</td>\n",
- " <td>Estatal</td>\n",
- " <td>Educación de Jóvenes y Adultos, Primaria:</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>708</th>\n",
- " <td>4085</td>\n",
- " <td>X5915</td>\n",
- " <td>RIO SEGUNDO</td>\n",
- " <td>BELGRANO 277 CARRILOBO</td>\n",
- " <td>NaN</td>\n",
- " <td>Córdoba</td>\n",
- " <td>NaN</td>\n",
- " <td>CARRILOBO</td>\n",
- " <td>NaN</td>\n",
- " <td>C.E.N.M.A. POZO DEL MOLLE ANEXO CARRILOBO</td>\n",
- " <td>Estatal</td>\n",
- " <td>Educación de Jóvenes y Adultos, Secundaria:</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>861</th>\n",
- " <td>4242</td>\n",
- " <td>X5178</td>\n",
- " <td>PUNILLA</td>\n",
- " <td>LUCRECIA VILLALBA 258 SAN GERONIMO LA CUMBRE</td>\n",
- " <td>insllacumbre@yahoo.com.ar</td>\n",
- " <td>Córdoba</td>\n",
- " <td>NaN</td>\n",
- " <td>LA CUMBRE</td>\n",
- " <td>NaN</td>\n",
- " <td>J.DE INF.NUESTRA SEÑORA DE LOURDES</td>\n",
- " <td>Privado</td>\n",
- " <td>Educación Común, Jardín de infantes</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " Unnamed: 0 codigo_postal departamento \\\n",
- "361 419 X2664 UNION \n",
- "376 434 X2568 UNION \n",
- "689 4066 NaN RIO SEGUNDO \n",
- "708 4085 X5915 RIO SEGUNDO \n",
- "861 4242 X5178 PUNILLA \n",
- "\n",
- " domicilio email \\\n",
- "361 AVENIDA FUERZA AEREA ARGENTINA NaN \n",
- "376 CALLE PUBLICA MORRISON madretransito@gmail.com \n",
- "689 CALLE PUBLICA NaN \n",
- "708 BELGRANO 277 CARRILOBO NaN \n",
- "861 LUCRECIA VILLALBA 258 SAN GERONIMO LA CUMBRE insllacumbre@yahoo.com.ar \n",
- "\n",
- " jurisdiccion lat localidad lon \\\n",
- "361 Córdoba NaN BENJAMIN GOULD NaN \n",
- "376 Córdoba NaN MORRISON NaN \n",
- "689 Córdoba NaN CALCHIN OESTE NaN \n",
- "708 Córdoba NaN CARRILOBO NaN \n",
- "861 Córdoba NaN LA CUMBRE NaN \n",
- "\n",
- " nombre sector \\\n",
- "361 C.E.N.M.A. Nº 191 ALEJO LEDESMA ANEXO EXTENSIO... Estatal \n",
- "376 INSTITUTO PRIVADO DIOCESANO MADRE TRANSITO Privado \n",
- "689 C.E.N.P.A. CALCHIN OESTE Estatal \n",
- "708 C.E.N.M.A. POZO DEL MOLLE ANEXO CARRILOBO Estatal \n",
- "861 J.DE INF.NUESTRA SEÑORA DE LOURDES Privado \n",
- "\n",
- " tags telefono \n",
- "361 Educación de Jóvenes y Adultos, Secundaria: NaN \n",
- "376 Educación Común, Jardín de infantes, Primaria NaN \n",
- "689 Educación de Jóvenes y Adultos, Primaria: NaN \n",
- "708 Educación de Jóvenes y Adultos, Secundaria: NaN \n",
- "861 Educación Común, Jardín de infantes NaN "
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Keep only the schools where at least one of the two coordinates is missing.\n",
- "data_cba_no_ll = data_cba[data_cba[[\"lat\", \"lon\"]].isnull().any(axis=1)]\n",
- "data_cba_no_ll.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style>\n",
- " .dataframe thead tr:only-child th {\n",
- " text-align: right;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: left;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>cNivelList</th>\n",
- " <th>cueanexo</th>\n",
- " <th>departamento</th>\n",
- " <th>localidad</th>\n",
- " <th>nombre</th>\n",
- " <th>xLongitud</th>\n",
- " <th>yLatitud</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>[2]</td>\n",
- " <td>140468400</td>\n",
- " <td>RIO CUARTO</td>\n",
- " <td>RIO CUARTO</td>\n",
- " <td>10 DE JUNIO</td>\n",
- " <td>-64.315441</td>\n",
- " <td>-33.103243</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>[2]</td>\n",
- " <td>140165000</td>\n",
- " <td>RIO SEGUNDO</td>\n",
- " <td>CAMPO FALCO</td>\n",
- " <td>12 DE OCTUBRE</td>\n",
- " <td>-63.370966</td>\n",
- " <td>-31.441044</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>[2]</td>\n",
- " <td>140186200</td>\n",
- " <td>RIO PRIMERO</td>\n",
- " <td>BAJO HONDO</td>\n",
- " <td>12 DE OCTUBRE</td>\n",
- " <td>-63.173243</td>\n",
- " <td>-30.805935</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>[2]</td>\n",
- " <td>140253300</td>\n",
- " <td>CRUZ DEL EJE</td>\n",
- " <td>TRES ARBOLES</td>\n",
- " <td>12 DE OCTUBRE</td>\n",
- " <td>-65.037115</td>\n",
- " <td>-30.932113</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>[2, 1]</td>\n",
- " <td>140058800</td>\n",
- " <td>GENERAL SAN MARTIN</td>\n",
- " <td>PEDANIA YUCAT P/TIO PUJIO</td>\n",
- " <td>20 DE JUNIO</td>\n",
- " <td>-63.308297</td>\n",
- " <td>-32.381459</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " cNivelList cueanexo departamento localidad \\\n",
- "0 [2] 140468400 RIO CUARTO RIO CUARTO \n",
- "1 [2] 140165000 RIO SEGUNDO CAMPO FALCO \n",
- "2 [2] 140186200 RIO PRIMERO BAJO HONDO \n",
- "3 [2] 140253300 CRUZ DEL EJE TRES ARBOLES \n",
- "4 [2, 1] 140058800 GENERAL SAN MARTIN PEDANIA YUCAT P/TIO PUJIO \n",
- "\n",
- " nombre xLongitud yLatitud \n",
- "0 10 DE JUNIO -64.315441 -33.103243 \n",
- "1 12 DE OCTUBRE -63.370966 -31.441044 \n",
- "2 12 DE OCTUBRE -63.173243 -30.805935 \n",
- "3 12 DE OCTUBRE -65.037115 -30.932113 \n",
- "4 20 DE JUNIO -63.308297 -32.381459 "
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "import json\n",
- "from pprint import pprint\n",
- "\n",
- "# Reference list of schools with known coordinates (xLongitud / yLatitud).\n",
- "# JSON text is UTF-8 by specification; being explicit keeps the platform's\n",
- "# default encoding from mangling the accented names.\n",
- "with open('escuelas_cordoba.json', encoding='utf-8') as data_file:\n",
- "    df_coords = pd.DataFrame(json.load(data_file))\n",
- "\n",
- "df_coords.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style>\n",
- " .dataframe thead tr:only-child th {\n",
- " text-align: right;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: left;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Unnamed: 0</th>\n",
- " <th>codigo_postal</th>\n",
- " <th>departamento</th>\n",
- " <th>domicilio</th>\n",
- " <th>email</th>\n",
- " <th>jurisdiccion</th>\n",
- " <th>lat</th>\n",
- " <th>localidad</th>\n",
- " <th>lon</th>\n",
- " <th>nombre</th>\n",
- " <th>sector</th>\n",
- " <th>tags</th>\n",
- " <th>telefono</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>361</th>\n",
- " <td>419</td>\n",
- " <td>X2664</td>\n",
- " <td>UNION</td>\n",
- " <td>AVENIDA FUERZA AEREA ARGENTINA</td>\n",
- " <td>NaN</td>\n",
- " <td>Córdoba</td>\n",
- " <td>-33.588237</td>\n",
- " <td>BENJAMIN GOULD</td>\n",
- " <td>-62.729022</td>\n",
- " <td>C.E.N.M.A. Nº 191 ALEJO LEDESMA ANEXO EXTENSIO...</td>\n",
- " <td>Estatal</td>\n",
- " <td>Educación de Jóvenes y Adultos, Secundaria:</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>376</th>\n",
- " <td>434</td>\n",
- " <td>X2568</td>\n",
- " <td>UNION</td>\n",
- " <td>CALLE PUBLICA MORRISON</td>\n",
- " <td>madretransito@gmail.com</td>\n",
- " <td>Córdoba</td>\n",
- " <td>-32.593472</td>\n",
- " <td>MORRISON</td>\n",
- " <td>-62.833790</td>\n",
- " <td>INSTITUTO PRIVADO DIOCESANO MADRE TRANSITO</td>\n",
- " <td>Privado</td>\n",
- " <td>Educación Común, Jardín de infantes, Primaria</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>689</th>\n",
- " <td>4066</td>\n",
- " <td>NaN</td>\n",
- " <td>RIO SEGUNDO</td>\n",
- " <td>CALLE PUBLICA</td>\n",
- " <td>NaN</td>\n",
- " <td>Córdoba</td>\n",
- " <td>-31.860901</td>\n",
- " <td>CALCHIN OESTE</td>\n",
- " <td>-63.276600</td>\n",
- " <td>C.E.N.P.A. CALCHIN OESTE</td>\n",
- " <td>Estatal</td>\n",
- " <td>Educación de Jóvenes y Adultos, Primaria:</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>708</th>\n",
- " <td>4085</td>\n",
- " <td>X5915</td>\n",
- " <td>RIO SEGUNDO</td>\n",
- " <td>BELGRANO 277 CARRILOBO</td>\n",
- " <td>NaN</td>\n",
- " <td>Córdoba</td>\n",
- " <td>-31.875674</td>\n",
- " <td>CARRILOBO</td>\n",
- " <td>-63.116872</td>\n",
- " <td>C.E.N.M.A. POZO DEL MOLLE ANEXO CARRILOBO</td>\n",
- " <td>Estatal</td>\n",
- " <td>Educación de Jóvenes y Adultos, Secundaria:</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>861</th>\n",
- " <td>4242</td>\n",
- " <td>X5178</td>\n",
- " <td>PUNILLA</td>\n",
- " <td>LUCRECIA VILLALBA 258 SAN GERONIMO LA CUMBRE</td>\n",
- " <td>insllacumbre@yahoo.com.ar</td>\n",
- " <td>Córdoba</td>\n",
- " <td>-30.986542</td>\n",
- " <td>LA CUMBRE</td>\n",
- " <td>-64.488805</td>\n",
- " <td>J.DE INF.NUESTRA SEÑORA DE LOURDES</td>\n",
- " <td>Privado</td>\n",
- " <td>Educación Común, Jardín de infantes</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " Unnamed: 0 codigo_postal departamento \\\n",
- "361 419 X2664 UNION \n",
- "376 434 X2568 UNION \n",
- "689 4066 NaN RIO SEGUNDO \n",
- "708 4085 X5915 RIO SEGUNDO \n",
- "861 4242 X5178 PUNILLA \n",
- "\n",
- " domicilio email \\\n",
- "361 AVENIDA FUERZA AEREA ARGENTINA NaN \n",
- "376 CALLE PUBLICA MORRISON madretransito@gmail.com \n",
- "689 CALLE PUBLICA NaN \n",
- "708 BELGRANO 277 CARRILOBO NaN \n",
- "861 LUCRECIA VILLALBA 258 SAN GERONIMO LA CUMBRE insllacumbre@yahoo.com.ar \n",
- "\n",
- " jurisdiccion lat localidad lon \\\n",
- "361 Córdoba -33.588237 BENJAMIN GOULD -62.729022 \n",
- "376 Córdoba -32.593472 MORRISON -62.833790 \n",
- "689 Córdoba -31.860901 CALCHIN OESTE -63.276600 \n",
- "708 Córdoba -31.875674 CARRILOBO -63.116872 \n",
- "861 Córdoba -30.986542 LA CUMBRE -64.488805 \n",
- "\n",
- " nombre sector \\\n",
- "361 C.E.N.M.A. Nº 191 ALEJO LEDESMA ANEXO EXTENSIO... Estatal \n",
- "376 INSTITUTO PRIVADO DIOCESANO MADRE TRANSITO Privado \n",
- "689 C.E.N.P.A. CALCHIN OESTE Estatal \n",
- "708 C.E.N.M.A. POZO DEL MOLLE ANEXO CARRILOBO Estatal \n",
- "861 J.DE INF.NUESTRA SEÑORA DE LOURDES Privado \n",
- "\n",
- " tags telefono \n",
- "361 Educación de Jóvenes y Adultos, Secundaria: NaN \n",
- "376 Educación Común, Jardín de infantes, Primaria NaN \n",
- "689 Educación de Jóvenes y Adultos, Primaria: NaN \n",
- "708 Educación de Jóvenes y Adultos, Secundaria: NaN \n",
- "861 Educación Común, Jardín de infantes NaN "
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "def words_of(name):\n",
- "    \"\"\"Split a name on single spaces; a non-string name (e.g. NaN) yields no words.\"\"\"\n",
- "    return name.split(\" \") if isinstance(name, str) else []\n",
- "\n",
- "\n",
- "def pick_best_match(school_name, candidates):\n",
- "    \"\"\"Pick the reference school that best matches `school_name`.\n",
- "\n",
- "    candidates: non-empty DataFrame with a `nombre` column.\n",
- "    Returns the candidate row whose name contains the most words that also\n",
- "    appear in `school_name`; ties go to the first candidate in frame order.\n",
- "    \"\"\"\n",
- "    if len(candidates) == 1:\n",
- "        return candidates.iloc[0]\n",
- "    wanted = set(words_of(school_name))\n",
- "    shared = [sum(w in wanted for w in words_of(n)) for n in candidates[\"nombre\"]]\n",
- "    return candidates.iloc[shared.index(max(shared))]\n",
- "\n",
- "\n",
- "# Group the reference schools once by (departamento, localidad) instead of\n",
- "# merging the whole frame against every single row.\n",
- "coords_by_place = {\n",
- "    place: group\n",
- "    for place, group in df_coords.groupby([\"departamento\", \"localidad\"])\n",
- "}\n",
- "\n",
- "lats, lons = [], []\n",
- "for _, school in data_cba_no_ll.iterrows():\n",
- "    candidates = coords_by_place.get((school[\"departamento\"], school[\"localidad\"]))\n",
- "    if candidates is None:\n",
- "        # No reference school in this locality: keep the row with the\n",
- "        # coordinates it already had rather than failing on an empty match.\n",
- "        lats.append(school[\"lat\"])\n",
- "        lons.append(school[\"lon\"])\n",
- "        continue\n",
- "    best = pick_best_match(school[\"nombre\"], candidates)\n",
- "    lats.append(best[\"yLatitud\"])\n",
- "    lons.append(best[\"xLongitud\"])\n",
- "\n",
- "# Build the result in one step; growing a DataFrame row by row is quadratic.\n",
- "data_cba_con_coords = data_cba_no_ll.assign(lat=lats, lon=lons)\n",
- "data_cba_con_coords.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Persist the schools with their recovered coordinates (row labels are written as the first column).\n",
- "data_cba_con_coords.to_csv(path_or_buf=\"cordoba_con_coords.csv\")"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.5.2"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement