Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "metadata": {},
- "cell_type": "markdown",
- "source": "## Structure World Bank (WB) data for HuruMap"
- },
- {
- "metadata": {
- "trusted": true
- },
- "cell_type": "code",
- "source": "import pandas as pd",
- "execution_count": 1,
- "outputs": []
- },
- {
- "metadata": {
- "scrolled": true
- },
- "cell_type": "markdown",
- "source": "### Load the WB data and Takwimu indicators"
- },
- {
- "metadata": {
- "trusted": true
- },
- "cell_type": "code",
- "source": "# generate a dict from the indicators file\ntakwimu_indicators = pd.read_csv('../data/takwimu_indicators.csv',\n index_col=0, squeeze=True).to_dict()\n# Gather indicator data on the selected country\n\n# # Kenya\n# data = pd.read_csv('../data/Kenya/WB_KEN.csv')\n# structured = '../huru/ke/ke_'\n# country_code = 'KE'\n\n# Ethiopia\ndata = pd.read_csv('../data/Ethiopia/WB_ET.csv')\nstructured = '../huru/et/et_'\ncountry_code = 'ET'\n\n",
- "execution_count": 48,
- "outputs": []
- },
- {
- "metadata": {},
- "cell_type": "markdown",
- "source": "### Wrangle by Indicator"
- },
- {
- "metadata": {
- "trusted": true
- },
- "cell_type": "code",
- "source": "# population total\ndef population_total():\n \n i_code = ['SP.POP.TOTL.FE.IN','SP.POP.TOTL.MA.IN']\n\n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Population, male\": 'male',\"Population, female\":'female' })\n df['geo_code'] = country_code\n\n df = df.melt(id_vars=['geo_code','geo_version'], value_vars=['female','male'],\n var_name='sex', value_name='total')\n population = df.set_index(['geo_code','geo_version']).sort_index() \n \n return population\n\n# Prevalence of HIV,(% ages 15-24)\n\ndef hiv_prevalence():\n i_code = ['SH.HIV.1524.MA.ZS','SH.HIV.1524.FE.ZS']\n \n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Prevalence of HIV, male (% ages 15-24)\": 'male',\n \"Prevalence of HIV, female (% ages 15-24)\":'female' })\n df['geo_code'] = country_code\n\n df = df.melt(id_vars=['geo_code','geo_version'], value_vars=['female','male'],\n var_name='sex', value_name='total')\n prevalence = df.set_index(['geo_code','geo_version']).sort_index() \n \n return prevalence\n \n# Infant and Under 5 Mortality Rate \n\ndef infant_under_5_mortality():\n \n i_code = ['SP.DYN.IMRT.IN','SH.DYN.MORT']\n \n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Mortality rate, infant (per 1,000 live births)\": 'infant',\n \"Mortality rate, under-5 (per 1,000 live births)\":'under_5' })\n df['geo_code'] = country_code\n\n df = df.melt(id_vars=['geo_code','geo_version'], value_vars=['infant','under_5'],\n var_name='mortality', value_name='total')\n infant_under_5 = df.set_index(['geo_code','geo_version']).sort_index() \n \n return infant_under_5\n\n# Adult Literacy rate\n\ndef 
adult_literacy_rate():\n \n i_code = ['SE.ADT.LITR.FE.ZS','SE.ADT.LITR.MA.ZS']\n \n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Literacy rate, adult male (% of males ages 15 and above)\": 'male',\n \"Literacy rate, adult female (% of females ages 15 and above)\":'female' })\n df['geo_code'] = country_code\n\n df = df.melt(id_vars=['geo_code','geo_version'], value_vars=['female','male'],\n var_name='sex', value_name='total')\n literacy_rate = df.set_index(['geo_code','geo_version']).sort_index() \n \n return literacy_rate\n\n# Employment in agriculture, (% of employment) (modeled ILO estimate)\n\ndef agr_employment():\n \n i_code = ['SL.AGR.EMPL.FE.ZS','SL.AGR.EMPL.MA.ZS']\n\n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Employment in agriculture, female (% of female employment) (modeled ILO estimate)\": 'female',\n \"Employment in agriculture, male (% of male employment) (modeled ILO estimate)\":'male' })\n df['geo_code'] = country_code\n\n df = df.melt(id_vars=['geo_code','geo_version'], value_vars=['female','male'],\n var_name='sex', value_name='total')\n agr_employment = df.set_index(['geo_code','geo_version']).sort_index() \n \n return agr_employment\n\n\n# School enrollment, primary,\n\n\ndef primary_ed_enrollment():\n \n i_code = ['SE.PRM.ENRR.MA','SE.PRM.ENRR.FE']\n \n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"School enrollment, primary, male (% gross)\": 'male',\n \"School enrollment, primary, female (% gross)\":'female' })\n df['geo_code'] = country_code\n\n df = df.melt(id_vars=['geo_code','geo_version'], value_vars=['female','male'],\n 
var_name='sex', value_name='total')\n primary_enrollment = df.set_index(['geo_code','geo_version']).sort_index() \n \n return primary_enrollment\n\n# School enrollment, Secondary,\n\n\ndef secondary_ed_enrollment():\n \n i_code = ['SE.SEC.ENRR.FE','SE.SEC.ENRR.MA']\n \n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"School enrollment, secondary, male (% gross)\": 'male',\n \"School enrollment, secondary, female (% gross)\":'female' })\n df['geo_code'] = country_code\n\n df = df.melt(id_vars=['geo_code','geo_version'], value_vars=['female','male'],\n var_name='sex', value_name='total')\n secondary_enrollment = df.set_index(['geo_code','geo_version']).sort_index() \n \n return secondary_enrollment\n\n# Mobile cellular subscriptions (per 100 people),\n\n\ndef cellular_subcription():\n \n i_code = ['IT.CEL.SETS.P2']\n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Mobile cellular subscriptions (per 100 people)\": 'total'})\n df['geo_code'] = country_code\n cellular_subcription = df.set_index(['geo_code','geo_version']).sort_index() \n \n return cellular_subcription\n\n# Life expectancy at birth,(years)\n\ndef life_expectancy():\n \n i_code = ['SP.DYN.LE00.FE.IN','SP.DYN.LE00.MA.IN']\n \n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Life expectancy at birth, male (years)\": 'male',\n \"Life expectancy at birth, female (years)\":'female' })\n df['geo_code'] = country_code\n\n df = df.melt(id_vars=['geo_code','geo_version'], value_vars=['female','male'],\n var_name='sex', value_name='total')\n life_expectancy = df.set_index(['geo_code','geo_version']).sort_index() \n \n 
return life_expectancy\n\n# Women in national parliaments (%)\n\ndef women_in_parliament():\n \n i_code = ['SG.GEN.PARL.ZS']\n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Proportion of seats held by women in national parliaments (%)\": 'total'})\n df['geo_code'] = country_code\n\n women_in_parliament = df.set_index(['geo_code','geo_version']).sort_index() \n \n return women_in_parliament\n\n# Maternal mortality ratio (modeled estimate, per 100,000 live births)\n\ndef maternal_mortality():\n \n i_code = ['SH.STA.MMRT']\n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Maternal mortality ratio (modeled estimate, per 100,000 live births)\": 'total'})\n df['geo_code'] = country_code\n\n maternal_mortality = df.set_index(['geo_code','geo_version']).sort_index() \n \n return maternal_mortality\n\n# Medical Staff - Physicians, Nurses and Midwives (per 1,000 people)\n\ndef physicians_nurses():\n \n i_code = ['SH.MED.PHYS.ZS','SH.MED.NUMW.P3']\n\n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Physicians (per 1,000 people)\": 'physicians',\n \"Nurses and midwives (per 1,000 people)\":'nurses_midwives' })\n df['geo_code'] = country_code\n\n df = df.melt(id_vars=['geo_code','geo_version'], value_vars=['nurses_midwives','physicians'],\n var_name='medical_staff', value_name='total')\n physicians_nurses = df.set_index(['geo_code','geo_version']).sort_index()\n \n return physicians_nurses\n\n\n\n",
- "execution_count": 49,
- "outputs": []
- },
- {
- "metadata": {
- "trusted": true
- },
- "cell_type": "code",
- "source": "# Save to location\n\ndef save_to_location():\n population_total().to_csv(structured +'population.csv')\n hiv_prevalence().to_csv(structured +'hiv_prevalence.csv')\n infant_under_5_mortality().to_csv(structured +'infant_under_5_mortality.csv')\n adult_literacy_rate().to_csv(structured +'adult_literacy_rate.csv')\n agr_employment().to_csv(structured +'agr_employment.csv')\n primary_ed_enrollment().to_csv(structured +'primary_enrollment.csv')\n secondary_ed_enrollment().to_csv(structured +'secondary_enrollment.csv')\n cellular_subcription().to_csv(structured + 'cellular subscriptions_per_100.csv')\n life_expectancy().to_csv(structured + 'life_expectancy.csv')\n women_in_parliament().to_csv(structured + 'women_in_parliament.csv')\n maternal_mortality().to_csv(structured + 'maternal_mortality.csv')\n physicians_nurses().to_csv(structured + 'physicians_nurses.csv')\n \n return ",
- "execution_count": 50,
- "outputs": []
- },
- {
- "metadata": {
- "trusted": true
- },
- "cell_type": "code",
- "source": "save_to_location()",
- "execution_count": 51,
- "outputs": []
- },
- {
- "metadata": {
- "trusted": true
- },
- "cell_type": "code",
- "source": "",
- "execution_count": null,
- "outputs": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3",
- "language": "python"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.8"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement