Advertisement
Guest User

Untitled

a guest
Jun 24th, 2019
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.30 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "metadata": {},
  5. "cell_type": "markdown",
  6. "source": "## Structure World Bank (WB) data for HuruMap"
  7. },
  8. {
  9. "metadata": {
  10. "trusted": true
  11. },
  12. "cell_type": "code",
  13. "source": "import pandas as pd",
  14. "execution_count": 1,
  15. "outputs": []
  16. },
  17. {
  18. "metadata": {
  19. "scrolled": true
  20. },
  21. "cell_type": "markdown",
  22. "source": "### Load the WB data and Takwimu indicators"
  23. },
  24. {
  25. "metadata": {
  26. "trusted": true
  27. },
  28. "cell_type": "code",
  29. "source": "# generate a dict from the indicators file\ntakwimu_indicators = pd.read_csv('../data/takwimu_indicators.csv',\n index_col=0, squeeze=True).to_dict()\n# Gather indicator data on the selected country\n\n# # Kenya\n# data = pd.read_csv('../data/Kenya/WB_KEN.csv')\n# structured = '../huru/ke/ke_'\n# country_code = 'KE'\n\n# Ethiopia\ndata = pd.read_csv('../data/Ethiopia/WB_ET.csv')\nstructured = '../huru/et/et_'\ncountry_code = 'ET'\n\n",
  30. "execution_count": 48,
  31. "outputs": []
  32. },
  33. {
  34. "metadata": {},
  35. "cell_type": "markdown",
  36. "source": "### Wrangle by Indicator"
  37. },
  38. {
  39. "metadata": {
  40. "trusted": true
  41. },
  42. "cell_type": "code",
  43. "source": "# population total\ndef population_total():\n \n i_code = ['SP.POP.TOTL.FE.IN','SP.POP.TOTL.MA.IN']\n\n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Population, male\": 'male',\"Population, female\":'female' })\n df['geo_code'] = country_code\n\n df = df.melt(id_vars=['geo_code','geo_version'], value_vars=['female','male'],\n var_name='sex', value_name='total')\n population = df.set_index(['geo_code','geo_version']).sort_index() \n \n return population\n\n# Prevalence of HIV,(% ages 15-24)\n\ndef hiv_prevalence():\n i_code = ['SH.HIV.1524.MA.ZS','SH.HIV.1524.FE.ZS']\n \n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Prevalence of HIV, male (% ages 15-24)\": 'male',\n \"Prevalence of HIV, female (% ages 15-24)\":'female' })\n df['geo_code'] = country_code\n\n df = df.melt(id_vars=['geo_code','geo_version'], value_vars=['female','male'],\n var_name='sex', value_name='total')\n prevalence = df.set_index(['geo_code','geo_version']).sort_index() \n \n return prevalence\n \n# Infant and Under 5 Mortality Rate \n\ndef infant_under_5_mortality():\n \n i_code = ['SP.DYN.IMRT.IN','SH.DYN.MORT']\n \n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Mortality rate, infant (per 1,000 live births)\": 'infant',\n \"Mortality rate, under-5 (per 1,000 live births)\":'under_5' })\n df['geo_code'] = country_code\n\n df = df.melt(id_vars=['geo_code','geo_version'], value_vars=['infant','under_5'],\n var_name='mortality', value_name='total')\n infant_under_5 = df.set_index(['geo_code','geo_version']).sort_index() \n \n return infant_under_5\n\n# Adult Literacy 
rate\n\ndef adult_literacy_rate():\n \n i_code = ['SE.ADT.LITR.FE.ZS','SE.ADT.LITR.MA.ZS']\n \n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Literacy rate, adult male (% of males ages 15 and above)\": 'male',\n \"Literacy rate, adult female (% of females ages 15 and above)\":'female' })\n df['geo_code'] = country_code\n\n df = df.melt(id_vars=['geo_code','geo_version'], value_vars=['female','male'],\n var_name='sex', value_name='total')\n literacy_rate = df.set_index(['geo_code','geo_version']).sort_index() \n \n return literacy_rate\n\n# Employment in agriculture, (% of employment) (modeled ILO estimate)\n\ndef agr_employment():\n \n i_code = ['SL.AGR.EMPL.FE.ZS','SL.AGR.EMPL.MA.ZS']\n\n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Employment in agriculture, female (% of female employment) (modeled ILO estimate)\": 'female',\n \"Employment in agriculture, male (% of male employment) (modeled ILO estimate)\":'male' })\n df['geo_code'] = country_code\n\n df = df.melt(id_vars=['geo_code','geo_version'], value_vars=['female','male'],\n var_name='sex', value_name='total')\n agr_employment = df.set_index(['geo_code','geo_version']).sort_index() \n \n return agr_employment\n\n\n# School enrollment, primary,\n\n\ndef primary_ed_enrollment():\n \n i_code = ['SE.PRM.ENRR.MA','SE.PRM.ENRR.FE']\n \n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"School enrollment, primary, male (% gross)\": 'male',\n \"School enrollment, primary, female (% gross)\":'female' })\n df['geo_code'] = country_code\n\n df = df.melt(id_vars=['geo_code','geo_version'], value_vars=['female','male'],\n 
var_name='sex', value_name='total')\n primary_enrollment = df.set_index(['geo_code','geo_version']).sort_index() \n \n return primary_enrollment\n\n# School enrollment, Secondary,\n\n\ndef secondary_ed_enrollment():\n \n i_code = ['SE.SEC.ENRR.FE','SE.SEC.ENRR.MA']\n \n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"School enrollment, secondary, male (% gross)\": 'male',\n \"School enrollment, secondary, female (% gross)\":'female' })\n df['geo_code'] = country_code\n\n df = df.melt(id_vars=['geo_code','geo_version'], value_vars=['female','male'],\n var_name='sex', value_name='total')\n secondary_enrollment = df.set_index(['geo_code','geo_version']).sort_index() \n \n return secondary_enrollment\n\n# Mobile cellular subscriptions (per 100 people),\n\n\ndef cellular_subcription():\n \n i_code = ['IT.CEL.SETS.P2']\n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Mobile cellular subscriptions (per 100 people)\": 'total'})\n df['geo_code'] = country_code\n cellular_subcription = df.set_index(['geo_code','geo_version']).sort_index() \n \n return cellular_subcription\n\n# Life expectancy at birth,(years)\n\ndef life_expectancy():\n \n i_code = ['SP.DYN.LE00.FE.IN','SP.DYN.LE00.MA.IN']\n \n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Life expectancy at birth, male (years)\": 'male',\n \"Life expectancy at birth, female (years)\":'female' })\n df['geo_code'] = country_code\n\n df = df.melt(id_vars=['geo_code','geo_version'], value_vars=['female','male'],\n var_name='sex', value_name='total')\n life_expectancy = df.set_index(['geo_code','geo_version']).sort_index() \n \n 
return life_expectancy\n\n# Women in national parliaments (%)\n\ndef women_in_parliament():\n \n i_code = ['SG.GEN.PARL.ZS']\n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Proportion of seats held by women in national parliaments (%)\": 'total'})\n df['geo_code'] = country_code\n\n women_in_parliament = df.set_index(['geo_code','geo_version']).sort_index() \n \n return women_in_parliament\n\n# Maternal mortality ratio (modeled estimate, per 100,000 live births)\n\ndef maternal_mortality():\n \n i_code = ['SH.STA.MMRT']\n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Maternal mortality ratio (modeled estimate, per 100,000 live births)\": 'total'})\n df['geo_code'] = country_code\n\n maternal_mortality = df.set_index(['geo_code','geo_version']).sort_index() \n \n return maternal_mortality\n\n# Medical Staff - Physicians, Nurses and Midwives (per 1,000 people)\n\ndef physicians_nurses():\n \n i_code = ['SH.MED.PHYS.ZS','SH.MED.NUMW.P3']\n\n df = data.loc[data['I_code'].isin(i_code)].drop(['I_code'],axis=1).T.reset_index()\n df.columns = df.iloc[0]\n df = df[1:]\n df = df.rename(columns={\"Indicator\": 'geo_version' , \"Physicians (per 1,000 people)\": 'physicians',\n \"Nurses and midwives (per 1,000 people)\":'nurses_midwives' })\n df['geo_code'] = country_code\n\n df = df.melt(id_vars=['geo_code','geo_version'], value_vars=['nurses_midwives','physicians'],\n var_name='medical_staff', value_name='total')\n physicians_nurses = df.set_index(['geo_code','geo_version']).sort_index()\n \n return physicians_nurses\n\n\n\n",
  44. "execution_count": 49,
  45. "outputs": []
  46. },
  47. {
  48. "metadata": {
  49. "trusted": true
  50. },
  51. "cell_type": "code",
  52. "source": "# Save to location\n\ndef save_to_location():\n population_total().to_csv(structured +'population.csv')\n hiv_prevalence().to_csv(structured +'hiv_prevalence.csv')\n infant_under_5_mortality().to_csv(structured +'infant_under_5_mortality.csv')\n adult_literacy_rate().to_csv(structured +'adult_literacy_rate.csv')\n agr_employment().to_csv(structured +'agr_employment.csv')\n primary_ed_enrollment().to_csv(structured +'primary_enrollment.csv')\n secondary_ed_enrollment().to_csv(structured +'secondary_enrollment.csv')\n cellular_subcription().to_csv(structured + 'cellular subscriptions_per_100.csv')\n life_expectancy().to_csv(structured + 'life_expectancy.csv')\n women_in_parliament().to_csv(structured + 'women_in_parliament.csv')\n maternal_mortality().to_csv(structured + 'maternal_mortality.csv')\n physicians_nurses().to_csv(structured + 'physicians_nurses.csv')\n \n return ",
  53. "execution_count": 50,
  54. "outputs": []
  55. },
  56. {
  57. "metadata": {
  58. "trusted": true
  59. },
  60. "cell_type": "code",
# Run the export: save_to_location() writes one CSV per indicator table,
# each path starting with the `structured` prefix.
save_to_location()
  62. "execution_count": 51,
  63. "outputs": []
  64. },
  65. {
  66. "metadata": {
  67. "trusted": true
  68. },
  69. "cell_type": "code",
  70. "source": "",
  71. "execution_count": null,
  72. "outputs": []
  73. }
  74. ],
  75. "metadata": {
  76. "kernelspec": {
  77. "name": "python3",
  78. "display_name": "Python 3",
  79. "language": "python"
  80. },
  81. "language_info": {
  82. "codemirror_mode": {
  83. "name": "ipython",
  84. "version": 3
  85. },
  86. "file_extension": ".py",
  87. "mimetype": "text/x-python",
  88. "name": "python",
  89. "nbconvert_exporter": "python",
  90. "pygments_lexer": "ipython3",
  91. "version": "3.6.8"
  92. }
  93. },
  94. "nbformat": 4,
  95. "nbformat_minor": 2
  96. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement