Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Wed Sep 18 12:54:05 PDT 2019\r\n"
- ]
- }
- ],
- "source": [
- "# numerical, plotting and dataframe libraries used by the cells below\n",
- "import numpy as np, matplotlib.pyplot as plt, pandas as pd\n",
- "# show at most 8 rows when a DataFrame/Series is displayed, so outputs stay short\n",
- "pd.set_option('display.max_rows', 8)\n",
- "# shell command: print the date/time at which the notebook was run\n",
- "!date\n",
- "\n",
- "# IPython autoreload extension, mode 2 (see the IPython docs for exact reload semantics)\n",
- "%load_ext autoreload\n",
- "%autoreload 2"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# How to calculate the probability an individual lives in a household with an Active TB case\n",
- "\n",
- "(for a given year and location, say 2017 South Africa)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>age</th>\n",
- " <th>sex</th>\n",
- " <th>hh_id</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>241</th>\n",
- " <td>72.145127</td>\n",
- " <td>female</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4435</th>\n",
- " <td>23.869853</td>\n",
- " <td>male</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>624</th>\n",
- " <td>54.463637</td>\n",
- " <td>female</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>9979</th>\n",
- " <td>6.070569</td>\n",
- " <td>female</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4546</th>\n",
- " <td>81.487253</td>\n",
- " <td>male</td>\n",
- " <td>2999</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>8625</th>\n",
- " <td>79.135925</td>\n",
- " <td>male</td>\n",
- " <td>2999</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>266</th>\n",
- " <td>72.139917</td>\n",
- " <td>male</td>\n",
- " <td>2999</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>8846</th>\n",
- " <td>34.395619</td>\n",
- " <td>female</td>\n",
- " <td>2999</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>10000 rows × 3 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " age sex hh_id\n",
- "241 72.145127 female 1\n",
- "4435 23.869853 male 1\n",
- "624 54.463637 female 1\n",
- "9979 6.070569 female 2\n",
- "... ... ... ...\n",
- "4546 81.487253 male 2999\n",
- "8625 79.135925 male 2999\n",
- "266 72.139917 male 2999\n",
- "8846 34.395619 female 2999\n",
- "\n",
- "[10000 rows x 3 columns]"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# first load data on individuals, including their age, sex, and household ID\n",
- "\n",
- "# I'll just simulate it for now\n",
- "N = 10_000  # number of simulated individuals (rows of df)\n",
- "\n",
- "# set random seed for reproducibility\n",
- "np.random.seed(12345)\n",
- "\n",
- "# simulate data (to be replaced with real data, e.g. from DHS, eventually)\n",
- "df = pd.DataFrame(index=range(N))\n",
- "df['age'] = np.random.uniform(0, 100, size=N)  # ages drawn from [0, 100)\n",
- "df['sex'] = np.random.choice(['male', 'female'], size=N)\n",
- "# household IDs drawn from 0..2999, so several people can share a household\n",
- "df['hh_id'] = np.random.choice(range(3_000), size=N)\n",
- "# display only: the sorted copy is not assigned back, so df keeps its original row order\n",
- "df.sort_values('hh_id')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "# work through one age_group/sex combination as an example\n",
- "age_start, age_end = 1, 5\n",
- "sex = 'male'\n",
- "\n",
- "# IDs of every household that contains at least one such person\n",
- "in_group = (df.age >= age_start) & (df.age < age_end) & (df.sex == sex)\n",
- "hh_with = df.loc[in_group, 'hh_id'].unique()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "________________________________________________________________________________\n",
- "[Memory] Calling vivarium_gbd_access.gbd.get_incidence_prevalence...\n",
- "get_incidence_prevalence(entity_id=cid(954), location_id=196, entity_type='cause')\n",
- "_______________________________________get_incidence_prevalence - 177.5s, 3.0min\n"
- ]
- }
- ],
- "source": [
- "# then for each of those households, compute the\n",
- "# probability that there is at least one person with active tb\n",
- "\n",
- "# step 1: fetch the 'prevalence' measure for latent TB infection in South Africa\n",
- "# (slow: the saved output reports about 3 minutes for this call)\n",
- "import vivarium_inputs, gbd_mapping\n",
- "prev_ltbi = vivarium_inputs.interface.get_measure(\n",
- " gbd_mapping.causes.latent_tuberculosis_infection, 'prevalence', 'South Africa')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "________________________________________________________________________________\n",
- "[Memory] Calling vivarium_gbd_access.gbd.get_incidence_prevalence...\n",
- "get_incidence_prevalence(entity_id=cid(297), location_id=196, entity_type='cause')\n",
- "_______________________________________get_incidence_prevalence - 103.1s, 1.7min\n"
- ]
- }
- ],
- "source": [
- "# step 2: fetch the 'prevalence' measure for the parent cause `tuberculosis` in South Africa\n",
- "# (slow: the saved output reports about 1.7 minutes for this call)\n",
- "prev_any_tb = vivarium_inputs.interface.get_measure(\n",
- " gbd_mapping.causes.tuberculosis, 'prevalence', 'South Africa')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th>value</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>draw</th>\n",
- " <th>location</th>\n",
- " <th>sex</th>\n",
- " <th>age_group_start</th>\n",
- " <th>age_group_end</th>\n",
- " <th>year_start</th>\n",
- " <th>year_end</th>\n",
- " <th></th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th rowspan=\"4\" valign=\"top\">0</th>\n",
- " <th rowspan=\"4\" valign=\"top\">South Africa</th>\n",
- " <th rowspan=\"4\" valign=\"top\">Female</th>\n",
- " <th rowspan=\"4\" valign=\"top\">0.0</th>\n",
- " <th rowspan=\"4\" valign=\"top\">0.019178</th>\n",
- " <th>1990</th>\n",
- " <th>1991</th>\n",
- " <td>0.000047</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1991</th>\n",
- " <th>1992</th>\n",
- " <td>0.000038</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1992</th>\n",
- " <th>1993</th>\n",
- " <td>0.000030</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1993</th>\n",
- " <th>1994</th>\n",
- " <td>0.000022</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <th>...</th>\n",
- " <th>...</th>\n",
- " <th>...</th>\n",
- " <th>...</th>\n",
- " <th>...</th>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th rowspan=\"4\" valign=\"top\">999</th>\n",
- " <th rowspan=\"4\" valign=\"top\">South Africa</th>\n",
- " <th rowspan=\"4\" valign=\"top\">Male</th>\n",
- " <th rowspan=\"4\" valign=\"top\">95.0</th>\n",
- " <th rowspan=\"4\" valign=\"top\">125.000000</th>\n",
- " <th>2014</th>\n",
- " <th>2015</th>\n",
- " <td>0.009573</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2015</th>\n",
- " <th>2016</th>\n",
- " <td>0.009185</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2016</th>\n",
- " <th>2017</th>\n",
- " <td>0.008744</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2017</th>\n",
- " <th>2018</th>\n",
- " <td>0.008260</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>1288000 rows × 1 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " value\n",
- "draw location sex age_group_start age_group_end year_start year_end \n",
- "0 South Africa Female 0.0 0.019178 1990 1991 0.000047\n",
- " 1991 1992 0.000038\n",
- " 1992 1993 0.000030\n",
- " 1993 1994 0.000022\n",
- "... ...\n",
- "999 South Africa Male 95.0 125.000000 2014 2015 0.009573\n",
- " 2015 2016 0.009185\n",
- " 2016 2017 0.008744\n",
- " 2017 2018 0.008260\n",
- "\n",
- "[1288000 rows x 1 columns]"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# active TB prevalence = all-TB prevalence minus latent-infection prevalence,\n",
- "# aligned row by row on the shared demographic/draw/year keys\n",
- "index_cols = ['draw', 'location', 'sex', 'age_group_start', 'age_group_end', 'year_start', 'year_end']\n",
- "any_tb_indexed = prev_any_tb.set_index(index_cols)\n",
- "ltbi_indexed = prev_ltbi.set_index(index_cols)\n",
- "prev_active_tb = any_tb_indexed.sub(ltbi_indexed)\n",
- "prev_active_tb"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {},
- "outputs": [],
- "source": [
- "# placeholder until the real per-simulant probability of active tb is\n",
- "# attached to df (there is probably a way to do that with vivarium)\n",
- "\n",
- "# for now, simulate it: one draw per person from uniform [0, 0.01)\n",
- "df['pr_active_tb'] = np.random.uniform(low=0, high=.01, size=N)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.009567952684606862"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "def pr_ac_tb_in_hh(df_hh):\n",
- "    \"\"\"Probability that at least one row of df_hh has active TB.\n",
- "\n",
- "    Multiplies (1 - pr_active_tb) over all rows of df_hh, i.e. treats the\n",
- "    rows as independent, and returns one minus that product.\n",
- "    \"\"\"\n",
- "    pr_everyone_clear = (1 - df_hh.pr_active_tb).prod()\n",
- "    return 1 - pr_everyone_clear\n",
- "pr_ac_tb_in_hh(df.loc[df.hh_id == 1])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>age</th>\n",
- " <th>sex</th>\n",
- " <th>hh_id</th>\n",
- " <th>pr_active_tb</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>241</th>\n",
- " <td>72.145127</td>\n",
- " <td>female</td>\n",
- " <td>1</td>\n",
- " <td>0.001116</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>624</th>\n",
- " <td>54.463637</td>\n",
- " <td>female</td>\n",
- " <td>1</td>\n",
- " <td>0.004184</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4435</th>\n",
- " <td>23.869853</td>\n",
- " <td>male</td>\n",
- " <td>1</td>\n",
- " <td>0.004295</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " age sex hh_id pr_active_tb\n",
- "241 72.145127 female 1 0.001116\n",
- "624 54.463637 female 1 0.004184\n",
- "4435 23.869853 male 1 0.004295"
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.query('hh_id == 1')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "hh_id\n",
- "5 0.003730\n",
- "58 0.028011\n",
- "68 0.010399\n",
- "72 0.011235\n",
- " ... \n",
- "2939 0.014072\n",
- "2953 0.015080\n",
- "2958 0.028687\n",
- "2963 0.022994\n",
- "Length: 175, dtype: float64"
- ]
- },
- "execution_count": 23,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# keep only members of the selected households, then get one probability per household\n",
- "members_of_hh_with = df.loc[df.hh_id.isin(hh_with)]\n",
- "pr_ac_tb = members_of_hh_with.groupby('hh_id').apply(pr_ac_tb_in_hh)\n",
- "pr_ac_tb"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.01976982850644556"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# average the household-level probabilities over the households in hh_with\n",
- "# NOTE(review): hh_with holds unique household IDs, so a household with several\n",
- "# people in the age/sex group counts once -- confirm a per-household (rather than\n",
- "# per-person) average is what is intended\n",
- "pr_ac_tb_age_sex = pr_ac_tb.mean()\n",
- "pr_ac_tb_age_sex"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Now repeat that for each age and sex group, and you\n",
- "# will get the probability of an active TB case in the household\n",
- "# as a function of age and sex for the location and year\n",
- "\n",
- "age_bins = [0, 1, 5, 10] + list(range(15, 101, 5))\n",
- "\n",
- "# the household-level probability does not depend on the age/sex group,\n",
- "# so compute it once for every household instead of once per group\n",
- "pr_ac_tb_all_hh = df.groupby('hh_id').apply(pr_ac_tb_in_hh)\n",
- "\n",
- "pr_by_group = {}\n",
- "for i, age_start in enumerate(age_bins[:-1]):\n",
- "    age_end = age_bins[i+1]\n",
- "    for sex in ['male', 'female']:\n",
- "        # households with at least one person in this age/sex group\n",
- "        in_group = (df.age >= age_start) & (df.age < age_end) & (df.sex == sex)\n",
- "        hh_in_group = df.loc[in_group, 'hh_id'].unique()\n",
- "        # find and store pr_ac_tb_age_sex for this group: the mean over those\n",
- "        # households (NaN if no simulated person falls in the group)\n",
- "        pr_by_group[age_start, age_end, sex] = pr_ac_tb_all_hh.loc[hh_in_group].mean()\n",
- "\n",
- "# one value per (age_start, age_end, sex) group\n",
- "pr_ac_tb_by_age_sex = pd.Series(pr_by_group).rename_axis(['age_start', 'age_end', 'sex'])\n",
- "pr_ac_tb_by_age_sex"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "metadata": {},
- "outputs": [],
- "source": [
- "# and do that for each draw to propagate through the uncertainty\n",
- "# probably good to do a bootstrap resampling of the person data, too"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "vivarium_conic_sqlns",
- "language": "python",
- "name": "vivarium_conic_sqlns"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.8"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement