Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Running on PyMC3 v3.5\n",
- "\n",
- " Patient_ID Family_ID Household_ID Age Sex Classroom Loc-x Loc-y \\\n",
- "0 184 51 6 13.0 0 2 73 80 \n",
- "1 173 9 35 6.0 1 0 85 43 \n",
- "2 177 17 6 8.0 2 1 73 80 \n",
- "3 178 17 6 4.0 1 0 73 80 \n",
- "4 174 9 35 3.0 2 0 85 43 \n",
- "5 45 48 51 7.0 1 1 30 22 \n",
- "6 183 4 6 4.0 1 0 73 80 \n",
- "7 175 9 35 2.0 1 0 85 43 \n",
- "8 181 4 6 10.0 2 2 73 80 \n",
- "9 42 33 45 4.0 2 0 68 7 \n",
- "\n",
- " Infector Prodromal_Onset Eruption_Onset Death_Day Died \\\n",
- "0 0 1 8 -1 0 \n",
- "1 0 3 5 -1 0 \n",
- "2 184 9 13 -1 0 \n",
- "3 184 9 13 -1 0 \n",
- "4 0 10 10 -1 0 \n",
- "5 184 13 15 -1 0 \n",
- "6 184 13 17 -1 0 \n",
- "7 173 14 17 -1 0 \n",
- "8 184 15 19 -1 0 \n",
- "9 178 17 21 -1 0 \n",
- "\n",
- " Infectious_Onset Infectious_End Infectious_Duration \n",
- "0 0 11 11 \n",
- "1 2 8 6 \n",
- "2 8 16 8 \n",
- "3 8 16 8 \n",
- "4 9 13 4 \n",
- "5 12 18 6 \n",
- "6 12 20 8 \n",
- "7 13 20 7 \n",
- "8 14 22 8 \n",
- "9 16 24 8 \n",
- "hi\n"
- ]
- }
- ],
- "source": [
- "import csv\n",
- "import numpy as np\n",
- "import pandas as pd\n",
- "import matplotlib.pyplot as plt\n",
- "import pymc3 as pm\n",
- "from jupyterthemes import jtplot\n",
- "from collections import namedtuple\n",
- "import os\n",
- "import time\n",
- "from datetime import datetime\n",
- "import pickle\n",
- "%matplotlib inline\n",
- "##plt.style.use(\"ggplot\")\n",
- "print(f\"Running on PyMC3 v{pm.__version__}\")\n",
- "print(\"\")\n",
- "jtplot.style()\n",
- "\n",
- "pd.options.mode.chained_assignment = None # default=\"warn\"\n",
- "\n",
- "measles_data_all = pd.read_csv(\"hagelloch_cleaned.csv\",\n",
- " sep=\" \")\n",
- "\n",
- "## remove useless columns\n",
- "patients_data = measles_data_all[[\"PN\", \"FN\", \"HN\", \"AGE\", \"SEX\", \"CL\",\n",
- " \"HNX\", \"HNY\", \"IFTO\", \"PRO_DAYS\",\n",
- " \"ERU_DAYS\", \"DEAD_DAYS\"]].copy()\n",
- "\n",
- "patients_data.columns = [\"Patient_ID\",\n",
- " \"Family_ID\",\n",
- " \"Household_ID\",\n",
- " \"Age\",\n",
- " \"Sex\",\n",
- " \"Classroom\",\n",
- " \"Loc-x\",\n",
- " \"Loc-y\",\n",
- " \"Infector\",\n",
- " \"Prodromal_Onset\",\n",
- " \"Eruption_Onset\",\n",
- " \"Death_Day\"]\n",
- "\n",
- "\n",
- "death_mask = ~(patients_data[\"Death_Day\"] == -1)\n",
- "\n",
- "d = 3 # time still infectious after eruption\n",
- "\n",
- "patients_data[\"Died\"] = death_mask\n",
- "patients_data[\"Died\"] = patients_data[\"Died\"].astype(np.int)\n",
- "patients_data[\"Infectious_Onset\"] = patients_data[\"Prodromal_Onset\"] -1\n",
- "patients_data[\"Infectious_End\"] = np.where(death_mask,\n",
- " patients_data[\"Death_Day\"],\n",
- " (patients_data[\"Eruption_Onset\"] + d))\n",
- "patients_data[\"Infectious_Duration\"] = (patients_data[\"Infectious_End\"] -\n",
- " patients_data[\"Infectious_Onset\"])\n",
- "\n",
- "print(patients_data.head(10))\n",
- "print(\"hi\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "def lognormal_parameters(desired_mean, desired_variance):\n",
- " mu = np.log(desired_mean / np.sqrt(1 + desired_variance / (desired_mean**2)))\n",
- " sigma = np.sqrt(np.log(1 + desired_variance/(desired_mean**2)))\n",
- " return (mu, sigma)\n",
- "\n",
- "class Observed_Outbreak:\n",
- " def __init__(self, data_frame, duration=None):\n",
- " self.data_frame = data_frame.copy()\n",
- " self.n_patients = self.data_frame.shape[0]\n",
- " self.slabs = {}\n",
- " if not (duration is None):\n",
- " self.duration = duration\n",
- " self.duration_bc = self.duration[None, :]\n",
- " \n",
- " def impute_latencies(self, latency_params):\n",
- " latencies = np.random.lognormal(*latency_params,\n",
- " self.n_patients).astype(np.int)\n",
- " self.data_frame[\"Latency_Onset\"] = self.data_frame[\"Infectious_Onset\"] - latencies\n",
- " earliest_time = self.data_frame[\"Latency_Onset\"].min()\n",
- " latest_time = self.data_frame[\"Infectious_End\"].max()\n",
- " self.duration = np.arange(earliest_time, latest_time+1)\n",
- " self.duration_bc = self.duration[None, :]\n",
- " \n",
- " def get_global_infectious(self, valid_days=None):\n",
- " infectious_onsets_bc = self.data_frame[\"Infectious_Onset\"][:, None]\n",
- " infectious_ends_bc = self.data_frame[\"Infectious_End\"][:, None]\n",
- " self.global_infectious_array = ((infectious_onsets_bc <= self.duration_bc) &\n",
- " (self.duration_bc < infectious_ends_bc)).T\n",
- " self.global_infectious = self.global_infectious_array.sum(axis=1)\n",
- " if valid_days is None:\n",
- " self.valid_days = self.global_infectious.copy() != 0\n",
- " else:\n",
- " self.valid_days = valid_days\n",
- " self.n_days = sum(self.valid_days)\n",
- " self.global_infectious_array = self.global_infectious_array[self.valid_days, :]\n",
- " self.global_infectious = self.global_infectious[self.valid_days]\n",
- " self.slabs[\"Community\"] = np.broadcast_to(self.global_infectious[:, None],\n",
- " (self.n_days, self.n_patients))\n",
- " \n",
- " def get_susceptible_states(self):\n",
- " latency_onsets_bc = self.data_frame[\"Latency_Onset\"][:, None]\n",
- " self.states_before = ((self.duration_bc < latency_onsets_bc).T).astype(np.int)\n",
- " self.states_after = np.roll(self.states_before, -1, axis=0)\n",
- " self.states_after[-1, :] = 0 \n",
- " self.states_before = self.states_before[self.valid_days, :]\n",
- " self.states_after = self.states_after[self.valid_days, :]\n",
- " \n",
- " def get_patient_distances(self):\n",
- " self.distance_array = np.zeros((self.n_patients, self.n_patients))\n",
- " for first in range(self.n_patients):\n",
- " first_patient = self.data_frame.iloc[first]\n",
- " first_x = first_patient[\"Loc-x\"]\n",
- " first_y = first_patient[\"Loc-y\"]\n",
- " \n",
- " dx2 = (first_x - self.data_frame[\"Loc-x\"])**2\n",
- " dy2 = (first_y - self.data_frame[\"Loc-y\"])**2\n",
- " distance_1d = np.sqrt(dx2 + dy2)\n",
- " self.distance_array[first, :] = distance_1d\n",
- " \n",
- " def get_distance_infectious(self):\n",
- " distance_slab = np.zeros((self.n_days, self.n_patients))\n",
- " for today in range(self.n_days):\n",
- " global_infectious_today = self.global_infectious_array[today, :]\n",
- " for patient in range(self.n_patients):\n",
- " distances_from_patient = self.distance_array[patient, :]\n",
- " distances_exped = np.exp(-distances_from_patient)\n",
- " product = global_infectious_today * distances_exped\n",
- " distance_slab[today, patient] = product.sum()\n",
- " self.slabs[\"Distance\"] = distance_slab\n",
- " \n",
- " def get_group_infectious(self, group_name, saturated=False):\n",
- " output_slab = np.zeros((self.n_days, self.n_patients))\n",
- " for group_id, group in self.data_frame.groupby(group_name):\n",
- " ## create an Observed Outbreak class for this particular group,\n",
- " ## but using the duration from the entire outbreak\n",
- " group_suboutbreak = Observed_Outbreak(group, self.duration)\n",
- " group_suboutbreak.get_global_infectious(self.valid_days)\n",
- " ## get number of infectious in this particular group for each day\n",
- " group_infectious = group_suboutbreak.global_infectious\n",
- " group_infectious_bc = np.broadcast_to(group_infectious[:, None],\n",
- " (self.n_days, self.n_patients))\n",
- " ## nonzero entries when group_id matches the column's (i.e. patient's) group\n",
- " group_id_mask = (self.data_frame[group_name] == group_id).values\n",
- " subslab = group_infectious_bc * group_id_mask\n",
- " output_slab += subslab.astype(np.int)\n",
- " if saturated:\n",
- " output_slab = output_slab.astype(np.bool).astype(np.int)\n",
- " self.slabs[group_name] = output_slab\n",
- " \n",
- " def get_classroom_infectious(self, classroom, infectious_override=False):\n",
- " data_frame_new = self.data_frame.copy()\n",
- " if infectious_override:\n",
- " data_frame_new[\"Infectious_End\"] = (data_frame_new[\"Infectious_Onset\"] +\n",
- " infectious_override)\n",
- " \n",
- " classroom_frame = data_frame_new[data_frame_new[\"Classroom\"] == classroom]\n",
- " classroom_suboutbreak = Observed_Outbreak(classroom_frame, self.duration)\n",
- " classroom_suboutbreak.get_global_infectious(self.valid_days)\n",
- " classroom_infectious = classroom_suboutbreak.global_infectious\n",
- " classroom_infectious_bc = np.broadcast_to(classroom_infectious[:, None],\n",
- " (self.n_days, self.n_patients))\n",
- " classroom_id_mask = (self.data_frame[\"Classroom\"] == classroom).values\n",
- " classroom_slab = classroom_infectious_bc * classroom_id_mask\n",
- " self.slabs[f\"Classroom_{classroom}\"] = classroom_slab\n",
- " \n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "0\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using jitter+adapt_diag...\n",
- "Multiprocess sampling (4 chains in 4 jobs)\n",
- "NUTS: [rv_q_sex, rv_q_family, rv_q_household, rv_q_class_2, rv_q_class_1, rv_q_community]\n",
- "Sampling 4 chains: 100%|██████████| 8000/8000 [03:28<00:00, 15.39draws/s]\n",
- "D:\\Anaconda3\\lib\\site-packages\\mkl_fft\\_numpy_fft.py:1044: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.\n",
- " output = mkl_fft.rfftn_numpy(a, s, axes)\n",
- "There was 1 divergence after tuning. Increase `target_accept` or reparameterize.\n",
- "There were 2 divergences after tuning. Increase `target_accept` or reparameterize.\n",
- "The acceptance probability does not match the target. It is 0.6616778933467038, but should be close to 0.8. Try to increase the number of tuning steps.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " mean sd mc_error hpd_2.5 hpd_97.5 n_eff \\\n",
- "rv_q_community 0.994322 0.000972 0.000038 0.992559 0.996216 505.819056 \n",
- "rv_q_class_1 0.882522 0.058837 0.001676 0.768720 0.984137 1196.684376 \n",
- "rv_q_class_2 0.946839 0.018331 0.000500 0.914268 0.982833 1425.523988 \n",
- "rv_q_household 0.892061 0.052584 0.001962 0.790368 0.984600 697.508660 \n",
- "rv_q_family 0.772249 0.105571 0.004480 0.601709 0.998689 516.690902 \n",
- "rv_q_sex 0.996833 0.002024 0.000079 0.993130 0.999992 544.947321 \n",
- "\n",
- " Rhat \n",
- "rv_q_community 1.004532 \n",
- "rv_q_class_1 1.000721 \n",
- "rv_q_class_2 1.002293 \n",
- "rv_q_household 1.002365 \n",
- "rv_q_family 1.003017 \n",
- "rv_q_sex 1.006037 \n",
- "Model successfully dumped to file: models/model_sah_1_sat_0_2018-08-23T15-15-18.pkl.\n",
- "1\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Auto-assigning NUTS sampler...\n",
- "Initializing NUTS using jitter+adapt_diag...\n",
- "Multiprocess sampling (4 chains in 4 jobs)\n",
- "NUTS: [rv_q_sex, rv_q_family, rv_q_household, rv_q_class_2, rv_q_class_1, rv_q_community]\n",
- "Sampling 4 chains: 90%|█████████ | 7229/8000 [02:53<00:16, 46.84draws/s]"
- ]
- }
- ],
- "source": [
- "latency_mean = 12\n",
- "latency_var = 5.433\n",
- "latency_parameters = lognormal_parameters(latency_mean, latency_var)\n",
- "stay_at_home = 1\n",
- "saturated = False\n",
- "\n",
- "n_samples = 500\n",
- "n_tune = 1500\n",
- "n_chains = 4\n",
- "\n",
- "model_name = \"model_sah_\"+str(int(stay_at_home))+\"_sat_\"+str(int(saturated))+\"_\"\n",
- "marginalization_mc = 66\n",
- "for i in range(marginalization_mc):\n",
- " print(i)\n",
- " outbreak = Observed_Outbreak(patients_data)\n",
- " outbreak.impute_latencies(latency_parameters)\n",
- " outbreak.get_global_infectious()\n",
- " outbreak.get_susceptible_states()\n",
- "\n",
- " outbreak.get_group_infectious(\"Household_ID\")\n",
- " outbreak.get_group_infectious(\"Family_ID\")\n",
- " outbreak.get_group_infectious(\"Sex\")\n",
- " outbreak.get_classroom_infectious(1, infectious_override=stay_at_home)\n",
- " outbreak.get_classroom_infectious(2, infectious_override=stay_at_home)\n",
- "\n",
- " community_slab = outbreak.slabs[\"Community\"]\n",
- " c1_slab = outbreak.slabs[\"Classroom_1\"]\n",
- " c2_slab = outbreak.slabs[\"Classroom_2\"]\n",
- " household_slab = outbreak.slabs[\"Household_ID\"]\n",
- " family_slab = outbreak.slabs[\"Family_ID\"]\n",
- " sex_slab = outbreak.slabs[\"Sex\"]\n",
- " s_before = outbreak.states_before\n",
- " s_after = outbreak.states_after\n",
- "\n",
- " this_model = pm.Model()\n",
- " with this_model:\n",
- " rv_q_community = pm.Uniform(\"rv_q_community\")\n",
- " ##rv_q_class_0 = pm.Uniform(\"rv_q_class_0\")\n",
- " rv_q_class_1 = pm.Uniform(\"rv_q_class_1\")\n",
- " rv_q_class_2 = pm.Uniform(\"rv_q_class_2\")\n",
- " rv_q_household = pm.Uniform(\"rv_q_household\")\n",
- " rv_q_family = pm.Uniform(\"rv_q_family\")\n",
- " rv_q_sex = pm.Uniform(\"rv_q_sex\")\n",
- "\n",
- " probabilities = ((rv_q_community**community_slab) *\n",
- " (rv_q_class_1**c1_slab) *\n",
- " (rv_q_class_2**c2_slab) *\n",
- " (rv_q_household**household_slab) *\n",
- " (rv_q_family**family_slab) *\n",
- " (rv_q_sex**sex_slab))\n",
- "\n",
- " ## probability is zero if already not susceptible\n",
- " probabilities = s_before * probabilities\n",
- " s_observed = pm.Bernoulli(\"s_observed\", p=probabilities, observed=s_after)\n",
- " this_trace = pm.sample(draws=n_samples, cores=n_chains,\n",
- " chains=n_chains, tune=n_tune)\n",
- " pm.traceplot(this_trace)\n",
- " ##print(\"Likelihood sampled using MCMC NUTS:\")\n",
- " ##plt.show()\n",
- " print(pm.summary(this_trace))\n",
- "\n",
- "\n",
- " now = datetime.now().strftime(\"%Y-%m-%dT%H-%M-%S\")\n",
- " dump_filename = \"models/\"+model_name+now+\".pkl\"\n",
- " os.makedirs(os.path.dirname(dump_filename), exist_ok=True)\n",
- " model_parameters = {\"latency_parameters\": latency_parameters,\n",
- " \"stay_at_home\": stay_at_home,\n",
- " \"saturated\": saturated,\n",
- " \"n_samples\": n_samples,\n",
- " \"n_tunes\": n_tune,\n",
- " \"n_chains\": n_chains}\n",
- "\n",
- " with open(dump_filename, \"wb\") as the_file:\n",
- " pickle.dump({\"model_parameters\": model_parameters,\n",
- " \"latencies\": outbreak.data_frame[\"Latency_Onset\"],\n",
- " \"model\": this_model,\n",
- " \"trace\": this_trace}, the_file)\n",
- "\n",
- " print(f\"Model successfully dumped to file: {dump_filename}.\")\n",
- "\n",
- "print(\"Marginalization finished.\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.6"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement