Advertisement
Guest User

Untitled

a guest
Aug 23rd, 2018
148
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.73 KB | None | 0 0
  1. import csv
  2. import numpy as np
  3. import pandas as pd
  4. import matplotlib.pyplot as plt
  5. import pymc3 as pm
  6. from jupyterthemes import jtplot
  7. from collections import namedtuple
  8. import os
  9. import time
  10. from datetime import datetime
  11. import pickle
  12. import theano
  13. plt.style.use("ggplot")
  14. print(f"Running on PyMC3 v{pm.__version__}")
  15. print("")
  16. ##jtplot.style()
  17.  
  18. pd.options.mode.chained_assignment = None # default="warn"
  19.  
  20. measles_data_all = pd.read_csv("hagelloch_cleaned.csv",
  21. sep=" ")
  22.  
  23. ## remove useless columns
  24. patients_data = measles_data_all[["PN", "FN", "HN", "AGE", "SEX", "CL",
  25. "HNX", "HNY", "IFTO", "PRO_DAYS",
  26. "ERU_DAYS", "DEAD_DAYS"]].copy()
  27.  
  28. patients_data.columns = ["Patient_ID",
  29. "Family_ID",
  30. "Household_ID",
  31. "Age",
  32. "Sex",
  33. "Classroom",
  34. "Loc-x",
  35. "Loc-y",
  36. "Infector",
  37. "Prodromal_Onset",
  38. "Eruption_Onset",
  39. "Death_Day"]
  40.  
  41.  
  42. death_mask = ~(patients_data["Death_Day"] == -1)
  43.  
  44. d = 3 # time still infectious after eruption
  45.  
  46. patients_data["Died"] = death_mask
  47. patients_data["Died"] = patients_data["Died"].astype(np.int)
  48. patients_data["Infectious_Onset"] = patients_data["Prodromal_Onset"] -1
  49. patients_data["Infectious_End"] = np.where(death_mask,
  50. patients_data["Death_Day"],
  51. (patients_data["Eruption_Onset"] + d))
  52. patients_data["Infectious_Duration"] = (patients_data["Infectious_End"] -
  53. patients_data["Infectious_Onset"])
  54.  
  55. print(patients_data.head(10))
  56. print("hi")
  57.  
  58. def lognormal_parameters(desired_mean, desired_variance):
  59. mu = np.log(desired_mean / np.sqrt(1 + desired_variance / (desired_mean**2)))
  60. sigma = np.sqrt(np.log(1 + desired_variance/(desired_mean**2)))
  61. return (mu, sigma)
  62.  
  63. class Observed_Outbreak:
  64. def __init__(self, data_frame, duration=None):
  65. self.data_frame = data_frame.copy()
  66. self.n_patients = self.data_frame.shape[0]
  67. self.slabs = {}
  68. if not (duration is None):
  69. self.duration = duration
  70. self.duration_bc = self.duration[None, :]
  71.  
  72. def impute_latencies(self, latency_params):
  73. latencies = np.random.lognormal(*latency_params,
  74. self.n_patients).astype(np.int)
  75. self.data_frame["Latency_Onset"] = self.data_frame["Infectious_Onset"] - latencies
  76. earliest_time = self.data_frame["Latency_Onset"].min()
  77. latest_time = self.data_frame["Infectious_End"].max()
  78. self.duration = np.arange(earliest_time, latest_time+1)
  79. self.duration_bc = self.duration[None, :]
  80.  
  81. def get_global_infectious(self, valid_days=None):
  82. infectious_onsets_bc = self.data_frame["Infectious_Onset"][:, None]
  83. infectious_ends_bc = self.data_frame["Infectious_End"][:, None]
  84. self.global_infectious_array = ((infectious_onsets_bc <= self.duration_bc) &
  85. (self.duration_bc < infectious_ends_bc)).T
  86. self.global_infectious = self.global_infectious_array.sum(axis=1)
  87. if valid_days is None:
  88. self.valid_days = self.global_infectious.copy() != 0
  89. else:
  90. self.valid_days = valid_days
  91. self.n_days = sum(self.valid_days)
  92. self.global_infectious_array = self.global_infectious_array[self.valid_days, :]
  93. self.global_infectious = self.global_infectious[self.valid_days]
  94. self.slabs["Community"] = np.broadcast_to(self.global_infectious[:, None],
  95. (self.n_days, self.n_patients))
  96.  
  97. def get_susceptible_states(self):
  98. latency_onsets_bc = self.data_frame["Latency_Onset"][:, None]
  99. self.states_before = ((self.duration_bc < latency_onsets_bc).T).astype(np.int)
  100. self.states_after = np.roll(self.states_before, -1, axis=0)
  101. self.states_after[-1, :] = 0
  102. self.states_before = self.states_before[self.valid_days, :]
  103. self.states_after = self.states_after[self.valid_days, :]
  104.  
  105. def get_patient_distances(self):
  106. self.distance_array = np.zeros((self.n_patients, self.n_patients))
  107. for first in range(self.n_patients):
  108. first_patient = self.data_frame.iloc[first]
  109. first_x = first_patient["Loc-x"]
  110. first_y = first_patient["Loc-y"]
  111.  
  112. dx2 = (first_x - self.data_frame["Loc-x"])**2
  113. dy2 = (first_y - self.data_frame["Loc-y"])**2
  114. distance_1d = np.sqrt(dx2 + dy2)
  115. self.distance_array[first, :] = distance_1d
  116.  
  117. def get_distance_infectious(self):
  118. distance_slab = np.zeros((self.n_days, self.n_patients))
  119. for today in range(self.n_days):
  120. global_infectious_today = self.global_infectious_array[today, :]
  121. for patient in range(self.n_patients):
  122. distances_from_patient = self.distance_array[patient, :]
  123. distances_exped = np.exp(-distances_from_patient)
  124. product = global_infectious_today * distances_exped
  125. distance_slab[today, patient] = product.sum()
  126. self.slabs["Distance"] = distance_slab
  127.  
  128. def get_group_infectious(self, group_name, saturated=False):
  129. output_slab = np.zeros((self.n_days, self.n_patients))
  130. for group_id, group in self.data_frame.groupby(group_name):
  131. ## create an Observed Outbreak class for this particular group,
  132. ## but using the duration from the entire outbreak
  133. group_suboutbreak = Observed_Outbreak(group, self.duration)
  134. group_suboutbreak.get_global_infectious(self.valid_days)
  135. ## get number of infectious in this particular group for each day
  136. group_infectious = group_suboutbreak.global_infectious
  137. group_infectious_bc = np.broadcast_to(group_infectious[:, None],
  138. (self.n_days, self.n_patients))
  139. ## nonzero entries when group_id matches the column's (i.e. patient's) group
  140. group_id_mask = (self.data_frame[group_name] == group_id).values
  141. subslab = group_infectious_bc * group_id_mask
  142. output_slab += subslab.astype(np.int)
  143. if saturated:
  144. output_slab = output_slab.astype(np.bool).astype(np.int)
  145. self.slabs[group_name] = output_slab
  146.  
  147. def get_classroom_infectious(self, classroom, infectious_override=False):
  148. data_frame_new = self.data_frame.copy()
  149. if infectious_override:
  150. data_frame_new["Infectious_End"] = (data_frame_new["Infectious_Onset"] +
  151. infectious_override)
  152.  
  153. classroom_frame = data_frame_new[data_frame_new["Classroom"] == classroom]
  154. classroom_suboutbreak = Observed_Outbreak(classroom_frame, self.duration)
  155. classroom_suboutbreak.get_global_infectious(self.valid_days)
  156. classroom_infectious = classroom_suboutbreak.global_infectious
  157. classroom_infectious_bc = np.broadcast_to(classroom_infectious[:, None],
  158. (self.n_days, self.n_patients))
  159. classroom_id_mask = (self.data_frame["Classroom"] == classroom).values
  160. classroom_slab = classroom_infectious_bc * classroom_id_mask
  161. self.slabs[f"Classroom_{classroom}"] = classroom_slab
  162.  
  163. latency_mean = 12
  164. latency_var = 5.433
  165. latency_parameters = lognormal_parameters(latency_mean, latency_var)
  166. stay_at_home = 1
  167. saturated = False
  168.  
  169. n_samples = 500
  170. n_tune = 1500
  171. n_chains = 4
  172.  
  173. model_name = "model_sah_"+str(int(stay_at_home))+"_sat_"+str(int(saturated))+"_"
  174.  
  175. outbreak = Observed_Outbreak(patients_data)
  176. outbreak.impute_latencies(latency_parameters)
  177. outbreak.get_global_infectious()
  178. outbreak.get_susceptible_states()
  179.  
  180. outbreak.get_group_infectious("Household_ID")
  181. outbreak.get_group_infectious("Family_ID")
  182. outbreak.get_group_infectious("Sex")
  183. outbreak.get_classroom_infectious(1, infectious_override=stay_at_home)
  184. outbreak.get_classroom_infectious(2, infectious_override=stay_at_home)
  185.  
  186. community_slab = theano.shared(outbreak.slabs["Community"])
  187. c1_slab = theano.shared(outbreak.slabs["Classroom_1"])
  188. c2_slab = theano.shared(outbreak.slabs["Classroom_2"])
  189. household_slab = theano.shared(outbreak.slabs["Household_ID"])
  190. family_slab = theano.shared(outbreak.slabs["Family_ID"])
  191. sex_slab = theano.shared(outbreak.slabs["Sex"])
  192. s_before = theano.shared(outbreak.states_before)
  193. s_after = theano.shared(outbreak.states_after)
  194.  
  195. with pm.Model() as this_model:
  196. rv_q_community = pm.Uniform("rv_q_community")
  197. ##rv_q_class_0 = pm.Uniform("rv_q_class_0")
  198. rv_q_class_1 = pm.Uniform("rv_q_class_1")
  199. rv_q_class_2 = pm.Uniform("rv_q_class_2")
  200. rv_q_household = pm.Uniform("rv_q_household")
  201. rv_q_family = pm.Uniform("rv_q_family")
  202. rv_q_sex = pm.Uniform("rv_q_sex")
  203.  
  204. probabilities = ((rv_q_community**community_slab) *
  205. (rv_q_class_1**c1_slab) *
  206. (rv_q_class_2**c2_slab) *
  207. (rv_q_household**household_slab) *
  208. (rv_q_family**family_slab) *
  209. (rv_q_sex**sex_slab))
  210.  
  211. ## probability is zero if already not susceptible
  212. probabilities = s_before * probabilities
  213. s_observed = pm.Bernoulli("s_observed", p=probabilities, observed=s_after)
  214.  
  215. print("Model created.")
  216.  
  217. marginalization_mc = 100
  218. for i in range(marginalization_mc):
  219. print(i)
  220. outbreak = Observed_Outbreak(patients_data)
  221. outbreak.impute_latencies(latency_parameters)
  222. outbreak.get_global_infectious()
  223. outbreak.get_susceptible_states()
  224.  
  225. outbreak.get_group_infectious("Household_ID")
  226. outbreak.get_group_infectious("Family_ID")
  227. outbreak.get_group_infectious("Sex")
  228. outbreak.get_classroom_infectious(1, infectious_override=stay_at_home)
  229. outbreak.get_classroom_infectious(2, infectious_override=stay_at_home)
  230.  
  231. community_slab.set_value(outbreak.slabs["Community"])
  232. c1_slab.set_value(outbreak.slabs["Classroom_1"])
  233. c2_slab.set_value(outbreak.slabs["Classroom_2"])
  234. household_slab.set_value(outbreak.slabs["Household_ID"])
  235. family_slab.set_value(outbreak.slabs["Family_ID"])
  236. sex_slab.set_value(outbreak.slabs["Sex"])
  237. s_before.set_value(outbreak.states_before)
  238. s_after.set_value(outbreak.states_after)
  239.  
  240. with this_model:
  241. this_trace = pm.sample(draws=n_samples, cores=n_chains,
  242. chains=n_chains, tune=n_tune)
  243. print(pm.summary(this_trace))
  244.  
  245. now = datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
  246. dump_filename = "models/"+model_name+now+".pkl"
  247. os.makedirs(os.path.dirname(dump_filename), exist_ok=True)
  248. model_parameters = {"latency_parameters": latency_parameters,
  249. "stay_at_home": stay_at_home,
  250. "saturated": saturated,
  251. "n_samples": n_samples,
  252. "n_tunes": n_tune,
  253. "n_chains": n_chains}
  254.  
  255. with open(dump_filename, "wb") as the_file:
  256. pickle.dump({"model_parameters": model_parameters,
  257. "latencies": outbreak.data_frame["Latency_Onset"],
  258. "model": this_model,
  259. "trace": this_trace}, the_file)
  260.  
  261. print(f"Model successfully dumped to file: {dump_filename}.")
  262.  
  263. print("Marginalization finished.")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement