Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # covid-estimates.py: Given a set of points, uses several functions to estimate
- # the progression of the virus. From the CSV, we can generate P0 (the base value
- # of the logistic curve) and r (the growth rate).
- # We need some form of estimate of how far along we are, so this program uses a
- # binary search to find the least-norm estimate.
- # The "estimate" in this case is a ratio between 0 and 1, with 1 being the
- # "asymptotic number of cases" (K in the logistic function).
- import pandas # Read in CSV.
- import numpy as np
- import matplotlib.pyplot as plt
- import math
def get_growth_rate(data):
    """Estimate the average per-day growth factor of the case counts.

    The estimate is the geometric-mean growth between the first and the
    last row: ``(last / first) ** (1 / n_days)``, where ``n_days`` is the
    number of steps between those rows (``len(data) - 1``).

    Rows are addressed by position, so the frame does not need a default
    0..n-1 index (e.g. it may have been filtered or re-indexed).

    Args:
        data: DataFrame with an "nCases" column. Presumably one row per
            day in chronological order -- verify against the CSV.

    Returns:
        The multiplicative growth factor per step (2.0 means doubling).

    Raises:
        ValueError: If there are fewer than two rows, or the first case
            count is zero, because no growth ratio can be formed.
    """
    cases = data["nCases"]
    n_days = len(cases) - 1
    if n_days < 1:
        raise ValueError("need at least two rows to estimate a growth rate")

    first = cases.iloc[0]
    last = cases.iloc[-1]
    if first == 0:
        raise ValueError("first nCases value is zero; growth ratio is undefined")

    # Only the two endpoints are used, so the result is prone to numerical
    # roundoff and to noise in those two samples. A possible improvement
    # would be to use the last ten days.
    return pow(last / first, 1 / n_days)
def get_init_population(data):
    """Return the "nCases" value of the first row (used as P0 by the caller).

    The row is selected by position rather than by index label, so the
    lookup also works when the frame's index does not start at 0.

    Args:
        data: DataFrame with an "nCases" column.

    Returns:
        The value stored in the first row of the "nCases" column.

    Raises:
        IndexError: If the frame has no rows.
    """
    return data["nCases"].iloc[0]
def logistic(P0, r, K, t):
    """Evaluate the logistic growth curve at time ``t``.

    Computes ``K / (1 + ((K - P0) / P0) * exp(-r * t))``.

    Args:
        P0: Value of the curve at ``t = 0``; must be non-zero because it
            is used as a divisor.
        r: Growth rate in the exponent.
        K: Carrying capacity (the value the curve approaches for r > 0
            as t grows).
        t: Time at which to evaluate the curve.

    Returns:
        The value of the logistic function at ``t``.
    """
    # math.exp is the direct (and usually more accurate) way to compute
    # e ** x, compared with math.pow(math.e, x).
    decay = math.exp(-r * t)
    return K / (1 + ((K - P0) / P0) * decay)
def generate_logistic_fn(P0, r, progression_rate, latest_nCases, n_days):
    """Build a logistic-growth time series for days ``0 .. n_days - 1``.

    The carrying capacity K is solved for first, as the latest case count
    divided by ``progression_rate``. K is then plugged into the logistic
    equation together with the initial population and the growth rate.

    Args:
        P0: Initial population passed through to ``logistic``.
        r: Growth rate passed through to ``logistic``.
        progression_rate: Divisor applied to ``latest_nCases`` to get K.
        latest_nCases: Most recent case count.
        n_days: Number of points to generate.

    Returns:
        A NumPy array with one logistic value per day.
    """
    carrying_capacity = latest_nCases / progression_rate
    curve = [logistic(P0, r, carrying_capacity, day) for day in range(n_days)]
    return np.asarray(curve)
def get_error(data, approximation):
    """Return the norm of the residual between data and its approximation.

    Both arguments are assumed to be NumPy arrays. The residual
    ``data - approximation`` is measured with ``np.linalg.norm`` using its
    default settings.
    """
    residual = data - approximation
    return np.linalg.norm(residual)
def approximate_progression_ratio(data, P0, r, n_days, latest, iterations=10):
    """Narrow down the progression ratio in (0, 1) that gives a low fit error.

    Starting from the bracket [0, 1], each iteration evaluates the logistic
    fit at the two quarter points of the current bracket and keeps the half
    whose quarter point has the lower error. A trace of every iteration is
    printed.

    Args:
        data: Observed case counts as a NumPy array.
        P0: Initial population for the logistic curve.
        r: Growth rate for the logistic curve.
        n_days: Number of points to generate for each candidate fit.
        latest: Most recent case count, used to derive the carrying capacity.
        iterations: Number of halving steps; defaults to 10.

    Returns:
        The midpoint of the final bracket.
    """
    start = 0.0
    end = 1.0

    # NOTE(review): comparing quarter points and then discarding half of the
    # bracket is a heuristic; it can drop the true minimiser when that lies
    # between the midpoint and the losing quarter point. Behaviour is kept
    # as is here -- confirm whether a ternary/golden-section search is wanted.
    # NOTE(review): the trace prints are assumed to run once per iteration --
    # confirm against the intended output.
    for _ in range(iterations):
        midpt = (start + end) / 2
        left = (start + midpt) / 2
        right = (end + midpt) / 2

        # Check against left and right. See which one has the lower error.
        left_curve = generate_logistic_fn(P0, r, left, latest, n_days)
        right_curve = generate_logistic_fn(P0, r, right, latest, n_days)
        left_err = get_error(data, left_curve)
        right_err = get_error(data, right_curve)
        print(f"Left Error: {left_err}")

        if left_err < right_err:
            end = midpt
            error = left_err
        else:
            start = midpt
            error = right_err

        # Reuse the curve already computed for the left quarter point.
        print(left_curve)
        print(f"Error: {error}")
        print(f"Start, End: ({start}, {end})")

    return (start + end) / 2
# Load the observed case counts and derive the logistic parameters from them.
data = pandas.read_csv("./COVID19-Data-Apr04.csv")
daily_factor = get_growth_rate(data)
# The logistic model uses exp(-r * t), so take the log of the daily factor.
growth_rate = math.log(daily_factor)
P0 = get_init_population(data)

cases = data["nCases"].to_numpy()
n_days = len(cases)
latest = cases[-1]

# Estimate how far along the curve the latest observation is.
approximate_progression = approximate_progression_ratio(
    cases, P0, growth_rate, n_days, latest
)

# Fit over the observed window, printed for inspection.
print(
    generate_logistic_fn(P0, growth_rate, approximate_progression, latest, n_days)
)

# Project the fit over four times the observed window and plot it against
# the observed data points.
approx_1 = generate_logistic_fn(
    P0, growth_rate, approximate_progression, latest, n_days * 4
)
plt.plot(approx_1)
plt.scatter(range(n_days), cases)
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement