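# Multiple linear regression (OLS) computed "by hand" with NumPy/pandas only:
# design matrix, normal equations, t- and F-statistics via a self-contained
# regularized incomplete beta function, standardized coefficients,
# elasticities, and a point forecast with confidence/prediction intervals.
# Data are the Variant 2 sample; per the report below, x1 is in thousand m²,
# x2 in thousand people/day, and y in million UAH.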
import math
import numpy as np
import pandas as pd

y = [1.55, 3.80, 5.40, 2.60, 5.60, 8.30, 7.30, 8.40, 4.20, 5.60]
x1 = [0.30, 0.60, 1.10, 0.20, 0.80, 1.60, 1.50, 0.90, 0.90, 0.80]
x2 = [0.80, 1.90, 2.70, 1.30, 2.80, 4.10, 3.60, 4.20, 2.10, 2.80]

def _col(v):
    v = np.asarray(v).reshape(-1)
    return v[:, None]


def design_matrix(x1, x2, intercept=True):
    x1c = _col(x1)
    x2c = _col(x2)
    if intercept:
        ones = np.ones_like(x1c)
        X = np.hstack([ones, x1c, x2c])
    else:
        X = np.hstack([x1c, x2c])
    return X


def safe_inv(M):
    try:
        return np.linalg.inv(M)
    except np.linalg.LinAlgError:
        return np.linalg.pinv(M)
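
# The helpers below provide t- and F-distribution CDFs and quantiles without
# SciPy. _betacf appears to follow the Numerical-Recipes-style continued
# fraction for the regularized incomplete beta function I_x(a, b); t_cdf and
# f_cdf are then expressed through I_x, and the quantile functions invert the
# CDFs by plain bisection.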

def _log_beta(a, b):
    return math.lgamma(a) + math.lgamma(b) - math.lgamma(a + b)


def _betacf(a, b, x, max_iter=200, eps=3e-14):
    # Continued-fraction evaluation used by betainc_regularized.
    am, bm = 1.0, 1.0
    az = 1.0
    qab = a + b
    qap = a + 1.0
    qam = a - 1.0
    bz = 1.0 - qab * x / qap
    if abs(bz) < 1e-300:
        bz = 1e-300
    for m in range(1, max_iter + 1):
        em = float(m)
        tem = em + em
        d = em * (b - em) * x / ((qam + tem) * (a + tem))
        ap = az + d * am
        bp = bz + d * bm
        if abs(bp) < 1e-300:
            bp = 1e-300
        d = -(a + em) * (qab + em) * x / ((a + tem) * (qap + tem))
        app = ap + d * az
        bpp = bp + d * bz
        if abs(bpp) < 1e-300:
            bpp = 1e-300
        az_old = az
        am, bm = ap / bpp, bp / bpp
        az, bz = app / bpp, 1.0
        # Stop once successive approximants of the fraction agree to eps.
        if abs(az - az_old) < eps * abs(az):
            return az
    return az

def betainc_regularized(a, b, x):
    if x <= 0.0:
        return 0.0
    if x >= 1.0:
        return 1.0
    # The same prefactor is used in both branches; the symmetry
    # I_x(a, b) = 1 - I_{1-x}(b, a) picks the faster-converging fraction.
    ln_bt = a * math.log(x) + b * math.log(1.0 - x) - _log_beta(a, b)
    if x < (a + 1.0) / (a + b + 2.0):
        cf = _betacf(a, b, x)
        return math.exp(ln_bt) * cf / a
    else:
        cf = _betacf(b, a, 1.0 - x)
        return 1.0 - math.exp(ln_bt) * cf / b

def t_cdf(t, v):
    if v <= 0:
        return float("nan")
    if t == 0.0:
        return 0.5
    x = v / (v + t * t)
    ib = betainc_regularized(v / 2.0, 0.5, x)
    if t > 0:
        return 1.0 - 0.5 * ib
    else:
        return 0.5 * ib

def t_ppf(p, v, tol=1e-10, max_iter=200):
    if not (0.0 < p < 1.0):
        return float("nan")
    if p == 0.5:
        return 0.0
    lo, hi = -50.0, 50.0
    for _ in range(max_iter):
        mid = (lo + hi) / 2.0
        c = t_cdf(mid, v)
        if abs(c - p) < tol:
            return mid
        if c < p:
            lo = mid
        else:
            hi = mid
    return (lo + hi) / 2.0

def f_cdf(x, df1, df2):
    if x <= 0.0:
        return 0.0
    z = (df1 * x) / (df1 * x + df2)
    return betainc_regularized(df1 / 2.0, df2 / 2.0, z)

def f_ppf(p, df1, df2, tol=1e-10, max_iter=200):
    if not (0.0 < p < 1.0):
        return float("nan")
    lo, hi = 0.0, 1.0
    while f_cdf(hi, df1, df2) < p and hi < 1e8:
        hi *= 2.0
    for _ in range(max_iter):
        mid = (lo + hi) / 2.0
        c = f_cdf(mid, df1, df2)
        if abs(c - p) < tol:
            return mid
        if c < p:
            lo = mid
        else:
            hi = mid
    return (lo + hi) / 2.0
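
# OLS via the normal equations: beta = (X'X)^(-1) X'y, residual variance
# s^2 = RSS / (n - p), Cov(beta) = s^2 (X'X)^(-1). Coefficient t-statistics,
# two-sided p-values, 95% confidence intervals, R^2 and the overall F-test
# F = (R^2 / k) / ((1 - R^2) / (n - p)) are all derived from these quantities.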

def ols_fit(X, y):
    y = _col(y)
    n, p = X.shape
    XtX = X.T @ X
    XtX_inv = safe_inv(XtX)
    Xty = X.T @ y
    beta = XtX_inv @ Xty
    yhat = X @ beta
    e = y - yhat
    RSS = float((e.T @ e).ravel()[0])
    ybar = float(np.mean(y))
    TSS = float(((y - ybar) ** 2).sum())
    df = n - p
    if df <= 0:
        raise ValueError("Not enough degrees of freedom (n - p must be > 0).")
    s2 = RSS / df
    cov_beta = s2 * XtX_inv
    se = np.sqrt(np.diag(cov_beta)).reshape(-1, 1)
    with np.errstate(divide="ignore", invalid="ignore"):
        tvals = beta / se
    tvals = np.where(np.isfinite(tvals), tvals, np.nan)
    pvals = np.empty_like(tvals)
    for i in range(tvals.size):
        ti = float(abs(tvals[i, 0]))
        pvals[i, 0] = 2.0 * (1.0 - t_cdf(ti, df))
    tcrit = t_ppf(0.975, df)
    ci_low = beta - tcrit * se
    ci_high = beta + tcrit * se
    R2 = 1.0 - RSS / TSS if TSS > 0 else float("nan")
    k = p - 1  # number of regressors excluding the intercept
    numer = R2 / k if k > 0 else float("nan")
    denom = (1.0 - R2) / df
    F = numer / denom if denom != 0.0 else float("nan")
    F_pvalue = 1.0 - f_cdf(F, k, df) if math.isfinite(F) else float("nan")
    F_crit = f_ppf(0.95, k, df) if k > 0 else float("nan")
    return dict(
        beta=beta, yhat=yhat, e=e, RSS=RSS, TSS=TSS, s2=s2, se=se,
        tvals=tvals, pvals=pvals, ci_low=ci_low, ci_high=ci_high,
        XtX=XtX, XtX_inv=XtX_inv, cov_beta=cov_beta,
        R2=R2, F=F, F_pvalue=F_pvalue, F_crit=F_crit,
        df_resid=df, k=k, p=p, n=n, tcrit=tcrit
    )
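
# Standardized (beta) coefficients and point elasticities at the sample means:
#   beta_j(std) = b_j * s_xj / s_y,   E_j = b_j * mean(x_j) / mean(y).
# Both are unit-free, so the separate influence of the factors can be compared.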

def standardized_betas(y, x1, x2, beta):
    y = np.asarray(y).reshape(-1)
    x1 = np.asarray(x1).reshape(-1)
    x2 = np.asarray(x2).reshape(-1)
    sy = np.std(y, ddof=1)
    sx1 = np.std(x1, ddof=1)
    sx2 = np.std(x2, ddof=1)
    b1, b2 = float(beta[1, 0]), float(beta[2, 0])
    beta1_std = b1 * (sx1 / sy) if sy > 0 else float("nan")
    beta2_std = b2 * (sx2 / sy) if sy > 0 else float("nan")
    return beta1_std, beta2_std


def elasticities(y, x1, x2, beta):
    y = np.asarray(y).reshape(-1)
    x1 = np.asarray(x1).reshape(-1)
    x2 = np.asarray(x2).reshape(-1)
    my = float(np.mean(y))
    mx1 = float(np.mean(x1))
    mx2 = float(np.mean(x2))
    b1, b2 = float(beta[1, 0]), float(beta[2, 0])
    E1 = b1 * (mx1 / my) if my != 0 else float("nan")
    E2 = b2 * (mx2 / my) if my != 0 else float("nan")
    return E1, E2
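
# Interval forecast at a new point x0 (with the leading 1 for the intercept):
# leverage h = x0' (X'X)^(-1) x0, then
#   95% CI for the mean response:   yhat0 ± t_crit * s * sqrt(h)
#   95% PI for a new observation:   yhat0 ± t_crit * s * sqrt(1 + h)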

def prediction_intervals(x_new, fit):
    x_new = np.asarray(x_new).reshape(-1, 1)
    beta = fit["beta"]
    XtX_inv = fit["XtX_inv"]
    s2 = float(fit["s2"])
    s = math.sqrt(s2)
    tcrit = fit["tcrit"]
    df = int(fit["df_resid"])
    yhat_new = float((x_new.T @ beta).ravel()[0])
    leverage = float((x_new.T @ XtX_inv @ x_new).ravel()[0])
    stderr_mean = float(s * math.sqrt(leverage))
    CI_low = yhat_new - tcrit * stderr_mean
    CI_high = yhat_new + tcrit * stderr_mean
    stderr_pred = float(s * math.sqrt(1.0 + leverage))
    PI_low = yhat_new - tcrit * stderr_pred
    PI_high = yhat_new + tcrit * stderr_pred
    return dict(
        yhat=yhat_new,
        CI95_low=CI_low, CI95_high=CI_high,
        PI95_low=PI_low, PI95_high=PI_high,
        df=df, tcrit=float(tcrit)
    )
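
# Driver: fits the model on the Variant 2 data and prints the report for
# items (1)-(6) of the assignment.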

def fit_and_report(y, x1, x2):
    src_df = pd.DataFrame({"y": y, "x1": x1, "x2": x2})
    print("=== Source data (Variant 2) ===")
    print(src_df.round(4).to_string(index=True))
    print(f"n = {len(y)} observations")

    X = design_matrix(x1, x2, intercept=True)
    fit = ols_fit(X, y)

    print("\n--- Matrix X'X ---")
    print(pd.DataFrame(fit["XtX"], index=["1", "x1", "x2"], columns=["1", "x1", "x2"]).round(6).to_string())
    print("\n--- (X'X)^(-1) ---")
    print(pd.DataFrame(fit["XtX_inv"], index=["a0", "a1", "a2"], columns=["a0", "a1", "a2"]).round(6).to_string())

    a0, a1, a2 = (float(fit["beta"][0, 0]), float(fit["beta"][1, 0]), float(fit["beta"][2, 0]))
    print("\n=== (1) Sample multiple regression equation ===")
    print(f"ŷ = {a0:.6f} + {a1:.6f}·x1 + {a2:.6f}·x2")

    print("\n=== (2) Disturbance variance and covariance matrix ===")
    print(f"s_e^2 = {fit['s2']:.6f} (df = {fit['df_resid']})")
    print("Cov(a) = s_e^2 · (X'X)^(-1):")
    print(pd.DataFrame(fit["cov_beta"], index=["a0", "a1", "a2"], columns=["a0", "a1", "a2"]).round(6).to_string())

    print("\n=== (3) t-tests for the coefficients (α=0.05) and 95% CIs ===")
    coef_tbl = pd.DataFrame({
        "coef": [a0, a1, a2],
        "SE": fit["se"].reshape(-1),
        "t": fit["tvals"].reshape(-1),
        "p": fit["pvals"].reshape(-1),
        "CI95_low": fit["ci_low"].reshape(-1),
        "CI95_high": fit["ci_high"].reshape(-1),
    }, index=["a0", "a1", "a2"])
    print(coef_tbl.round(6).to_string())

    R2 = float(fit["R2"])
    R = float(math.sqrt(R2)) if R2 >= 0 else float("nan")
    print("\n=== (4) Model variance and significance ===")
    print(f"R^2 = {R2:.6f}, R = {R:.6f}")
    print(f"F = {fit['F']:.6f} (df1 = {fit['k']}, df2 = {fit['df_resid']}), p-value = {fit['F_pvalue']:.6g}")
    print(f"F_crit (α=0.05) = {fit['F_crit']:.6f}")

    b1_std, b2_std = standardized_betas(y, x1, x2, fit["beta"])
    E1, E2 = elasticities(y, x1, x2, fit["beta"])
    comp_df = pd.DataFrame({"std_beta": [b1_std, b2_std], "elasticity": [E1, E2]},
                           index=["x1 (thousand m²)", "x2 (thousand people/day)"])
    print("\n=== (5) Comparison of the factors' separate influence ===")
    print(comp_df.round(6).to_string())
    stronger_std = "x1" if abs(b1_std) > abs(b2_std) else "x2"
    stronger_el = "x1" if abs(E1) > abs(E2) else "x2"
    print(f"→ Stronger influence by |std_beta|: {stronger_std}; by |elasticity|: {stronger_el}.")

    x_new = np.array([1.0, 1.2, 15.0])
    pred = prediction_intervals(x_new, fit)
    print("\n=== (6) Forecast at the point x1=1.2, x2=15 with 95% intervals ===")
    print(f"ŷ_new = {pred['yhat']:.6f}")
    print(f"95% CI (mean response): [{pred['CI95_low']:.6f}; {pred['CI95_high']:.6f}]")
    print(f"95% PI (new observation): [{pred['PI95_low']:.6f}; {pred['PI95_high']:.6f}]")
    print(f"(t_crit = {pred['tcrit']:.4f}, df = {pred['df']})")

    print("\n=== SUMMARY ===")
    print(f"Model: ŷ = {a0:.4f} + {a1:.4f}·x1 + {a2:.4f}·x2")
    print(f"R^2 = {R2:.4f}, F({fit['k']},{fit['df_resid']}) = {fit['F']:.4f} (F_crit={fit['F_crit']:.4f})")
    print(f"Forecast at (1.2; 15): ŷ = {pred['yhat']:.4f}, 95% PI = [{pred['PI95_low']:.4f}; {pred['PI95_high']:.4f}]")
    print("Units: x1 in thousand m², x2 in thousand people/day, y in million UAH.")


if __name__ == "__main__":
    fit_and_report(y, x1, x2)
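
# Optional cross-check sketch (not part of the assignment): if SciPy and
# statsmodels happen to be installed, the critical values and the fit can be
# compared against library routines, e.g.
#
#   from scipy import stats
#   import statsmodels.api as sm
#   print(stats.t.ppf(0.975, len(y) - 3), stats.f.ppf(0.95, 2, len(y) - 3))
#   print(sm.OLS(np.asarray(y), design_matrix(x1, x2)).fit().summary())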