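# Multiple linear regression (OLS) computed "by hand" with NumPy/pandas only:
# design matrix, normal equations, t- and F-statistics via a self-contained
# regularized incomplete beta function, standardized coefficients,
# elasticities, and a point forecast with confidence/prediction intervals.
# Data are the Variant 2 sample; per the report below, x1 is in thousand m²,
# x2 in thousand people/day, and y in million UAH.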
import math
import numpy as np
import pandas as pd

y = [1.55, 3.80, 5.40, 2.60, 5.60, 8.30, 7.30, 8.40, 4.20, 5.60]
x1 = [0.30, 0.60, 1.10, 0.20, 0.80, 1.60, 1.50, 0.90, 0.90, 0.80]
x2 = [0.80, 1.90, 2.70, 1.30, 2.80, 4.10, 3.60, 4.20, 2.10, 2.80]

def _col(v):
    v = np.asarray(v).reshape(-1)
    return v[:, None]


def design_matrix(x1, x2, intercept=True):
    x1c = _col(x1)
    x2c = _col(x2)
    if intercept:
        ones = np.ones_like(x1c)
        X = np.hstack([ones, x1c, x2c])
    else:
        X = np.hstack([x1c, x2c])
    return X


def safe_inv(M):
    try:
        return np.linalg.inv(M)
    except np.linalg.LinAlgError:
        return np.linalg.pinv(M)
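
# The helpers below provide t- and F-distribution CDFs and quantiles without
# SciPy. _betacf appears to follow the Numerical-Recipes-style continued
# fraction for the regularized incomplete beta function I_x(a, b); t_cdf and
# f_cdf are then expressed through I_x, and the quantile functions invert the
# CDFs by plain bisection.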

def _log_beta(a, b):
    return math.lgamma(a) + math.lgamma(b) - math.lgamma(a + b)


def _betacf(a, b, x, max_iter=200, eps=3e-14):
    # Continued-fraction evaluation used by betainc_regularized.
    am, bm = 1.0, 1.0
    az = 1.0
    qab = a + b
    qap = a + 1.0
    qam = a - 1.0
    bz = 1.0 - qab * x / qap
    if abs(bz) < 1e-300:
        bz = 1e-300
    for m in range(1, max_iter + 1):
        em = float(m)
        tem = em + em
        d = em * (b - em) * x / ((qam + tem) * (a + tem))
        ap = az + d * am
        bp = bz + d * bm
        if abs(bp) < 1e-300:
            bp = 1e-300
        d = -(a + em) * (qab + em) * x / ((a + tem) * (qap + tem))
        app = ap + d * az
        bpp = bp + d * bz
        if abs(bpp) < 1e-300:
            bpp = 1e-300
        az_old = az
        am, bm = ap / bpp, bp / bpp
        az, bz = app / bpp, 1.0
        # Stop once successive approximants of the fraction agree to eps.
        if abs(az - az_old) < eps * abs(az):
            return az
    return az

def betainc_regularized(a, b, x):
    if x <= 0.0:
        return 0.0
    if x >= 1.0:
        return 1.0
    # The same prefactor is used in both branches; the symmetry
    # I_x(a, b) = 1 - I_{1-x}(b, a) picks the faster-converging fraction.
    ln_bt = a * math.log(x) + b * math.log(1.0 - x) - _log_beta(a, b)
    if x < (a + 1.0) / (a + b + 2.0):
        cf = _betacf(a, b, x)
        return math.exp(ln_bt) * cf / a
    else:
        cf = _betacf(b, a, 1.0 - x)
        return 1.0 - math.exp(ln_bt) * cf / b

def t_cdf(t, v):
    if v <= 0:
        return float("nan")
    if t == 0.0:
        return 0.5
    x = v / (v + t * t)
    ib = betainc_regularized(v / 2.0, 0.5, x)
    if t > 0:
        return 1.0 - 0.5 * ib
    else:
        return 0.5 * ib

def t_ppf(p, v, tol=1e-10, max_iter=200):
    if not (0.0 < p < 1.0):
        return float("nan")
    if p == 0.5:
        return 0.0
    lo, hi = -50.0, 50.0
    for _ in range(max_iter):
        mid = (lo + hi) / 2.0
        c = t_cdf(mid, v)
        if abs(c - p) < tol:
            return mid
        if c < p:
            lo = mid
        else:
            hi = mid
    return (lo + hi) / 2.0

def f_cdf(x, df1, df2):
    if x <= 0.0:
        return 0.0
    z = (df1 * x) / (df1 * x + df2)
    return betainc_regularized(df1 / 2.0, df2 / 2.0, z)

def f_ppf(p, df1, df2, tol=1e-10, max_iter=200):
    if not (0.0 < p < 1.0):
        return float("nan")
    lo, hi = 0.0, 1.0
    while f_cdf(hi, df1, df2) < p and hi < 1e8:
        hi *= 2.0
    for _ in range(max_iter):
        mid = (lo + hi) / 2.0
        c = f_cdf(mid, df1, df2)
        if abs(c - p) < tol:
            return mid
        if c < p:
            lo = mid
        else:
            hi = mid
    return (lo + hi) / 2.0
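
# OLS via the normal equations: beta = (X'X)^(-1) X'y, residual variance
# s^2 = RSS / (n - p), Cov(beta) = s^2 (X'X)^(-1). Coefficient t-statistics,
# two-sided p-values, 95% confidence intervals, R^2 and the overall F-test
# F = (R^2 / k) / ((1 - R^2) / (n - p)) are all derived from these quantities.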

def ols_fit(X, y):
    y = _col(y)
    n, p = X.shape
    XtX = X.T @ X
    XtX_inv = safe_inv(XtX)
    Xty = X.T @ y
    beta = XtX_inv @ Xty
    yhat = X @ beta
    e = y - yhat
    RSS = float((e.T @ e).ravel()[0])
    ybar = float(np.mean(y))
    TSS = float(((y - ybar) ** 2).sum())
    df = n - p
    if df <= 0:
        raise ValueError("Not enough degrees of freedom (n - p must be > 0).")
    s2 = RSS / df
    cov_beta = s2 * XtX_inv
    se = np.sqrt(np.diag(cov_beta)).reshape(-1, 1)
    with np.errstate(divide="ignore", invalid="ignore"):
        tvals = beta / se
    tvals = np.where(np.isfinite(tvals), tvals, np.nan)
    pvals = np.empty_like(tvals)
    for i in range(tvals.size):
        ti = float(abs(tvals[i, 0]))
        pvals[i, 0] = 2.0 * (1.0 - t_cdf(ti, df))
    tcrit = t_ppf(0.975, df)
    ci_low = beta - tcrit * se
    ci_high = beta + tcrit * se
    R2 = 1.0 - RSS / TSS if TSS > 0 else float("nan")
    k = p - 1  # number of regressors excluding the intercept
    numer = R2 / k if k > 0 else float("nan")
    denom = (1.0 - R2) / df
    F = numer / denom if denom != 0.0 else float("nan")
    F_pvalue = 1.0 - f_cdf(F, k, df) if math.isfinite(F) else float("nan")
    F_crit = f_ppf(0.95, k, df) if k > 0 else float("nan")
    return dict(
        beta=beta, yhat=yhat, e=e, RSS=RSS, TSS=TSS, s2=s2, se=se,
        tvals=tvals, pvals=pvals, ci_low=ci_low, ci_high=ci_high,
        XtX=XtX, XtX_inv=XtX_inv, cov_beta=cov_beta,
        R2=R2, F=F, F_pvalue=F_pvalue, F_crit=F_crit,
        df_resid=df, k=k, p=p, n=n, tcrit=tcrit
    )
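
# Standardized (beta) coefficients and point elasticities at the sample means:
#   beta_j(std) = b_j * s_xj / s_y,   E_j = b_j * mean(x_j) / mean(y).
# Both are unit-free, so the separate influence of the factors can be compared.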

def standardized_betas(y, x1, x2, beta):
    y = np.asarray(y).reshape(-1)
    x1 = np.asarray(x1).reshape(-1)
    x2 = np.asarray(x2).reshape(-1)
    sy = np.std(y, ddof=1)
    sx1 = np.std(x1, ddof=1)
    sx2 = np.std(x2, ddof=1)
    b1, b2 = float(beta[1, 0]), float(beta[2, 0])
    beta1_std = b1 * (sx1 / sy) if sy > 0 else float("nan")
    beta2_std = b2 * (sx2 / sy) if sy > 0 else float("nan")
    return beta1_std, beta2_std


def elasticities(y, x1, x2, beta):
    y = np.asarray(y).reshape(-1)
    x1 = np.asarray(x1).reshape(-1)
    x2 = np.asarray(x2).reshape(-1)
    my = float(np.mean(y))
    mx1 = float(np.mean(x1))
    mx2 = float(np.mean(x2))
    b1, b2 = float(beta[1, 0]), float(beta[2, 0])
    E1 = b1 * (mx1 / my) if my != 0 else float("nan")
    E2 = b2 * (mx2 / my) if my != 0 else float("nan")
    return E1, E2
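
# Interval forecast at a new point x0 (with the leading 1 for the intercept):
# leverage h = x0' (X'X)^(-1) x0, then
#   95% CI for the mean response:   yhat0 ± t_crit * s * sqrt(h)
#   95% PI for a new observation:   yhat0 ± t_crit * s * sqrt(1 + h)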

def prediction_intervals(x_new, fit):
    x_new = np.asarray(x_new).reshape(-1, 1)
    beta = fit["beta"]
    XtX_inv = fit["XtX_inv"]
    s2 = float(fit["s2"])
    s = math.sqrt(s2)
    tcrit = fit["tcrit"]
    df = int(fit["df_resid"])
    yhat_new = float((x_new.T @ beta).ravel()[0])
    leverage = float((x_new.T @ XtX_inv @ x_new).ravel()[0])
    stderr_mean = float(s * math.sqrt(leverage))
    CI_low = yhat_new - tcrit * stderr_mean
    CI_high = yhat_new + tcrit * stderr_mean
    stderr_pred = float(s * math.sqrt(1.0 + leverage))
    PI_low = yhat_new - tcrit * stderr_pred
    PI_high = yhat_new + tcrit * stderr_pred
    return dict(
        yhat=yhat_new,
        CI95_low=CI_low, CI95_high=CI_high,
        PI95_low=PI_low, PI95_high=PI_high,
        df=df, tcrit=float(tcrit)
    )
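
# Driver: fits the model on the Variant 2 data and prints the report for
# items (1)-(6) of the assignment.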

def fit_and_report(y, x1, x2):
    src_df = pd.DataFrame({"y": y, "x1": x1, "x2": x2})
    print("=== Source data (Variant 2) ===")
    print(src_df.round(4).to_string(index=True))
    print(f"n = {len(y)} observations")

    X = design_matrix(x1, x2, intercept=True)
    fit = ols_fit(X, y)

    print("\n--- Matrix X'X ---")
    print(pd.DataFrame(fit["XtX"], index=["1", "x1", "x2"], columns=["1", "x1", "x2"]).round(6).to_string())
    print("\n--- (X'X)^(-1) ---")
    print(pd.DataFrame(fit["XtX_inv"], index=["a0", "a1", "a2"], columns=["a0", "a1", "a2"]).round(6).to_string())

    a0, a1, a2 = (float(fit["beta"][0, 0]), float(fit["beta"][1, 0]), float(fit["beta"][2, 0]))
    print("\n=== (1) Sample multiple regression equation ===")
    print(f"ŷ = {a0:.6f} + {a1:.6f}·x1 + {a2:.6f}·x2")

    print("\n=== (2) Disturbance variance and covariance matrix ===")
    print(f"s_e^2 = {fit['s2']:.6f} (df = {fit['df_resid']})")
    print("Cov(a) = s_e^2 · (X'X)^(-1):")
    print(pd.DataFrame(fit["cov_beta"], index=["a0", "a1", "a2"], columns=["a0", "a1", "a2"]).round(6).to_string())

    print("\n=== (3) t-tests for the coefficients (α=0.05) and 95% CIs ===")
    coef_tbl = pd.DataFrame({
        "coef": [a0, a1, a2],
        "SE": fit["se"].reshape(-1),
        "t": fit["tvals"].reshape(-1),
        "p": fit["pvals"].reshape(-1),
        "CI95_low": fit["ci_low"].reshape(-1),
        "CI95_high": fit["ci_high"].reshape(-1),
    }, index=["a0", "a1", "a2"])
    print(coef_tbl.round(6).to_string())

    R2 = float(fit["R2"])
    R = float(math.sqrt(R2)) if R2 >= 0 else float("nan")
    print("\n=== (4) Model variance and significance ===")
    print(f"R^2 = {R2:.6f}, R = {R:.6f}")
    print(f"F = {fit['F']:.6f} (df1 = {fit['k']}, df2 = {fit['df_resid']}), p-value = {fit['F_pvalue']:.6g}")
    print(f"F_crit (α=0.05) = {fit['F_crit']:.6f}")

    b1_std, b2_std = standardized_betas(y, x1, x2, fit["beta"])
    E1, E2 = elasticities(y, x1, x2, fit["beta"])
    comp_df = pd.DataFrame({"std_beta": [b1_std, b2_std], "elasticity": [E1, E2]},
                           index=["x1 (thousand m²)", "x2 (thousand people/day)"])
    print("\n=== (5) Comparison of the factors' separate influence ===")
    print(comp_df.round(6).to_string())
    stronger_std = "x1" if abs(b1_std) > abs(b2_std) else "x2"
    stronger_el = "x1" if abs(E1) > abs(E2) else "x2"
    print(f"→ Stronger influence by |std_beta|: {stronger_std}; by |elasticity|: {stronger_el}.")

    x_new = np.array([1.0, 1.2, 15.0])
    pred = prediction_intervals(x_new, fit)
    print("\n=== (6) Forecast at the point x1=1.2, x2=15 with 95% intervals ===")
    print(f"ŷ_new = {pred['yhat']:.6f}")
    print(f"95% CI (mean response): [{pred['CI95_low']:.6f}; {pred['CI95_high']:.6f}]")
    print(f"95% PI (new observation): [{pred['PI95_low']:.6f}; {pred['PI95_high']:.6f}]")
    print(f"(t_crit = {pred['tcrit']:.4f}, df = {pred['df']})")

    print("\n=== SUMMARY ===")
    print(f"Model: ŷ = {a0:.4f} + {a1:.4f}·x1 + {a2:.4f}·x2")
    print(f"R^2 = {R2:.4f}, F({fit['k']},{fit['df_resid']}) = {fit['F']:.4f} (F_crit={fit['F_crit']:.4f})")
    print(f"Forecast at (1.2; 15): ŷ = {pred['yhat']:.4f}, 95% PI = [{pred['PI95_low']:.4f}; {pred['PI95_high']:.4f}]")
    print("Units: x1 in thousand m², x2 in thousand people/day, y in million UAH.")


if __name__ == "__main__":
    fit_and_report(y, x1, x2)
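
# Optional cross-check sketch (not part of the assignment): if SciPy and
# statsmodels happen to be installed, the critical values and the fit can be
# compared against library routines, e.g.
#
#   from scipy import stats
#   import statsmodels.api as sm
#   print(stats.t.ppf(0.975, len(y) - 3), stats.f.ppf(0.95, 2, len(y) - 3))
#   print(sm.OLS(np.asarray(y), design_matrix(x1, x2)).fit().summary())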