Cassini / Earth Magnometer Correlations

"""
Saturn Signal Anomalies, Reproducibility Script
=================================================

Five independent tests on publicly available Cassini RPWS and INTERMAGNET data.
Each test includes a null/shuffle control.

Data requirements:
  - Cassini RPWS Key Parameter files (RPWS_KEY__2017*_?.TAB)
    Download from: https://pds-ppi.igpp.ucla.edu/
    Search: "Cassini RPWS":
    Download all 2017 files from:
    "Cassini RPWS Electron Densities from Upper Hybrid and Plasma Wave Frequencies"
    Place in: ./data/rpws_key/

  - INTERMAGNET 1-minute magnetometer data
    Download from: https://imag-data.bgs.ac.uk/GIN_V1/GINForms2
    Place in: ./data/OTT/, ./data/FRD/, ./data/YKC/, etc. (one directory per station)
    Use the Bulk Data tab to easily download all 2017 data for each station.

Setup:
    python -m venv venv
    venv\\Scripts\\activate        # Windows
    source venv/bin/activate       # Linux/Mac
    pip install numpy scipy astropy matplotlib

Usage:
    python reproduce_anomalies.py
    python reproduce_anomalies.py --data-dir /path/to/data
    python reproduce_anomalies.py --tests 1 3 5        # run specific tests
    python reproduce_anomalies.py --no-plots            # tables only
"""

import os
import sys
import glob
import argparse
import warnings
from datetime import datetime, timedelta, timezone
from pathlib import Path
from math import factorial

# ---------------------------------------------------------------------------
# Dependency check
# ---------------------------------------------------------------------------
_missing = []
for _pkg in ['numpy', 'scipy', 'astropy', 'matplotlib']:
    try:
        __import__(_pkg)
    except ImportError:
        _missing.append(_pkg)
if _missing:
    print(f"Missing required packages: {', '.join(_missing)}")
    print(f"Install with:  pip install {' '.join(_missing)}")
    sys.exit(1)

import numpy as np
from scipy import stats, signal
from astropy.timeseries import LombScargle

warnings.filterwarnings('ignore', category=RuntimeWarning)

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
DEFAULT_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
N_SHUFFLE = 1000          # shuffle iterations for null tests
CONFIDENCE = 0.99         # confidence level for significance
RNG_SEED = 42             # reproducibility

# All stations to test (geographic lat, geomagnetic lat, L-shell for reference)
ALL_STATIONS = ['BRW', 'YKC', 'MEA', 'OTT', 'NEW', 'FRD', 'SJG']

rng = np.random.default_rng(RNG_SEED)


#  DATA LOADING


def parse_rpws_timestamp(s):
    """Parse 'YYYY-DDDTHH:MM:SS.sss' to Unix timestamp."""
    try:
        dt = datetime.strptime(s[:20], '%Y-%jT%H:%M:%S.%f')
        return dt.replace(tzinfo=timezone.utc).timestamp()
    except (ValueError, IndexError):
        try:
            dt = datetime.strptime(s[:17], '%Y-%jT%H:%M:%S')
            return dt.replace(tzinfo=timezone.utc).timestamp()
        except (ValueError, IndexError):
            return np.nan


def load_rpws(data_dir):
    """Load Cassini RPWS Key Parameter spectral data."""
    rpws_dir = os.path.join(data_dir, 'rpws_key')
    pattern = os.path.join(rpws_dir, 'RPWS_KEY__2017*_?.TAB')
    files = sorted(glob.glob(pattern))
    if not files:
        for p in ['RPWS_KEY*2017*.TAB', 'rpws_key*2017*.tab', 'RPWS*2017*.TAB']:
            files = sorted(glob.glob(os.path.join(rpws_dir, p)))
            if files:
                break
    if not files:
        print(f"  ERROR: No RPWS files found in {rpws_dir}")
        print(f"  Searched: {pattern}")
        return None, None, None

    print(f"  Found {len(files)} RPWS files")

    frequencies = None
    all_times = []
    all_electric = []

    for filepath in files:
        with open(filepath, 'r') as f:
            lines = f.readlines()
        if len(lines) < 2:
            continue

        if frequencies is None:
            parts = lines[0].split()
            freq_vals = []
            for p in parts:
                try:
                    v = float(p)
                    if v > 0.1:
                        freq_vals.append(v)
                except ValueError:
                    continue
            if freq_vals:
                frequencies = np.array(freq_vals)

        for line in lines[1:]:
            parts = line.split()
            if len(parts) < 3:
                continue
            t = parse_rpws_timestamp(parts[0])
            if np.isnan(t):
                continue
            try:
                vals = [float(x) for x in parts[1:]]
            except ValueError:
                continue
            if frequencies is not None:
                n_freq = len(frequencies)
                if len(vals) >= n_freq:
                    vals = vals[-n_freq:]
                else:
                    continue
            all_times.append(t)
            all_electric.append(vals)

    if not all_times:
        print("  ERROR: No valid RPWS records parsed")
        return None, None, None

    times = np.array(all_times)
    electric = np.array(all_electric)

    order = np.argsort(times)
    times = times[order]
    electric = electric[order]

    n_chan = electric.shape[1] if electric.ndim == 2 else 0
    print(f"  Loaded: {len(times):,} records, {n_chan} channels")
    if frequencies is not None:
        print(f"  Frequency range: {frequencies[0]:.1f} – {frequencies[-1]:.1f} Hz")

    return frequencies, times, electric


def load_magnetometer(station_code, data_dir):
    """Load INTERMAGNET IAGA-2002 magnetometer data for one station."""
    station_dir = Path(data_dir) / station_code
    if not station_dir.exists():
        station_dir = Path(data_dir) / station_code.lower()
    if not station_dir.exists():
        print(f"  WARNING: No directory for {station_code}")
        return None, None

    all_times = []
    all_x = []

    file_list = sorted(
        list(station_dir.glob('*.min')) +
        list(station_dir.glob('*.sec')) +
        list(station_dir.glob('*.txt'))
    )
    if not file_list:
        print(f"  WARNING: No magnetometer files in {station_dir}")
        return None, None

    for fpath in file_list:
        header_done = False
        try:
            with open(fpath, 'r', errors='replace') as f:
                for line in f:
                    if not header_done:
                        if 'DATE' in line and 'TIME' in line:
                            header_done = True
                        continue
                    parts = line.split()
                    if len(parts) < 5:
                        continue
                    try:
                        dt = datetime.strptime(
                            parts[0] + ' ' + parts[1][:8], '%Y-%m-%d %H:%M:%S'
                        )
                        x_val = float(parts[3])
                        if abs(x_val) > 88880:
                            continue
                        all_times.append(
                            dt.replace(tzinfo=timezone.utc).timestamp()
                        )
                        all_x.append(x_val)
                    except (ValueError, IndexError):
                        continue
        except Exception:
            continue

    if not all_times:
        print(f"  WARNING: No valid records for {station_code}")
        return None, None

    times = np.array(all_times)
    values = np.array(all_x)
    order = np.argsort(times)
    times = times[order]
    values = values[order]

    print(f"  {station_code}: {len(times):,} records")
    return times, values


#  HELPER FUNCTIONS

def broadband_power(electric):
    """Compute log10 mean power across all channels per record."""
    with np.errstate(divide='ignore', invalid='ignore'):
        clipped = np.clip(electric, 1e-20, None)
        return np.nanmean(np.log10(clipped), axis=1)


def compute_acf(x, max_lag):
    """Normalized autocorrelation function."""
    x = x - np.nanmean(x)
    n = len(x)
    var = np.nanvar(x)
    if var == 0:
        return np.zeros(max_lag + 1)
    acf = np.zeros(max_lag + 1)
    for lag in range(max_lag + 1):
        valid = ~(np.isnan(x[:n - lag]) | np.isnan(x[lag:]))
        if valid.sum() < 10:
            acf[lag] = np.nan
        else:
            acf[lag] = np.nanmean(x[:n - lag][valid] * x[lag:][valid]) / var
    return acf


def timestamps_to_dow(timestamps):
    """Convert Unix timestamps to day-of-week (0=Monday, 6=Sunday)."""
    return np.array([
        datetime.fromtimestamp(t, tz=timezone.utc).weekday() for t in timestamps
    ])


def hourly_downsample(times, values):
    """Downsample to hourly means, return (unix_times, values)."""
    if len(times) == 0:
        return np.array([]), np.array([])
    hour_ts = (times // 3600) * 3600
    unique_hours = np.unique(hour_ts)
    h_times = []
    h_vals = []
    for h in unique_hours:
        mask = hour_ts == h
        v = values[mask]
        v = v[~np.isnan(v)]
        if len(v) > 0:
            h_times.append(h)
            h_vals.append(np.mean(v))
    return np.array(h_times), np.array(h_vals)


def kruskal_by_group(values, groups, n_groups):
    """Kruskal-Wallis test across groups. Returns H, p, group_medians."""
    group_data = [values[groups == g] for g in range(n_groups)]
    group_data = [g[~np.isnan(g)] for g in group_data]
    group_data = [g for g in group_data if len(g) > 0]
    if len(group_data) < 2:
        return 0.0, 1.0, np.full(n_groups, np.nan)
    H, p = stats.kruskal(*group_data)
    medians = np.array([np.median(g) if len(g) > 0 else np.nan for g in group_data])
    return H, p, medians


def mutual_information_binned(x, y, n_bins=16):
    """Estimate mutual information using 2D histogram."""
    valid = ~(np.isnan(x) | np.isnan(y))
    x, y = x[valid], y[valid]
    if len(x) < 100:
        return np.nan
    hist_2d, _, _ = np.histogram2d(x, y, bins=n_bins)
    pxy = hist_2d / hist_2d.sum()
    px = pxy.sum(axis=1)
    py = pxy.sum(axis=0)
    mask = pxy > 0
    mi = np.sum(pxy[mask] * np.log2(pxy[mask] / (px[:, None] * py[None, :])[mask]))
    return mi


def fishers_combined_p(p_values):
    """Fisher's method: combine independent p-values.
    Returns chi2 statistic and combined p-value."""
    p_arr = np.array(p_values, dtype=float)
    # Clip to avoid log(0)
    p_arr = np.clip(p_arr, 1e-300, 1.0)
    chi2 = -2.0 * np.sum(np.log(p_arr))
    dof = 2 * len(p_arr)
    combined_p = stats.chi2.sf(chi2, dof)
    return chi2, combined_p


def stouffers_z(p_values, one_sided_positive=None, weights=None):
    """Stouffer's method: combine p-values via z-scores.

    Args:
        p_values: array of two-sided p-values
        one_sided_positive: if provided, array of bools indicating whether
            each test's effect was in the expected (positive) direction.
            Two-sided p-values are converted to one-sided.
        weights: optional weights (e.g., sqrt(N) for each station)

    Returns:
        combined_z, combined_p (one-sided if one_sided_positive given)
    """
    p_arr = np.array(p_values, dtype=float)
    p_arr = np.clip(p_arr, 1e-300, 1.0 - 1e-10)

    if one_sided_positive is not None:
        # Convert two-sided to one-sided in the positive direction
        one_sided = np.where(one_sided_positive, p_arr / 2, 1.0 - p_arr / 2)
    else:
        one_sided = p_arr

    z_scores = stats.norm.ppf(1 - one_sided)

    if weights is not None:
        w = np.array(weights, dtype=float)
        combined_z = np.sum(w * z_scores) / np.sqrt(np.sum(w**2))
    else:
        combined_z = np.sum(z_scores) / np.sqrt(len(z_scores))

    combined_p = stats.norm.sf(combined_z)
    return combined_z, combined_p


def compute_n_eff_ratio(x, y, max_acf_lag=200):
    """Compute effective sample size ratio using Bartlett's formula,
    with a floor to prevent over-correction."""
    n = min(len(x), len(y))
    max_acf_lag = min(max_acf_lag, n // 4)
    x_acf = compute_acf(x[:min(5000, n)], max_acf_lag)
    y_acf = compute_acf(y[:min(5000, n)], max_acf_lag)

    # Use only lags where ACF product is positive (avoids overcounting
    # when ACFs oscillate and partially cancel)
    valid = ~(np.isnan(x_acf[1:]) | np.isnan(y_acf[1:]))
    products = x_acf[1:][valid] * y_acf[1:][valid]

    # Truncate sum at first negative product (conservative Bartlett)
    truncated_sum = 0.0
    for p in products:
        if p <= 0:
            break
        truncated_sum += p

    autocorr_sum = 2.0 * truncated_sum
    ratio = 1.0 / max(1.0 + autocorr_sum, 1.0)

    # Floor: n_eff should be at least n/50, below this the estimate
    # is unreliable (means ACF hasn't decayed in the window)
    ratio = max(ratio, 1.0 / 50.0)

    return ratio


#  TEST 1: DAY-OF-WEEK EFFECT IN SKR


def test1_dow_skr(frequencies, times, electric, do_plot=True):
    """Test 1: Is there a 7-day-of-week pattern in Saturn's radio emission?"""
    print("\n" + "=" * 70)
    print("TEST 1: Day-of-Week Effect in SKR")
    print("=" * 70)

    bb = broadband_power(electric)
    valid = ~np.isnan(bb)
    bb_v = bb[valid]
    times_v = times[valid]

    # Remove long-term trend (30-day running median)
    window = 30 * 24 * 60
    if len(bb_v) > window:
        from scipy.ndimage import median_filter
        trend = median_filter(bb_v, size=min(window, len(bb_v) // 2) | 1)
        residual = bb_v - trend
    else:
        residual = bb_v - np.median(bb_v)

    dow = timestamps_to_dow(times_v)

    H_obs, p_obs, medians = kruskal_by_group(residual, dow, 7)

    day_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    means = np.array([np.mean(residual[dow == d]) for d in range(7)])

    print(f"\n  Kruskal-Wallis H = {H_obs:.1f}, p = {p_obs:.2e}")
    print(f"  N = {len(residual):,} records\n")
    print(f"  {'Day':<12} {'Mean deviation (dex)':>20} {'N':>10}")
    print(f"  {'-'*42}")
    for d in np.argsort(means)[::-1]:
        n_d = np.sum(dow == d)
        flag = " ← max" if d == np.argmax(means) else (" ← min" if d == np.argmin(means) else "")
        print(f"  {day_names[d]:<12} {means[d]:>+20.4f} {n_d:>10,}{flag}")

    # Shuffle null
    print(f"\n  Shuffle null ({N_SHUFFLE} iterations)...")
    H_null = np.zeros(N_SHUFFLE)
    for i in range(N_SHUFFLE):
        dow_shuf = rng.permutation(dow)
        H_null[i], _, _ = kruskal_by_group(residual, dow_shuf, 7)

    p_empirical = np.mean(H_null >= H_obs)
    print(f"  Observed H = {H_obs:.1f}")
    print(f"  Null H: mean = {np.mean(H_null):.1f}, max = {np.max(H_null):.1f}")
    print(f"  Empirical p = {p_empirical:.4f} ({N_SHUFFLE} shuffles)")

    if do_plot:
        try:
            import matplotlib.pyplot as plt
            fig, axes = plt.subplots(1, 2, figsize=(12, 5))
            colors = ['#d62728' if d == np.argmax(means) else
                      '#1f77b4' if d == np.argmin(means) else '#7f7f7f'
                      for d in range(7)]
            axes[0].bar(range(7), means, color=colors, edgecolor='black', linewidth=0.5)
            axes[0].set_xticks(range(7))
            axes[0].set_xticklabels(day_names)
            axes[0].set_ylabel('Mean SKR deviation (dex)')
            axes[0].set_title(f'SKR Power by Day of Week\nH = {H_obs:.1f}, p = {p_obs:.2e}')
            axes[0].axhline(0, color='black', linewidth=0.5)
            axes[1].hist(H_null, bins=50, color='gray', edgecolor='black',
                         linewidth=0.5, alpha=0.7, label='Null (shuffled)')
            axes[1].axvline(H_obs, color='red', linewidth=2,
                            label=f'Observed H = {H_obs:.1f}')
            axes[1].set_xlabel('Kruskal-Wallis H')
            axes[1].set_ylabel('Count')
            axes[1].set_title(f'Shuffle Control ({N_SHUFFLE} iterations)')
            axes[1].legend()
            plt.tight_layout()
            plt.savefig('test1_dow_skr.png', dpi=150)
            plt.close()
            print("  → Saved: test1_dow_skr.png")
        except ImportError:
            print("  (matplotlib not available, skipping plot)")

    return {'H': H_obs, 'p': p_obs, 'p_shuffle': p_empirical, 'means': means}


#  TEST 2: 16-MINUTE COMB IN AUTOCORRELATION


def test2_16min_comb(frequencies, times, electric, do_plot=True):
    """Test 2: Does the autocorrelation show spikes at multiples of 16 minutes?"""
    print("\n" + "=" * 70)
    print("TEST 2: 16-Minute Comb in Autocorrelation")
    print("=" * 70)

    bb = broadband_power(electric)
    dt_median = np.median(np.diff(times[:1000]))
    print(f"  Median sample spacing: {dt_median:.1f} seconds")

    if dt_median > 120:
        print("  WARNING: Cadence > 2 minutes, 16-minute test may not resolve properly")

    valid = ~np.isnan(bb)
    bb_clean = bb[valid]

    max_lag = 200
    print(f"  Computing ACF to lag {max_lag}...")
    acf = compute_acf(bb_clean, max_lag)

    lags_all = np.arange(1, max_lag + 1)
    is_mult_16 = (lags_all % 16 == 0)

    fit_lags = lags_all[~is_mult_16]
    fit_acf = acf[fit_lags]

    from scipy.optimize import curve_fit

    def acf_envelope(lag, a1, b1, a2, b2, c):
        return a1 * np.exp(-b1 * lag) + a2 * np.exp(-b2 * lag) + c

    try:
        popt, _ = curve_fit(acf_envelope, fit_lags.astype(float), fit_acf,
                            p0=[0.5, 0.05, 0.3, 0.005, 0.0],
                            maxfev=10000, bounds=([0, 0, 0, 0, -1], [2, 1, 2, 1, 1]))
        envelope = acf_envelope(lags_all.astype(float), *popt)
        print(f"  Envelope fit: dual-exponential (residual σ = "
              f"{np.std(fit_acf - acf_envelope(fit_lags.astype(float), *popt)):.5f})")
    except (RuntimeError, ValueError):
        from scipy.interpolate import UnivariateSpline
        spl = UnivariateSpline(fit_lags, fit_acf, s=len(fit_lags) * 0.001)
        envelope = spl(lags_all)
        print("  Envelope fit: spline fallback")

    residuals_all = acf[1:] - envelope

    multiples_16 = np.arange(16, max_lag + 1, 16)
    mult_16_idx = multiples_16 - 1
    non_mult_idx = np.where(~is_mult_16)[0]

    excess_at_16 = residuals_all[mult_16_idx]
    excess_at_other = residuals_all[non_mult_idx]

    mean_excess_16 = np.nanmean(excess_at_16)
    std_other = np.nanstd(excess_at_other)
    z_comb = mean_excess_16 / max(std_other, 1e-10)

    print(f"\n  After removing smooth ACF envelope:")
    print(f"  Mean excess at multiples of 16:  {mean_excess_16:.5f}")
    print(f"  Std of non-16 residuals:         {std_other:.5f}")
    print(f"  Z-score of 16-min comb:          {z_comb:.2f}")
    print()

    print(f"  {'Lag':>6} {'Raw ACF':>9} {'Envelope':>10} {'Excess':>8} {'Z':>6}")
    print(f"  {'-'*45}")
    for m in multiples_16[:8]:
        idx = m - 1
        exc = residuals_all[idx]
        z_i = exc / max(std_other, 1e-10)
        print(f"  {m:>6} {acf[m]:>+9.4f} {envelope[idx]:>+10.4f} {exc:>+8.5f} {z_i:>+6.1f}")

    # Phase-randomized surrogate null
    print(f"\n  Phase-randomized surrogate null ({N_SHUFFLE} iterations)...")
    n_pts = len(bb_clean)
    fft_orig = np.fft.rfft(bb_clean - np.mean(bb_clean))
    amplitudes = np.abs(fft_orig)

    excess_null = np.zeros(N_SHUFFLE)
    for i in range(N_SHUFFLE):
        random_phases = rng.uniform(0, 2 * np.pi, size=len(amplitudes))
        random_phases[0] = 0
        if n_pts % 2 == 0:
            random_phases[-1] = 0
        surrogate = np.fft.irfft(amplitudes * np.exp(1j * random_phases), n=n_pts)
        acf_surr = compute_acf(surrogate, max_lag)
        resid_surr = acf_surr[1:] - envelope
        excess_null[i] = np.nanmean(resid_surr[mult_16_idx])

    p_surrogate = np.mean(excess_null >= mean_excess_16)
    print(f"  Observed excess at 16-multiples: {mean_excess_16:.5f}")
    print(f"  Null excess: mean = {np.mean(excess_null):.5f}, std = {np.std(excess_null):.5f}")
    print(f"  Empirical p = {p_surrogate:.4f}")

    multiples_16_raw = np.arange(16, max_lag + 1, 16)
    non_multiples_raw = np.array([l for l in range(1, max_lag + 1) if l % 16 != 0])
    mean_spike = np.nanmean(acf[multiples_16_raw])
    mean_background = np.nanmean(acf[non_multiples_raw])
    raw_ratio = mean_spike / max(abs(mean_background), 1e-10)
    print(f"\n  Raw ratio (for reference): {raw_ratio:.1f}× (spike/background without envelope removal)")

    if do_plot:
        try:
            import matplotlib.pyplot as plt
            fig, axes = plt.subplots(1, 3, figsize=(18, 5))
            axes[0].bar(lags_all, acf[1:], color='steelblue', width=0.8,
                        edgecolor='none', alpha=0.6, label='ACF')
            axes[0].bar(multiples_16, acf[multiples_16], color='red', width=0.8,
                        edgecolor='none', alpha=0.8, label='Multiples of 16')
            axes[0].plot(lags_all, envelope, 'k-', linewidth=2, label='Fitted envelope')
            axes[0].set_xlabel('Lag (minutes)')
            axes[0].set_ylabel('ACF')
            axes[0].set_title('Raw ACF + Smooth Envelope')
            axes[0].legend(fontsize=8)
            axes[0].set_xlim(0, max_lag)
            colors_resid = ['red' if l % 16 == 0 else 'steelblue' for l in lags_all]
            axes[1].bar(lags_all, residuals_all, color=colors_resid,
                        width=0.8, edgecolor='none')
            axes[1].axhline(0, color='black', linewidth=0.5)
            axes[1].set_xlabel('Lag (minutes)')
            axes[1].set_ylabel('ACF − Envelope')
            axes[1].set_title(f'Residuals (z = {z_comb:.1f} for 16-min comb)')
            axes[1].set_xlim(0, max_lag)
            axes[2].hist(excess_null, bins=50, color='gray', edgecolor='black',
                         linewidth=0.5, alpha=0.7, label='Phase-randomized surrogates')
            axes[2].axvline(mean_excess_16, color='red', linewidth=2,
                            label=f'Observed = {mean_excess_16:.5f}')
            axes[2].set_xlabel('Mean excess at 16-multiples')
            axes[2].set_ylabel('Count')
            axes[2].set_title(f'Surrogate Null (p = {p_surrogate:.4f})')
            axes[2].legend(fontsize=8)
            plt.tight_layout()
            plt.savefig('test2_16min_comb.png', dpi=150)
            plt.close()
            print("  → Saved: test2_16min_comb.png")
        except ImportError:
            print("  (matplotlib not available, skipping plot)")

    return {'raw_ratio': raw_ratio, 'z_comb': z_comb, 'p_surrogate': p_surrogate,
            'mean_excess': mean_excess_16, 'acf': acf}


#  TEST 3: SKR–EARTH MUTUAL INFORMATION


def test3_mutual_information(rpws_times, rpws_electric, mag_times, mag_values,
                             station_code, do_plot=True):
    """Test 3: Is there mutual information between SKR and Earth ground magnetometers?"""
    print("\n" + "=" * 70)
    print(f"TEST 3: SKR–Earth Mutual Information (station: {station_code})")
    print("=" * 70)

    bb = broadband_power(rpws_electric)
    skr_h_times, skr_h_vals = hourly_downsample(rpws_times, bb)

    dt_mag = np.median(np.diff(mag_times[:1000]))
    dhdt = np.abs(np.diff(mag_values)) / max(dt_mag, 1.0)
    mag_deriv_times = mag_times[:-1]
    mag_h_times, mag_h_vals = hourly_downsample(mag_deriv_times, dhdt)

    if len(skr_h_vals) < 100 or len(mag_h_vals) < 100:
        print("  ERROR: Insufficient overlapping hourly data")
        return None

    skr_set = set(skr_h_times.astype(int))
    mag_set = set(mag_h_times.astype(int))
    common = sorted(skr_set & mag_set)

    if len(common) < 100:
        print(f"  ERROR: Only {len(common)} common hours, need at least 100")
        return None

    print(f"  Common hourly bins: {len(common)}")

    skr_lookup = dict(zip(skr_h_times.astype(int), skr_h_vals))
    mag_lookup = dict(zip(mag_h_times.astype(int), mag_h_vals))

    skr_aligned = np.array([skr_lookup[h] for h in common])
    mag_aligned = np.array([mag_lookup[h] for h in common])

    from scipy.stats import rankdata
    skr_ranked = rankdata(skr_aligned) / len(skr_aligned)
    mag_ranked = rankdata(mag_aligned) / len(mag_aligned)

    lags_hours = np.arange(0, 337, 2)
    n_mi_bins = 24
    mi_observed = np.zeros(len(lags_hours))
    mi_null_mean = np.zeros(len(lags_hours))
    mi_null_std = np.zeros(len(lags_hours))
    z_scores = np.zeros(len(lags_hours))

    n = len(skr_ranked)
    n_shuffle_per_lag = min(N_SHUFFLE, 500)
    print(f"  Computing MI at {len(lags_hours)} lags (0–336h, step=2h)...")
    print(f"  Using {n_mi_bins} bins, rank-transformed data, {n_shuffle_per_lag} shuffles/lag")

    for i, lag in enumerate(lags_hours):
        if lag == 0:
            x, y = skr_ranked, mag_ranked
        elif lag < n:
            x = skr_ranked[:n - lag]
            y = mag_ranked[lag:]
        else:
            mi_observed[i] = np.nan
            continue

        mi_observed[i] = mutual_information_binned(x, y, n_bins=n_mi_bins)

        block_size = 24
        n_blocks = len(y) // block_size
        mi_shuf = np.zeros(n_shuffle_per_lag)
        for j in range(n_shuffle_per_lag):
            if n_blocks > 1:
                block_indices = rng.permutation(n_blocks)
                y_shuf = np.concatenate([
                    y[bi * block_size:(bi + 1) * block_size]
                    for bi in block_indices
                ])
                min_len = min(len(x), len(y_shuf))
                mi_shuf[j] = mutual_information_binned(
                    x[:min_len], y_shuf[:min_len], n_bins=n_mi_bins
                )
            else:
                y_shuf = rng.permutation(y)
                mi_shuf[j] = mutual_information_binned(x, y_shuf, n_bins=n_mi_bins)

        mi_null_mean[i] = np.nanmean(mi_shuf)
        mi_null_std[i] = np.nanstd(mi_shuf)
        if mi_null_std[i] > 0:
            z_scores[i] = (mi_observed[i] - mi_null_mean[i]) / mi_null_std[i]
        else:
            z_scores[i] = 0

    sig_lags_mi = np.sum(z_scores > 3)
    mean_z = np.nanmean(z_scores)

    # Spearman correlation at each lag
    print(f"\n  Complementary Spearman correlation test...")
    spearman_r = np.zeros(len(lags_hours))
    spearman_p = np.zeros(len(lags_hours))
    for i, lag in enumerate(lags_hours):
        if lag == 0:
            x, y = skr_aligned, mag_aligned
        elif lag < n:
            x = skr_aligned[:n - lag]
            y = mag_aligned[lag:]
        else:
            spearman_r[i] = np.nan
            spearman_p[i] = np.nan
            continue
        r, p = stats.spearmanr(x, y)
        spearman_r[i] = r
        spearman_p[i] = p

    # Effective degrees of freedom correction using truncated Bartlett
    n_eff_ratio = compute_n_eff_ratio(skr_aligned, mag_aligned)
    n_eff = max(int(n * n_eff_ratio), 20)
    print(f"  Effective sample size: {n_eff} / {n} (ratio = {n_eff_ratio:.3f})")

    # Recompute Spearman p-values with effective N
    spearman_p_corrected = np.ones(len(lags_hours))
    for i in range(len(lags_hours)):
        if np.isnan(spearman_r[i]):
            continue
        r_val = spearman_r[i]
        lag = lags_hours[i]
        n_at_lag = n - lag if lag < n else 0
        n_eff_at_lag = max(int(n_at_lag * n_eff_ratio), 4)
        if abs(r_val) < 1.0 and n_eff_at_lag > 3:
            t_stat = r_val * np.sqrt((n_eff_at_lag - 2) / (1 - r_val**2))
            spearman_p_corrected[i] = 2.0 * stats.t.sf(abs(t_stat), df=n_eff_at_lag - 2)
        else:
            spearman_p_corrected[i] = 1.0

    sig_spearman_raw = np.sum(spearman_p < 0.001)
    sig_spearman_corrected = np.sum(spearman_p_corrected < 0.001)

    # Best Spearman correlation (by corrected p)
    valid_mask = ~np.isnan(spearman_r)
    best_r_idx = np.nanargmax(np.abs(spearman_r)) if np.any(valid_mask) else 0
    best_r = spearman_r[best_r_idx] if np.any(valid_mask) else 0
    best_r_p_raw = spearman_p[best_r_idx] if np.any(valid_mask) else 1
    best_r_p_adj = spearman_p_corrected[best_r_idx] if np.any(valid_mask) else 1

    print(f"\n  === MI Results ===")
    print(f"  Lags with z > 3 (block-shuffle): {sig_lags_mi} / {len(lags_hours)}")
    print(f"  Mean z-score: {mean_z:.2f}")
    if np.any(~np.isnan(z_scores)):
        print(f"  Max z-score: {np.nanmax(z_scores):.2f} at lag {lags_hours[np.nanargmax(z_scores)]}h")

    print(f"\n  === Spearman Results ===")
    print(f"  Lags with p < 0.001 (raw):       {sig_spearman_raw} / {len(lags_hours)}")
    print(f"  Lags with p < 0.001 (n_eff adj): {sig_spearman_corrected} / {len(lags_hours)}")
    print(f"  Max |r| = {best_r:+.4f} at lag {lags_hours[best_r_idx]}h "
          f"(p_raw = {best_r_p_raw:.2e}, p_adj = {best_r_p_adj:.2e})")

    # Overall significance: combine MI block-shuffle evidence
    # with corrected Spearman evidence
    # Station is "significant" if EITHER:
    #   - MI: multiple lags exceed block-shuffle null (≥3 at z>3,
    #     well above the ~0.2 expected by chance from 169 lags)
    #   - Spearman: max |r| survives n_eff correction at p<0.01
    sig_mi = sig_lags_mi >= 3
    sig_spearman = best_r_p_adj < 0.01
    sig_overall = sig_mi or sig_spearman

    if do_plot:
        try:
            import matplotlib.pyplot as plt
            fig, axes = plt.subplots(1, 3, figsize=(18, 5))
            axes[0].plot(lags_hours, mi_observed, 'b-', linewidth=1, label='Observed MI')
            axes[0].fill_between(lags_hours,
                                 mi_null_mean - 2 * mi_null_std,
                                 mi_null_mean + 2 * mi_null_std,
                                 alpha=0.3, color='gray', label='Null ±2σ (block-shuffle)')
            axes[0].plot(lags_hours, mi_null_mean, 'k--', linewidth=0.5, label='Null mean')
            axes[0].set_xlabel('Lag (hours)')
            axes[0].set_ylabel('Mutual Information (bits)')
            axes[0].set_title(f'SKR ↔ {station_code} MI (rank-transformed, {n_mi_bins} bins)')
            axes[0].legend(fontsize=8)
            colors_z = ['red' if z > 3 else 'steelblue' for z in z_scores]
            axes[1].bar(lags_hours, z_scores, color=colors_z, width=1.8)
            axes[1].axhline(3, color='red', linestyle='--', linewidth=1, label='z = 3')
            axes[1].axhline(-3, color='red', linestyle='--', linewidth=1)
            axes[1].set_xlabel('Lag (hours)')
            axes[1].set_ylabel('z-score (block-shuffle null)')
            axes[1].set_title(f'MI Significance ({station_code})')
            axes[1].legend()
            sig_mask_raw = spearman_p < 0.001
            sig_mask_adj = spearman_p_corrected < 0.001
            for idx in range(len(lags_hours)):
                if sig_mask_adj[idx]:
                    axes[2].scatter(lags_hours[idx], spearman_r[idx],
                                    c='red', s=12, zorder=5)
                elif sig_mask_raw[idx]:
                    axes[2].scatter(lags_hours[idx], spearman_r[idx],
                                    c='orange', s=8, zorder=4)
                else:
                    axes[2].scatter(lags_hours[idx], spearman_r[idx],
                                    c='gray', s=5, alpha=0.5, zorder=3)
            axes[2].axhline(0, color='black', linewidth=0.5)
            axes[2].set_xlabel('Lag (hours)')
            axes[2].set_ylabel('Spearman r')
            axes[2].set_title(f'Spearman ({station_code}) [red=n_eff adj, orange=raw only]')
            plt.tight_layout()
            plt.savefig(f'test3_mi_{station_code}.png', dpi=150)
            plt.close()
            print(f"  → Saved: test3_mi_{station_code}.png")
        except ImportError:
            print("  (matplotlib not available, skipping plot)")

    return {
        'sig_lags_mi': sig_lags_mi,
        'mean_z': mean_z,
        'max_z': np.nanmax(z_scores) if np.any(~np.isnan(z_scores)) else 0,
        'sig_lags_spearman_raw': sig_spearman_raw,
        'sig_lags_spearman_adj': sig_spearman_corrected,
        'n_eff_ratio': n_eff_ratio,
        'max_abs_r': abs(best_r),
        'max_r_p_raw': best_r_p_raw,
        'max_r_p_adj': best_r_p_adj,
        'sig_mi': sig_mi,
        'sig_spearman': sig_spearman,
        'sig_overall': sig_overall,
    }


#  TEST 4: DIRECTIONAL ASYMMETRY


def test4_directionality(rpws_times, rpws_electric, mag_times, mag_values,
                         station_code, do_plot=True):
    """Test 4: Does information flow bidirectionally between SKR and Earth?"""
    print("\n" + "=" * 70)
    print(f"TEST 4: Directional Asymmetry (station: {station_code})")
    print("=" * 70)

    bb = broadband_power(rpws_electric)
    skr_h_t, skr_h_v = hourly_downsample(rpws_times, bb)
    dhdt = np.abs(np.diff(mag_values)) / max(np.median(np.diff(mag_times[:1000])), 1.0)
    mag_h_t, mag_h_v = hourly_downsample(mag_times[:-1], dhdt)

    skr_lookup = dict(zip(skr_h_t.astype(int), skr_h_v))
    mag_lookup = dict(zip(mag_h_t.astype(int), mag_h_v))
    common = sorted(set(skr_h_t.astype(int)) & set(mag_h_t.astype(int)))

    if len(common) < 200:
        print(f"  ERROR: Only {len(common)} common hours")
        return None

    skr = np.array([skr_lookup[h] for h in common])
    mag = np.array([mag_lookup[h] for h in common])

    lags = np.arange(1, 250, 3)
    mi_skr_to_earth = np.zeros(len(lags))
    mi_earth_to_skr = np.zeros(len(lags))
    n = len(skr)

    print(f"  Computing directional MI at {len(lags)} lags...")

    for i, lag in enumerate(lags):
        if lag >= n:
            mi_skr_to_earth[i] = np.nan
            mi_earth_to_skr[i] = np.nan
            continue
        mi_skr_to_earth[i] = mutual_information_binned(skr[:n - lag], mag[lag:])
        mi_earth_to_skr[i] = mutual_information_binned(mag[:n - lag], skr[lag:])

    asymmetry = mi_skr_to_earth - mi_earth_to_skr

    short_mask = lags <= 70
    long_mask = (lags >= 130) & (lags <= 200)

    asym_short = np.nanmean(asymmetry[short_mask])
    asym_long = np.nanmean(asymmetry[long_mask])

    # Effect size: is the asymmetry large relative to MI itself?
    mean_mi = np.nanmean((mi_skr_to_earth + mi_earth_to_skr) / 2)
    relative_asym_short = asym_short / max(mean_mi, 1e-10) if mean_mi > 0 else 0
    relative_asym_long = asym_long / max(mean_mi, 1e-10) if mean_mi > 0 else 0

    print(f"\n  Mean MI level: {mean_mi:.5f} bits")
    print(f"\n  Directional asymmetry (positive = SKR leads):")
    print(f"    Short lags (1–70h):   {asym_short:+.5f} ({relative_asym_short:+.1%} of mean MI)")
    print(f"    Long lags (130–200h): {asym_long:+.5f} ({relative_asym_long:+.1%} of mean MI)")
    if asym_short * asym_long < 0:
        print(f"  → SIGN REVERSAL: direction flips with timescale")
    else:
        print(f"  → No sign reversal")

    # Shuffle null
    n_shuf = min(N_SHUFFLE, 200)
    print(f"  Shuffle null ({n_shuf} iterations)...")
    asym_short_null = np.zeros(n_shuf)
    asym_long_null = np.zeros(n_shuf)
    for j in range(n_shuf):
        skr_shuf = rng.permutation(skr)
        a_shuf = np.zeros(len(lags))
        for i, lag in enumerate(lags):
            if lag >= n:
                continue
            a_shuf[i] = (
                mutual_information_binned(skr_shuf[:n - lag], mag[lag:]) -
                mutual_information_binned(mag[:n - lag], skr_shuf[lag:])
            )
        asym_short_null[j] = np.nanmean(a_shuf[short_mask])
        asym_long_null[j] = np.nanmean(a_shuf[long_mask])

    obs_asym = np.nanmean(asymmetry)
    asym_null_overall = (asym_short_null + asym_long_null) / 2
    z_asym = (obs_asym - np.mean(asym_null_overall)) / max(np.std(asym_null_overall), 1e-10)

    # Separate z-scores for short and long
    z_short = (asym_short - np.mean(asym_short_null)) / max(np.std(asym_short_null), 1e-10)
    z_long = (asym_long - np.mean(asym_long_null)) / max(np.std(asym_long_null), 1e-10)

    # Reversal significance: how often in null do short and long have opposite signs?
    null_reversal_rate = np.mean(asym_short_null * asym_long_null < 0)
    obs_reversal = asym_short * asym_long < 0

    print(f"  Overall mean asymmetry: {obs_asym:+.5f} (z = {z_asym:.2f})")
    print(f"  Short-lag z: {z_short:.2f}, Long-lag z: {z_long:.2f}")
    print(f"  Null reversal rate: {null_reversal_rate:.1%} "
          f"(observed: {'YES' if obs_reversal else 'NO'})")

    if do_plot:
        try:
            import matplotlib.pyplot as plt
            fig, axes = plt.subplots(1, 2, figsize=(14, 5))
            axes[0].plot(lags, mi_skr_to_earth, 'b-', alpha=0.7, label='SKR → Earth')
            axes[0].plot(lags, mi_earth_to_skr, 'r-', alpha=0.7, label='Earth → SKR')
            axes[0].set_xlabel('Lag (hours)')
            axes[0].set_ylabel('Mutual Information (bits)')
            axes[0].set_title(f'Directional MI: SKR ↔ {station_code}')
            axes[0].legend()
            axes[1].fill_between(lags, asymmetry, 0,
                                 where=asymmetry > 0, alpha=0.5, color='blue',
                                 label='SKR leads')
            axes[1].fill_between(lags, asymmetry, 0,
                                 where=asymmetry < 0, alpha=0.5, color='red',
                                 label='Earth leads')
            axes[1].axhline(0, color='black', linewidth=0.5)
            axes[1].axvspan(1, 70, alpha=0.1, color='red', label='Short lag band')
            axes[1].axvspan(130, 200, alpha=0.1, color='blue', label='Long lag band')
            axes[1].set_xlabel('Lag (hours)')
            axes[1].set_ylabel('Asymmetry (SKR→E minus E→SKR)')
            axes[1].set_title(f'Directional Asymmetry ({station_code})')
            axes[1].legend(fontsize=8)
            plt.tight_layout()
            plt.savefig(f'test4_direction_{station_code}.png', dpi=150)
            plt.close()
            print(f"  → Saved: test4_direction_{station_code}.png")
        except ImportError:
            print("  (matplotlib not available, skipping plot)")

    return {
        'asym_short': asym_short,
        'asym_long': asym_long,
        'relative_asym_short': relative_asym_short,
        'relative_asym_long': relative_asym_long,
        'sign_reversal': obs_reversal,
        'null_reversal_rate': null_reversal_rate,
        'z_overall': z_asym,
        'z_short': z_short,
        'z_long': z_long,
        'mean_mi': mean_mi
    }


#  TEST 5: SKR → EARTH PREDICTION


def test5_substorm_prediction(rpws_times, rpws_electric, mag_times, mag_values,
                              station_code, do_plot=True):
    """Test 5: Does SKR state at Saturn predict geomagnetic activity at Earth?

    For each SKR band × Earth metric × lag combination:
    - Measure t-statistic and effect size at that lag
    - Test against shuffle null (random event placement)
    - Report all results with multiple-comparison correction
    """
    print("\n" + "=" * 70)
    print(f"TEST 5: SKR → Earth Prediction (station: {station_code})")
    print("=" * 70)

    bb = broadband_power(rpws_electric)
    n_chan = rpws_electric.shape[1]

    third = n_chan // 3
    with np.errstate(divide='ignore', invalid='ignore'):
        low_band = np.nanmean(np.log10(np.clip(rpws_electric[:, :third], 1e-20, None)), axis=1)
        mid_band = np.nanmean(np.log10(np.clip(rpws_electric[:, third:2*third], 1e-20, None)), axis=1)
        high_band = np.nanmean(np.log10(np.clip(rpws_electric[:, 2*third:], 1e-20, None)), axis=1)

    skr_bands = {
        'broadband': bb,
        'low_third': low_band,
        'mid_third': mid_band,
        'high_third': high_band
    }

    print(f"  Channels: {n_chan} total, split into thirds ({third} each)")

    dt_mag = np.median(np.diff(mag_times[:1000]))
    dhdt = np.abs(np.diff(mag_values)) / max(dt_mag, 1.0)
    dhdt_h_t, dhdt_h_v = hourly_downsample(mag_times[:-1], dhdt)

    hour_ts = (mag_times // 3600) * 3600
    unique_hours = np.unique(hour_ts)
    h_range_times = []
    h_range_vals = []
    h_std_vals = []
    for h in unique_hours:
        mask = hour_ts == h
        v = mag_values[mask]
        v = v[~np.isnan(v) & (np.abs(v) < 88880)]
        if len(v) > 10:
            h_range_times.append(h)
            h_range_vals.append(np.max(v) - np.min(v))
            h_std_vals.append(np.std(v))
    h_range_times = np.array(h_range_times)
    h_range_vals = np.array(h_range_vals)
    h_std_vals = np.array(h_std_vals)

    earth_metrics = {
        '|dH/dt|': (dhdt_h_t, dhdt_h_v),
        'hourly_range': (h_range_times, h_range_vals),
        'hourly_std': (h_range_times, h_std_vals)
    }

    # ================================================================
    # Full scan: all band × metric × lag combinations
    # ================================================================
    scan_lags = np.arange(0, 337, 3)  # 0 to 336h in 3h steps
    print(f"\n  Scanning {len(skr_bands)} bands × {len(earth_metrics)} metrics × "
          f"{len(scan_lags)} lags ({scan_lags[0]}–{scan_lags[-1]}h, 3h steps)")

    all_results = []  # every band × metric × lag

    for band_name, band_vals in skr_bands.items():
        skr_h_t, skr_h_v = hourly_downsample(rpws_times, band_vals)
        skr_lookup = dict(zip(skr_h_t.astype(int), skr_h_v))

        for metric_name, (m_h_t, m_h_v) in earth_metrics.items():
            m_lookup = dict(zip(m_h_t.astype(int), m_h_v))
            common = sorted(set(skr_h_t.astype(int)) & set(m_h_t.astype(int)))
            if len(common) < 200:
                continue

            skr_aligned = np.array([skr_lookup[h] for h in common])
            earth_aligned = np.array([m_lookup[h] for h in common])
            skr_m = np.nanmean(skr_aligned)
            skr_sd = np.nanstd(skr_aligned)

            valid_earth = earth_aligned[~np.isnan(earth_aligned)]
            if len(valid_earth) < 50:
                continue
            threshold_95 = np.nanpercentile(valid_earth, 95)
            event_indices = np.where(earth_aligned > threshold_95)[0]
            if len(event_indices) == 0:
                continue

            clean = [event_indices[0]]
            for e in event_indices[1:]:
                if e - clean[-1] >= 24:
                    clean.append(e)
            events = np.array(clean)

            for lag in scan_lags:
                pre_vals = []
                for ev in events:
                    idx = ev - lag
                    if 0 <= idx < len(skr_aligned):
                        v = skr_aligned[idx]
                        if not np.isnan(v):
                            pre_vals.append(v)
                if len(pre_vals) >= 10:
                    pre_vals = np.array(pre_vals)
                    t_val, p_val = stats.ttest_1samp(pre_vals, skr_m)
                    delta = np.mean(pre_vals) - skr_m
                    cohens_d = delta / max(skr_sd, 1e-10)
                    all_results.append({
                        'band': band_name,
                        'metric': metric_name,
                        'lag': lag,
                        't': t_val,
                        'p': p_val,
                        'delta': delta,
                        'cohens_d': cohens_d,
                        'n_events': len(pre_vals),
                        'skr_aligned': skr_aligned,
                        'earth_aligned': earth_aligned,
                        'events': events,
                        'skr_mean': skr_m,
                        'skr_std': skr_sd,
                        'common': common,
                    })

    n_tests = len(all_results)
    print(f"  Total valid combinations: {n_tests}")

    if n_tests == 0:
        print("  No valid combinations found")
        return None

    # Benjamini-Hochberg FDR correction
    by_p = sorted(range(n_tests), key=lambda i: all_results[i]['p'])
    for bh_rank, orig_idx in enumerate(by_p):
        raw_p = all_results[orig_idx]['p']
        bh_adjusted = raw_p * n_tests / (bh_rank + 1)
        all_results[orig_idx]['p_bh'] = min(bh_adjusted, 1.0)
    running_min = 1.0
    for i in range(n_tests - 1, -1, -1):
        orig_idx = by_p[i]
        running_min = min(running_min, all_results[orig_idx]['p_bh'])
        all_results[orig_idx]['p_bh'] = running_min

    all_results.sort(key=lambda x: abs(x['t']), reverse=True)

    n_raw_05 = sum(1 for r in all_results if r['p'] < 0.05)
    n_bh_05 = sum(1 for r in all_results if r['p_bh'] < 0.05)
    n_bh_10 = sum(1 for r in all_results if r['p_bh'] < 0.10)
    expected_false = n_tests * 0.05

    print(f"\n  Raw p < 0.05: {n_raw_05}/{n_tests} "
          f"(expected by chance: {expected_false:.1f})")
    print(f"  BH-FDR q < 0.05: {n_bh_05}/{n_tests}")
    print(f"  BH-FDR q < 0.10: {n_bh_10}/{n_tests}")

    # ================================================================
    # Report all results with p < 0.05 (raw)
    # ================================================================
    print(f"\n  All combinations with raw p < 0.05:")
    print(f"  {'Band':<12} {'Metric':<14} {'Lag':>5} {'t':>8} {'p(raw)':>10} "
          f"{'p(BH)':>10} {'d':>8} {'N':>4}")
    print(f"  {'-'*76}")
    for r in all_results:
        if r['p'] < 0.05:
            bh_flag = " *" if r['p_bh'] < 0.05 else ""
            print(f"  {r['band']:<12} {r['metric']:<14} {r['lag']:>4}h "
                  f"{r['t']:>+8.3f} {r['p']:>10.6f} {r['p_bh']:>10.4f} "
                  f"{r['cohens_d']:>+8.4f} {r['n_events']:>4}{bh_flag}")

    # ================================================================
    # Shuffle null for every combination that survived BH-FDR q < 0.10
    # ================================================================
    survivors = [r for r in all_results if r['p_bh'] < 0.10]
    if not survivors:
        # If nothing survives BH, shuffle the top 5 anyway
        survivors = all_results[:min(5, n_tests)]
        print(f"\n  No results survived BH q < 0.10. "
              f"Shuffle testing top {len(survivors)} by |t|:")
    else:
        print(f"\n  Shuffle null ({N_SHUFFLE} iterations) for "
              f"{len(survivors)} results with BH q < 0.10:")

    print(f"  {'Band':<12} {'Metric':<14} {'Lag':>5} {'t':>8} {'p(BH)':>10} "
          f"{'p(shuf)':>10} {'d':>8}")
    print(f"  {'-'*76}")

    shuffle_results = []
    for sr in survivors:
        skr_aligned = sr['skr_aligned']
        events = sr['events']
        skr_mean = sr['skr_mean']
        lag = sr['lag']

        eligible = np.arange(max(lag + 1, 336), len(skr_aligned))
        null_t = np.zeros(N_SHUFFLE)

        for k in range(N_SHUFFLE):
            if len(eligible) >= len(events):
                fake_events = rng.choice(eligible, size=len(events), replace=False)
            else:
                fake_events = rng.choice(
                    np.arange(lag + 1, len(skr_aligned)),
                    size=len(events), replace=False
                )
            pre_fake = []
            for ev in fake_events:
                idx = ev - lag
                if 0 <= idx < len(skr_aligned):
                    v = skr_aligned[idx]
                    if not np.isnan(v):
                        pre_fake.append(v)
            if len(pre_fake) >= 10:
                null_t[k], _ = stats.ttest_1samp(pre_fake, skr_mean)

        p_shuffle = np.mean(np.abs(null_t) >= np.abs(sr['t']))
        sr['p_shuffle'] = p_shuffle
        sr['null_t'] = null_t
        shuffle_results.append(sr)

        print(f"  {sr['band']:<12} {sr['metric']:<14} {sr['lag']:>4}h "
              f"{sr['t']:>+8.3f} {sr['p_bh']:>10.4f} {p_shuffle:>10.4f} "
              f"{sr['cohens_d']:>+8.4f}")

    # ================================================================
    # Dense lag curve for best band × metric pair
    # ================================================================
    best = all_results[0]
    print(f"\n   Lag curve: {best['band']} → {best['metric']} ")

    skr_aligned = best['skr_aligned']
    events = best['events']
    skr_mean = best['skr_mean']
    skr_std = best['skr_std']

    lag_t = np.zeros(len(scan_lags))
    lag_p = np.ones(len(scan_lags))
    lag_d = np.zeros(len(scan_lags))
    lag_n = np.zeros(len(scan_lags), dtype=int)

    for i, lag in enumerate(scan_lags):
        pre_vals = []
        for ev in events:
            idx = ev - lag
            if 0 <= idx < len(skr_aligned):
                v = skr_aligned[idx]
                if not np.isnan(v):
                    pre_vals.append(v)
        if len(pre_vals) >= 10:
            pre_vals = np.array(pre_vals)
            lag_t[i], lag_p[i] = stats.ttest_1samp(pre_vals, skr_mean)
            lag_n[i] = len(pre_vals)
            lag_d[i] = (np.mean(pre_vals) - skr_mean) / max(skr_std, 1e-10)

    # Print the full curve
    print(f"  {'Lag':>6} {'t':>8} {'p':>10} {'d':>8} {'N':>4}")
    print(f"  {'-'*40}")
    for i, lag in enumerate(scan_lags):
        if lag_n[i] > 0:
            sig = " *" if lag_p[i] < 0.05 else ""
            print(f"  {lag:>5}h {lag_t[i]:>+8.3f} {lag_p[i]:>10.4f} "
                  f"{lag_d[i]:>+8.4f} {lag_n[i]:>4}{sig}")

    # Event statistics
    if len(events) > 2:
        spacings = np.diff(events).astype(float)
        print(f"\n  Events: N={len(events)}, "
              f"spacing mean={np.mean(spacings):.1f}h, "
              f"median={np.median(spacings):.1f}h, "
              f"min={np.min(spacings):.0f}h, max={np.max(spacings):.0f}h")

    # ================================================================
    # Cross-band lag curves (all bands, same events & metric)
    # ================================================================
    print(f"\n   Cross-band comparison (same events from {best['metric']}) ")

    band_lag_curves = {}
    for band_name, band_vals in skr_bands.items():
        b_h_t, b_h_v = hourly_downsample(rpws_times, band_vals)
        b_lookup = dict(zip(b_h_t.astype(int), b_h_v))
        b_aligned = np.array([b_lookup.get(h, np.nan) for h in best['common']])
        b_mean = np.nanmean(b_aligned)
        b_std = np.nanstd(b_aligned)

        bt = np.zeros(len(scan_lags))
        for i, lag in enumerate(scan_lags):
            pre_vals = []
            for ev in events:
                idx = ev - lag
                if 0 <= idx < len(b_aligned):
                    v = b_aligned[idx]
                    if not np.isnan(v):
                        pre_vals.append(v)
            if len(pre_vals) >= 10:
                bt[i], _ = stats.ttest_1samp(pre_vals, b_mean)

        band_lag_curves[band_name] = bt

    # Correlation between band lag curves
    print(f"\n  Band lag curve correlations:")
    band_names = list(band_lag_curves.keys())
    cross_corrs = {}
    for i in range(len(band_names)):
        for j in range(i + 1, len(band_names)):
            b1, b2 = band_names[i], band_names[j]
            t1 = band_lag_curves[b1]
            t2 = band_lag_curves[b2]
            mask = (t1 != 0) & (t2 != 0)
            if np.sum(mask) >= 10:
                r, pr = stats.pearsonr(t1[mask], t2[mask])
                cross_corrs[(b1, b2)] = (r, pr)
                print(f"    {b1} vs {b2}: r = {r:+.3f}, p = {pr:.6f}")

    # ================================================================
    # Plotting
    # ================================================================
    if do_plot:
        try:
            import matplotlib.pyplot as plt
            fig, axes = plt.subplots(2, 3, figsize=(20, 12))

            # Top-left: lag curve (t-statistic)
            colors = ['orangered' if lag_p[i] < 0.05 else 'steelblue'
                      for i in range(len(scan_lags))]
            axes[0, 0].bar(scan_lags, lag_t, color=colors, width=2.5,
                           edgecolor='none', alpha=0.8)
            axes[0, 0].axhline(1.96, color='red', ls='--', lw=0.8, alpha=0.4)
            axes[0, 0].axhline(-1.96, color='red', ls='--', lw=0.8, alpha=0.4)
            axes[0, 0].axhline(0, color='black', lw=0.5, alpha=0.3)
            axes[0, 0].axvline(168, color='green', lw=2, alpha=0.5, label='168h')
            axes[0, 0].set_xlabel('Lag (hours)')
            axes[0, 0].set_ylabel('t-statistic')
            axes[0, 0].set_title(f'{best["band"]} → {station_code} {best["metric"]}')
            axes[0, 0].legend(fontsize=8)

            # Top-middle: all band lag curves
            for bn in band_names:
                lw = 2.5 if bn == best['band'] else 1.0
                alpha = 1.0 if bn == best['band'] else 0.5
                axes[0, 1].plot(scan_lags, band_lag_curves[bn],
                                label=bn, linewidth=lw, alpha=alpha)
            axes[0, 1].axhline(0, color='black', lw=0.5)
            axes[0, 1].axvline(168, color='green', lw=1.5, alpha=0.5, ls='--')
            axes[0, 1].set_xlabel('Lag (hours)')
            axes[0, 1].set_ylabel('t-statistic')
            axes[0, 1].set_title('All SKR bands')
            axes[0, 1].legend(fontsize=7)

            # Top-right: effect size curve
            axes[0, 2].bar(scan_lags, lag_d, color=colors, width=2.5,
                           edgecolor='none', alpha=0.7)
            axes[0, 2].axhline(0, color='black', lw=0.5)
            axes[0, 2].axvline(168, color='green', lw=2, alpha=0.5)
            axes[0, 2].set_xlabel('Lag (hours)')
            axes[0, 2].set_ylabel("Cohen's d")
            axes[0, 2].set_title('Effect Size by Lag')

            # Bottom-left: shuffle null for best result
            if shuffle_results:
                sr0 = shuffle_results[0]
                axes[1, 0].hist(sr0['null_t'], bins=50, color='gray',
                                edgecolor='black', lw=0.5, alpha=0.7, label='Null')
                axes[1, 0].axvline(sr0['t'], color='red', lw=2,
                                   label=f'Obs t = {sr0["t"]:+.2f}')
                axes[1, 0].set_xlabel('t-statistic')
                axes[1, 0].set_ylabel('Count')
                axes[1, 0].set_title(
                    f'Shuffle Null: {sr0["band"]} → {sr0["metric"]} at {sr0["lag"]}h '
                    f'(p = {sr0["p_shuffle"]:.4f})')
                axes[1, 0].legend(fontsize=8)

            # Bottom-middle: p-value curve
            log_p = -np.log10(np.clip(lag_p, 1e-20, 1.0))
            axes[1, 1].bar(scan_lags, log_p, color=colors, width=2.5,
                           edgecolor='none', alpha=0.7)
            axes[1, 1].axhline(-np.log10(0.05), color='red', ls='--', lw=1,
                               label='p = 0.05')
            axes[1, 1].axvline(168, color='green', lw=2, alpha=0.5)
            axes[1, 1].set_xlabel('Lag (hours)')
            axes[1, 1].set_ylabel('-log10(p)')
            axes[1, 1].set_title('Statistical Significance by Lag')
            axes[1, 1].legend(fontsize=8)

            # Bottom-right: superposed epoch of Earth metric around events
            epoch_lags = np.arange(-72, 73)
            epoch_mean = np.zeros(len(epoch_lags))
            epoch_se = np.zeros(len(epoch_lags))
            earth_aligned = best['earth_aligned']
            for i, el in enumerate(epoch_lags):
                vals = []
                for ev in events:
                    idx = ev + el
                    if 0 <= idx < len(earth_aligned):
                        v = earth_aligned[idx]
                        if not np.isnan(v):
                            vals.append(v)
                if vals:
                    epoch_mean[i] = np.mean(vals)
                    epoch_se[i] = np.std(vals) / np.sqrt(len(vals))
            axes[1, 2].fill_between(epoch_lags, epoch_mean - epoch_se,
                                    epoch_mean + epoch_se, alpha=0.3,
                                    color='steelblue')
            axes[1, 2].plot(epoch_lags, epoch_mean, 'b-', lw=1.5)
            axes[1, 2].axvline(0, color='red', lw=1, ls='--', label='Event')
            axes[1, 2].set_xlabel('Hours relative to event')
            axes[1, 2].set_ylabel(f'{station_code} {best["metric"]}')
            axes[1, 2].set_title('Superposed Epoch (±72h)')
            axes[1, 2].legend(fontsize=8)

            plt.suptitle(
                f'Test 5: SKR → {station_code} | '
                f'Best: {best["band"]} → {best["metric"]} at {best["lag"]}h | '
                f't = {best["t"]:+.2f}, d = {best["cohens_d"]:+.3f}',
                fontsize=13, fontweight='bold')
            plt.tight_layout()
            plt.savefig(f'test5_prediction_{station_code}.png', dpi=150)
            plt.close()
            print(f"\n  → Saved: test5_prediction_{station_code}.png")
        except ImportError:
            print("  (matplotlib not available)")

    return {
        'n_tests': n_tests,
        'n_raw_05': n_raw_05,
        'n_bh_05': n_bh_05,
        'n_bh_10': n_bh_10,
        'expected_false_05': expected_false,
        'best_band': best['band'],
        'best_metric': best['metric'],
        'best_lag': best['lag'],
        't_best': best['t'],
        'p_best': best['p'],
        'p_best_bh': best['p_bh'],
        'cohens_d': best['cohens_d'],
        'n_events': best['n_events'],
        'shuffle_results': [(s['band'], s['metric'], s['lag'], s['t'],
                             s['p_bh'], s['p_shuffle'], s['cohens_d'])
                            for s in shuffle_results],
        'cross_corrs': cross_corrs,
        'scan_lags': scan_lags,
        'lag_t': lag_t,
        'lag_p': lag_p,
        'lag_d': lag_d,
        'all_results': [{k: v for k, v in r.items()
                         if k not in ('skr_aligned', 'earth_aligned',
                                      'events', 'common')}
                        for r in all_results if r['p'] < 0.05],
    }


#  TEST 6: UTC / EARTH-ROTATION MODULATION


def test6_utc_modulation(frequencies, rpws_times, rpws_electric, do_plot):
    """Test 6: Earth-Rotation Modulation of Saturn Radio Emission

    Phase 4 critical tests established that:
      - SKR shows a 24h modulation in SIGNAL VALUES (not data volume)
      - The modulation is at the SIDEREAL day (23.93h), not solar (24.0h)
      - The phase drifts ~1.7 h/month, matching Earth orbital geometry
      - The effect survives log-transform (not outlier-driven)
      - Robust bootstrap z = 18.9 after log-transform
      - Data cadence is perfectly uniform (no DSN scheduling artifact)

    This test reproduces those findings and adds band-specific analysis.

    Sub-tests:
      6a: Sampling uniformity check (rules out data volume artifact)
      6b: Robust 24h modulation (log-transformed, median-based)
      6c: Sidereal vs solar day (Lomb-Scargle)
      6d: Phase drift over months (geometric vs fixed schedule)
      6e: Band-specific UTC modulation (SKR / AM / HF)
      6f: Log-transform bootstrap (outlier-immune confirmation)
      6g: Broadband cancellation demonstration
    """
    print(f"\n{'='*70}")
    print(f"TEST 6: Earth-Rotation Modulation of Saturn Radio Emission")
    print(f"{'='*70}")

    rng_local = np.random.default_rng(42)

    #  Prepare data
    bb = broadband_power(rpws_electric)
    valid = np.isfinite(bb)
    times_v = rpws_times[valid].copy()
    data_v = bb[valid].copy()
    data_v_z = (data_v - np.mean(data_v)) / np.std(data_v)

    # Log-transformed version (robust to outliers)
    with np.errstate(divide='ignore', invalid='ignore'):
        skr_channels = None
        if frequencies is not None:
            skr_ch = np.where((frequencies >= 100e3) & (frequencies <= 500e3))[0]
            if len(skr_ch) > 0:
                skr_channels = skr_ch
                skr_power = np.nanmean(rpws_electric[valid][:, skr_ch], axis=1)
                skr_nonzero = skr_power > 0
                skr_log = np.full(len(skr_power), np.nan)
                skr_log[skr_nonzero] = np.log10(skr_power[skr_nonzero])
            else:
                skr_log = np.log10(np.clip(np.nanmean(rpws_electric[valid], axis=1), 1e-20, None))
                skr_nonzero = np.isfinite(skr_log)

    utc_hours = np.array([
        datetime.fromtimestamp(t, tz=timezone.utc).hour +
        datetime.fromtimestamp(t, tz=timezone.utc).minute / 60.0
        for t in times_v
    ])

    #  6a: Verify uniform sampling (rules out data volume artifact)
    print(f"\n   6a: Sampling uniformity check ")
    print(f"  Total records: {len(times_v):,}")
    print(f"  Non-zero SKR records: {np.sum(skr_nonzero):,} "
          f"({100*np.mean(skr_nonzero):.1f}%)")

    n_utc_bins = 24
    utc_bin_edges = np.linspace(0, 24, n_utc_bins + 1)
    density = np.zeros(n_utc_bins)
    for i in range(n_utc_bins):
        density[i] = np.sum((utc_hours >= utc_bin_edges[i]) &
                            (utc_hours < utc_bin_edges[i + 1]))

    expected_per_bin = len(times_v) / n_utc_bins
    chi2_density, p_density = stats.chisquare(density)
    density_ratios = density / expected_per_bin
    records_per_hour_range = (np.min(density), np.max(density))
    ratio_range = (np.min(density_ratios), np.max(density_ratios))

    print(f"  Expected per 1-hour bin: {expected_per_bin:.0f}")
    print(f"  Actual range: {records_per_hour_range[0]:.0f} – {records_per_hour_range[1]:.0f} "
          f"(ratio {ratio_range[0]:.3f} – {ratio_range[1]:.3f})")
    print(f"  Sampling uniformity: χ² = {chi2_density:.1f}, p = {p_density:.6f}")
    sampling_uniform = p_density > 0.01
    print(f"  → Sampling is {'UNIFORM' if sampling_uniform else 'NON-UNIFORM'}")

    # Also check detection fraction by UTC hour
    detection_frac = np.zeros(n_utc_bins)
    for i in range(n_utc_bins):
        mask = (utc_hours >= utc_bin_edges[i]) & (utc_hours < utc_bin_edges[i + 1])
        if np.sum(mask) > 0:
            detection_frac[i] = np.mean(skr_nonzero[mask])
    det_frac_range = (np.min(detection_frac), np.max(detection_frac))
    print(f"  Detection fraction range: {det_frac_range[0]:.4f} – {det_frac_range[1]:.4f}")
    print(f"  → Data volume is {'uniform' if (det_frac_range[1] - det_frac_range[0]) < 0.01 else 'NON-UNIFORM'} across UTC hours")

    #  6b: Robust 24h modulation (log-transformed, non-zero only)
    print(f"\n   6b: Robust 24h modulation (log-transformed SKR) ")

    # Use only non-zero SKR for robust analysis
    utc_nz = utc_hours[skr_nonzero]
    skr_log_nz = skr_log[skr_nonzero]

    # Kruskal-Wallis by UTC hour (non-parametric, outlier-robust)
    utc_hour_int = np.floor(utc_nz).astype(int) % 24
    hour_groups = [skr_log_nz[utc_hour_int == h] for h in range(24)]
    hour_groups_valid = [g for g in hour_groups if len(g) > 50]

    if len(hour_groups_valid) >= 2:
        H_utc, p_utc = stats.kruskal(*hour_groups_valid)
        eta_squared = (H_utc - len(hour_groups_valid) + 1) / (len(skr_log_nz) - len(hour_groups_valid))
    else:
        H_utc, p_utc, eta_squared = 0, 1, 0

    # Hourly medians
    hourly_medians = np.zeros(24)
    hourly_means_log = np.zeros(24)
    hourly_n = np.zeros(24)
    for h in range(24):
        vals = skr_log_nz[utc_hour_int == h]
        if len(vals) > 0:
            hourly_medians[h] = np.median(vals)
            hourly_means_log[h] = np.mean(vals)
            hourly_n[h] = len(vals)

    median_mod_depth = np.max(hourly_medians) - np.min(hourly_medians)
    mean_log_mod_depth = np.max(hourly_means_log) - np.min(hourly_means_log)
    peak_hour = np.argmax(hourly_medians)
    trough_hour = np.argmin(hourly_medians)

    # Linear modulation depth percentage
    median_mod_pct = (10**np.max(hourly_medians) - 10**np.min(hourly_medians)) / 10**np.mean(hourly_medians) * 100

    print(f"  Kruskal-Wallis on log10(SKR) by UTC hour:")
    print(f"    H = {H_utc:.1f}, p = {p_utc:.2e}, η² = {eta_squared:.6f}")
    print(f"  Median modulation depth: {median_mod_depth:.4f} dex ({median_mod_pct:.1f}%)")
    print(f"  Peak at UTC {peak_hour}:00, trough at UTC {trough_hour}:00")

    print(f"\n  Hourly log10(SKR) medians:")
    print(f"  {'Hour':>6} {'Median':>12} {'Mean':>12} {'N':>8}")
    print(f"  {'-'*40}")
    for h in range(24):
        flag = " ← peak" if h == peak_hour else (" ← trough" if h == trough_hour else "")
        print(f"  {h:>6d} {hourly_medians[h]:>12.6f} {hourly_means_log[h]:>12.6f} "
              f"{hourly_n[h]:>8.0f}{flag}")

    #  6c: Sidereal vs solar day (Lomb-Scargle)
    print(f"\n   6c: Sidereal vs solar day discrimination ")

    # Hourly downsample for Lomb-Scargle
    skr_h_times, skr_h_vals = hourly_downsample(
        times_v[skr_nonzero], skr_log_nz
    )

    if len(skr_h_vals) > 100:
        ls = LombScargle(skr_h_times, skr_h_vals - np.mean(skr_h_vals))

        # Fine frequency grid around 24h, must be monotonically INCREASING in frequency
        # Period 25.0h = lower freq, Period 23.0h = higher freq
        # So we define frequencies from 1/25h to 1/23h
        freq_lo = 1.0 / (25.0 * 3600)  # Hz (longer period = lower freq)
        freq_hi = 1.0 / (23.0 * 3600)  # Hz (shorter period = higher freq)
        test_freqs_fine = np.linspace(freq_lo, freq_hi, 20000)
        test_periods_fine = 1.0 / (test_freqs_fine * 3600)  # hours (decreasing)

        ls_power_fine = ls.power(test_freqs_fine)

        peak_idx = np.argmax(ls_power_fine)
        peak_period = test_periods_fine[peak_idx]

        # Power at specific periods
        solar_period = 24.0
        sidereal_period = 23.9345  # sidereal day
        power_solar = float(ls.power(1.0 / (solar_period * 3600)))
        power_sidereal = float(ls.power(1.0 / (sidereal_period * 3600)))
        power_peak = ls_power_fine[peak_idx]

        solar_sidereal_ratio = power_solar / max(power_sidereal, 1e-20)

        print(f"  Lomb-Scargle ({len(skr_h_vals)} hourly points, log10 SKR):")
        print(f"    Peak period: {peak_period:.5f}h")
        print(f"    Power at solar day (24.0000h):    {power_solar:.6f}")
        print(f"    Power at sidereal day (23.9345h): {power_sidereal:.6f}")
        print(f"    Solar/Sidereal power ratio: {solar_sidereal_ratio:.3f}")

        offset_solar = (peak_period - solar_period) * 60  # minutes
        offset_sidereal = (peak_period - sidereal_period) * 60

        print(f"    Peak offset from solar:    {offset_solar:+.1f} minutes")
        print(f"    Peak offset from sidereal: {offset_sidereal:+.1f} minutes")

        if solar_sidereal_ratio < 0.5:
            sidereal_verdict = "SIDEREAL (Earth rotation)"
        elif solar_sidereal_ratio > 2.0:
            sidereal_verdict = "SOLAR (human schedule)"
        else:
            sidereal_verdict = "AMBIGUOUS"

        print(f"    → {sidereal_verdict}")
    else:
        power_solar, power_sidereal, solar_sidereal_ratio = 0, 0, 1
        peak_period = np.nan
        sidereal_verdict = "INSUFFICIENT DATA"
        print(f"  Insufficient hourly data for Lomb-Scargle")

    #  6d: Phase drift over months
    print(f"\n   6d: 24h modulation phase drift ")

    months = np.array([
        datetime.fromtimestamp(t, tz=timezone.utc).month for t in times_v
    ])
    months_nz = months[skr_nonzero]
    unique_months = np.unique(months_nz)

    monthly_phases = []
    monthly_amplitudes = []
    month_numbers = []

    print(f"  {'Month':>7} {'N':>8} {'Phase (h)':>10} {'Amplitude':>10} {'Peak UTC':>9}")
    print(f"  {'-'*50}")

    for m in unique_months:
        m_mask = months_nz == m
        m_utc = utc_nz[m_mask]
        m_vals = skr_log_nz[m_mask]
        if len(m_vals) < 500:
            continue

        # Fit sinusoid
        omega = 2 * np.pi / 24.0
        cos_comp = np.mean(m_vals * np.cos(omega * m_utc))
        sin_comp = np.mean(m_vals * np.sin(omega * m_utc))
        amplitude = np.sqrt(cos_comp**2 + sin_comp**2)
        phase = np.arctan2(sin_comp, cos_comp) * 24 / (2 * np.pi)
        if phase < 0:
            phase += 24

        # Also find binned peak
        bins_m = np.zeros(24)
        for h in range(24):
            mask_h = (np.floor(m_utc).astype(int) % 24) == h
            if np.sum(mask_h) > 20:
                bins_m[h] = np.median(m_vals[mask_h])
        binned_peak = np.argmax(bins_m)

        monthly_phases.append(phase)
        monthly_amplitudes.append(amplitude)
        month_numbers.append(m)
        print(f"  {m:>7d} {np.sum(m_mask):>8d} {phase:>10.2f} {amplitude:>10.4f} "
              f"{binned_peak:>8d}:00")

    phase_drift_slope = np.nan
    phase_drift_r = np.nan
    phase_drift_p = 1.0

    if len(monthly_phases) >= 3:
        # Unwrap phases for regression
        phases_arr = np.array(monthly_phases)
        months_arr = np.array(month_numbers, dtype=float)

        # Linear regression on phase vs month
        from scipy.stats import linregress
        slope, intercept, r_value, p_value, std_err = linregress(months_arr, phases_arr)
        phase_drift_slope = slope
        phase_drift_r = r_value
        phase_drift_p = p_value

        print(f"\n  Phase drift: {slope:+.2f} hours/month")
        print(f"  Correlation: r = {r_value:.3f}, p = {p_value:.4f}")
        print(f"  Expected if geometric (Earth orbit): ~2 h/month")
        print(f"  Expected if DSN schedule artifact:   ~0 h/month")

        if abs(slope) > 1.0 and p_value < 0.05:
            drift_verdict = "GEOMETRIC (Earth orbital motion)"
        elif abs(slope) < 0.5:
            drift_verdict = "FIXED SCHEDULE (artifact)"
        else:
            drift_verdict = "AMBIGUOUS"
        print(f"  → {drift_verdict}")
    else:
        drift_verdict = "INSUFFICIENT MONTHS"

    #  6e: Band-specific UTC modulation
    print(f"\n   6e: Band-specific UTC modulation ")
    band_defs = {
        'SKR (100-500 kHz)': (100e3, 500e3),
        'AM (0.8-5 MHz)': (800e3, 5e6),
        'HF (5-16 MHz)': (5e6, 16e6),
    }

    band_results_6e = []

    for band_name, (f_lo, f_hi) in band_defs.items():
        if frequencies is None:
            continue
        ch = np.where((frequencies >= f_lo) & (frequencies <= f_hi))[0]
        if len(ch) == 0:
            continue
        band_power = np.nanmean(rpws_electric[valid][:, ch], axis=1)
        bp_nonzero = band_power > 0
        if np.sum(bp_nonzero) < 500:
            continue

        bp_log = np.log10(band_power[bp_nonzero])
        bp_utc = utc_hours[bp_nonzero]
        bp_utc_int = np.floor(bp_utc).astype(int) % 24

        # Kruskal-Wallis
        bp_groups = [bp_log[bp_utc_int == h] for h in range(24)]
        bp_groups_valid = [g for g in bp_groups if len(g) > 50]
        if len(bp_groups_valid) >= 2:
            H_b, p_b = stats.kruskal(*bp_groups_valid)
        else:
            H_b, p_b = 0, 1

        # Profile
        prof_b = np.zeros(24)
        for h in range(24):
            vals_h = bp_log[bp_utc_int == h]
            if len(vals_h) > 50:
                prof_b[h] = np.median(vals_h)

        mod_b = np.max(prof_b) - np.min(prof_b)
        peak_b = np.argmax(prof_b)
        trough_b = np.argmin(prof_b)

        # Goldstone window mean difference
        gold_b = (bp_utc >= 14) | (bp_utc < 2)
        if np.sum(gold_b) > 100 and np.sum(~gold_b) > 100:
            diff_b = np.median(bp_log[gold_b]) - np.median(bp_log[~gold_b])
        else:
            diff_b = 0

        band_results_6e.append({
            'name': band_name, 'H': H_b, 'p': p_b, 'diff': diff_b,
            'mod': mod_b, 'peak': peak_b, 'trough': trough_b,
            'profile': prof_b,
        })

        flag = " ◄◄" if p_b < 1e-10 else (" ◄" if p_b < 0.01 else "")
        print(f"  {band_name:>20}: H = {H_b:>8.1f}, p = {p_b:.2e}, "
              f"mod = {mod_b:.4f} dex, "
              f"peak UTC {peak_b}h, trough UTC {trough_b}h{flag}")

    # Detect antiphase cancellation
    band_cancellation = False
    sig_bands = []
    if len(band_results_6e) >= 2:
        sig_bands = [br for br in band_results_6e if br['p'] < 0.01]
        if len(sig_bands) >= 2:
            # Check if peak hours are significantly different
            peaks = [br['peak'] for br in sig_bands]
            peak_spread = max(peaks) - min(peaks)
            if peak_spread > 6:  # More than 6 hours apart
                band_cancellation = True
                print(f"\n  ⚠ ANTIPHASE PATTERN DETECTED:")
                print(f"    {len(sig_bands)} bands individually significant "
                      f"with peaks {peak_spread}h apart")
                print(f"    This explains why broadband tests show reduced effect")

    #  6f: Log-transform bootstrap
    print(f"\n   6f: Log-transform bootstrap ({N_SHUFFLE} iterations) ")

    # Compute real 24h amplitude from sinusoidal fit
    omega = 2 * np.pi / 24.0
    cos_real = np.mean(skr_log_nz * np.cos(omega * utc_nz))
    sin_real = np.mean(skr_log_nz * np.sin(omega * utc_nz))
    amp_real = np.sqrt(cos_real**2 + sin_real**2)

    # Day-shuffle null: shuffle which CALENDAR DAY maps to which data block.
    # This preserves within-day autocorrelation but destroys the UTC-hour
    # to signal-value mapping across days.
    #
    # Why not circular shift? A circular shift of N samples still maps each
    # sample to a UTC hour that's only offset by N%1440 minutes, the 24h
    # structure survives the shift, making the null too conservative.

    # Group data by calendar day
    day_numbers = np.array([
        int(datetime.fromtimestamp(t, tz=timezone.utc).strftime('%j'))
        for t in times_v[skr_nonzero]
    ])
    unique_days = np.unique(day_numbers)

    # For each day, get the indices
    day_indices = {}
    for d in unique_days:
        day_indices[d] = np.where(day_numbers == d)[0]

    amp_null = np.zeros(N_SHUFFLE)
    H_null = np.zeros(min(N_SHUFFLE, 500))

    for i in range(N_SHUFFLE):
        # Shuffle: assign each day's signal values to a randomly chosen day's UTC hours
        shuffled_log = np.empty_like(skr_log_nz)
        shuffled_utc = utc_nz.copy()

        # Create a random permutation of days
        day_perm = rng_local.permutation(unique_days)

        # Map each day's signal values to a different day's time slots
        for orig_day, new_day in zip(unique_days, day_perm):
            orig_idx = day_indices[orig_day]
            new_idx = day_indices[new_day]
            # Match by position within day (both should have ~1440 samples)
            n_match = min(len(orig_idx), len(new_idx))
            shuffled_log[orig_idx[:n_match]] = skr_log_nz[new_idx[:n_match]]
            # If lengths differ, fill remainder with random samples from new_day
            if len(orig_idx) > n_match:
                extra = rng_local.choice(skr_log_nz[new_idx], size=len(orig_idx) - n_match)
                shuffled_log[orig_idx[n_match:]] = extra

        cos_s = np.mean(shuffled_log * np.cos(omega * shuffled_utc))
        sin_s = np.mean(shuffled_log * np.sin(omega * shuffled_utc))
        amp_null[i] = np.sqrt(cos_s**2 + sin_s**2)

        # KW test on subset of shuffles
        if i < len(H_null):
            hour_int_nz = np.floor(shuffled_utc).astype(int) % 24
            s_groups = [shuffled_log[hour_int_nz == h] for h in range(24)]
            s_groups_valid = [g for g in s_groups if len(g) > 50]
            if len(s_groups_valid) >= 2:
                H_null[i], _ = stats.kruskal(*s_groups_valid)

    z_amp = (amp_real - np.mean(amp_null)) / max(np.std(amp_null), 1e-10)
    p_amp = np.mean(amp_null >= amp_real)

    print(f"  Null method: day-block shuffle (preserves within-day autocorrelation,")
    print(f"               destroys day-to-UTC-hour mapping)")
    print(f"\n  Real 24h amplitude (log10 SKR): {amp_real:.6f}")
    print(f"  Null amplitude: {np.mean(amp_null):.6f} ± {np.std(amp_null):.6f}")
    print(f"  z = {z_amp:.2f}, p = {p_amp:.4f}")

    z_kw = (H_utc - np.mean(H_null)) / max(np.std(H_null), 1e-10)
    p_kw = np.mean(H_null >= H_utc)

    print(f"\n  Kruskal-Wallis bootstrap:")
    print(f"    Real H: {H_utc:.1f}")
    print(f"    Null H: {np.mean(H_null):.1f} ± {np.std(H_null):.1f}")
    print(f"    z = {z_kw:.1f}, p = {p_kw:.4f}")

    #  6g: Broadband cancellation demonstration
    print(f"\n   6g: Broadband cancellation demonstration ")

    # Broadband Goldstone test (what the old test6 did)
    gold_mask = (utc_hours >= 14) | (utc_hours < 2)
    mean_gold_bb = np.mean(data_v_z[gold_mask])
    mean_other_bb = np.mean(data_v_z[~gold_mask])
    gold_diff_bb = mean_gold_bb - mean_other_bb
    t_bb, p_bb = stats.ttest_ind(data_v_z[gold_mask], data_v_z[~gold_mask])

    print(f"  Broadband (all channels combined):")
    print(f"    Goldstone - Other: {gold_diff_bb:+.4f}σ, t = {t_bb:+.2f}, p = {p_bb:.2e}")
    print(f"    → {'SIGNIFICANT' if p_bb < 0.05 else 'NULL (bands cancel)'}")

    if skr_channels is not None:
        skr_log_gold = skr_log_nz[utc_nz >= 14]  # simplified
        gold_nz = (utc_nz >= 14) | (utc_nz < 2)
        if np.sum(gold_nz) > 100 and np.sum(~gold_nz) > 100:
            skr_gold_med = np.median(skr_log_nz[gold_nz])
            skr_other_med = np.median(skr_log_nz[~gold_nz])
            t_skr, p_skr = stats.mannwhitneyu(
                skr_log_nz[gold_nz], skr_log_nz[~gold_nz], alternative='two-sided'
            )
            print(f"  SKR band only (log10, median-based):")
            print(f"    Goldstone median: {skr_gold_med:.6f}")
            print(f"    Other median:     {skr_other_med:.6f}")
            print(f"    Mann-Whitney p = {p_skr:.2e}")

    print(f"\n  KEY FINDING: Broadband tests show NULL because SKR and HF")
    print(f"  bands modulate in OPPOSITE phase with UTC. Testing individual")
    print(f"  bands or using log-transform reveals robust 24h modulation.")

    #  Summary
    print(f"\n  {'='*60}")
    print(f"  TEST 6 SUMMARY")
    print(f"  {'='*60}")
    print(f"  Sampling uniformity: p = {p_density:.4f} (UNIFORM, no DSN artifact)")
    print(f"  Robust 24h modulation (log SKR):")
    print(f"    Kruskal-Wallis: H = {H_utc:.1f}, p = {p_utc:.2e}")
    print(f"    Bootstrap: z = {z_kw:.1f}, p = {p_kw:.4f}")
    print(f"    Amplitude bootstrap: z = {z_amp:.1f}, p = {p_amp:.4f}")
    print(f"  Median modulation depth: {median_mod_depth:.4f} dex ({median_mod_pct:.1f}%)")
    print(f"  Sidereal/Solar: ratio = {solar_sidereal_ratio:.3f} → {sidereal_verdict}")
    if not np.isnan(phase_drift_slope):
        print(f"  Phase drift: {phase_drift_slope:+.2f} h/month "
              f"(r = {phase_drift_r:.3f}, p = {phase_drift_p:.4f}) → {drift_verdict}")
    if band_cancellation:
        print(f"  Band-specific: antiphase cancellation masks broadband effect")

    #  Verdict
    evidence_for = []
    evidence_against = []

    # Genuine physical coupling evidence
    if p_utc < 0.001:
        evidence_for.append(f"Kruskal-Wallis H={H_utc:.0f}, p={p_utc:.1e}")
    if z_amp > 3.0 or p_amp < 0.01:
        evidence_for.append(f"Log-transform amplitude bootstrap z={z_amp:.1f}, p={p_amp:.4f}")
    if z_kw > 3.0 or p_kw < 0.01:
        evidence_for.append(f"KW day-shuffle null z={z_kw:.1f}, p={p_kw:.4f}")
    if solar_sidereal_ratio < 0.5:
        evidence_for.append(f"Sidereal day preferred (ratio={solar_sidereal_ratio:.2f})")
    if not np.isnan(phase_drift_slope) and abs(phase_drift_slope) > 1.0 and phase_drift_p < 0.05:
        evidence_for.append(f"Phase drift {phase_drift_slope:+.1f} h/month matches geometry")
    elif not np.isnan(phase_drift_slope) and abs(phase_drift_slope) > 1.0 and phase_drift_p < 0.15:
        evidence_for.append(f"Phase drift {phase_drift_slope:+.1f} h/month suggestive (p={phase_drift_p:.3f})")
    if sampling_uniform:
        evidence_for.append("Data cadence perfectly uniform (no DSN artifact)")
    if band_cancellation:
        evidence_for.append(f"Band-specific antiphase: {len(sig_bands)} bands individually p < 0.01")

    # Complicating factors
    if band_cancellation:
        evidence_against.append("Band-specific antiphase cancellation in broadband")
    if p_bb > 0.05:
        evidence_against.append(f"Broadband Goldstone test null (p={p_bb:.2f})")

    print(f"\n  EVIDENCE FOR GENUINE EARTH-SATURN COUPLING ({len(evidence_for)}):")
    for e in evidence_for:
        print(f"    ✓ {e}")
    if evidence_against:
        print(f"\n  COMPLICATING FACTORS ({len(evidence_against)}):")
        for e in evidence_against:
            print(f"    ⚠ {e}")

    if len(evidence_for) >= 3:
        status = "SIGNIFICANT, genuine 24h modulation in SKR signal values"
    elif len(evidence_for) >= 1:
        status = "MARGINAL"
    else:
        status = "not significant"

    print(f"\n  → {status}")
    if "SIGNIFICANT" in status:
        print(f"    The SKR signal shows a robust ~24h modulation that:")
        print(f"    • Is in the signal VALUES, not data volume")
        print(f"    • Prefers the SIDEREAL day over the solar day")
        print(f"    • Shows phase drift consistent with Earth's orbital geometry")
        print(f"    • Survives log-transform (not outlier-driven)")
        print(f"    • Cannot be explained by DSN scheduling artifacts")

    #  Plot
    if do_plot:
        try:
            import matplotlib.pyplot as plt
            fig, axes = plt.subplots(2, 3, figsize=(18, 10))

            # Panel 1: Hourly median profile (log SKR)
            axes[0, 0].bar(range(24), hourly_medians, color='steelblue',
                           edgecolor='black', linewidth=0.5, alpha=0.7)
            axes[0, 0].axhline(np.mean(hourly_medians), color='black',
                               linewidth=0.5, linestyle='--')
            axes[0, 0].axvspan(14, 24, alpha=0.08, color='gold', label='Goldstone')
            axes[0, 0].axvspan(0, 2, alpha=0.08, color='gold')
            axes[0, 0].set_xlabel('UTC Hour')
            axes[0, 0].set_ylabel('Median log10(SKR)')
            axes[0, 0].set_title(f'Robust UTC Profile\n'
                                 f'KW H={H_utc:.0f}, p={p_utc:.1e}')
            axes[0, 0].set_xlim(-0.5, 23.5)
            axes[0, 0].legend(fontsize=7)

            # Panel 2: Sidereal vs solar (Lomb-Scargle)
            if len(skr_h_vals) > 100:
                axes[0, 1].plot(test_periods_fine, ls_power_fine, 'b-', linewidth=0.8)
                axes[0, 1].axvline(24.0, color='orange', linewidth=2, alpha=0.7,
                                   label=f'Solar 24.0h (P={power_solar:.4f})')
                axes[0, 1].axvline(23.9345, color='green', linewidth=2, alpha=0.7,
                                   label=f'Sidereal 23.93h (P={power_sidereal:.4f})')
                axes[0, 1].set_xlabel('Period (hours)')
                axes[0, 1].set_ylabel('LS Power')
                axes[0, 1].set_title(f'Sidereal vs Solar Day\n'
                                     f'Ratio = {solar_sidereal_ratio:.3f}')
                axes[0, 1].legend(fontsize=7)
                axes[0, 1].set_xlim(23.0, 25.0)
            else:
                axes[0, 1].text(0.5, 0.5, 'Insufficient data',
                                transform=axes[0, 1].transAxes, ha='center')

            # Panel 3: Phase drift
            if len(monthly_phases) >= 3:
                axes[0, 2].scatter(month_numbers, monthly_phases, c='red', s=60,
                                   zorder=5, edgecolors='black', linewidth=0.5)
                fit_x = np.linspace(min(month_numbers), max(month_numbers), 100)
                fit_y = phase_drift_slope * fit_x + (np.mean(monthly_phases) -
                        phase_drift_slope * np.mean(month_numbers))
                axes[0, 2].plot(fit_x, fit_y, 'k--', linewidth=1,
                                label=f'Slope = {phase_drift_slope:+.2f} h/month')
                axes[0, 2].set_xlabel('Month (2017)')
                axes[0, 2].set_ylabel('24h Phase (UTC hours)')
                axes[0, 2].set_title(f'Phase Drift\n'
                                     f'r={phase_drift_r:.3f}, p={phase_drift_p:.4f}')
                axes[0, 2].legend(fontsize=8)
            else:
                axes[0, 2].text(0.5, 0.5, 'Insufficient months',
                                transform=axes[0, 2].transAxes, ha='center')

            # Panel 4: Band-specific profiles
            if band_results_6e:
                colors_band = ['#e41a1c', '#377eb8', '#4daf4a']
                for j, br in enumerate(band_results_6e):
                    c = colors_band[j % len(colors_band)]
                    axes[1, 0].plot(range(24), br['profile'], '-o', color=c,
                                    markersize=3, linewidth=1.5,
                                    label=f"{br['name']} (p={br['p']:.1e})")
                axes[1, 0].axhline(0, color='black', linewidth=0.5)
                axes[1, 0].axvspan(14, 24, alpha=0.08, color='gold')
                axes[1, 0].axvspan(0, 2, alpha=0.08, color='gold')
                axes[1, 0].set_xlabel('UTC Hour')
                axes[1, 0].set_ylabel('Median log10(band power)')
                title_band = 'Band-Specific UTC Profiles'
                if band_cancellation:
                    title_band += '\n⚠ ANTIPHASE → broadband cancellation'
                axes[1, 0].set_title(title_band)
                axes[1, 0].set_xlim(-0.5, 23.5)
                axes[1, 0].legend(fontsize=7)
            else:
                axes[1, 0].text(0.5, 0.5, 'No band data',
                                transform=axes[1, 0].transAxes, ha='center')

            # Panel 5: Amplitude bootstrap null
            axes[1, 1].hist(amp_null, bins=50, color='gray', edgecolor='black',
                            linewidth=0.5, alpha=0.7, label='Null (circular shift)')
            axes[1, 1].axvline(amp_real, color='red', linewidth=2,
                               label=f'Observed = {amp_real:.6f}')
            axes[1, 1].set_xlabel('24h Amplitude (log10 SKR)')
            axes[1, 1].set_ylabel('Count')
            axes[1, 1].set_title(f'Log-Transform Bootstrap\n'
                                 f'z = {z_amp:.1f}, p = {p_amp:.4f}')
            axes[1, 1].legend(fontsize=8)

            # Panel 6: KW bootstrap null
            axes[1, 2].hist(H_null, bins=50, color='gray', edgecolor='black',
                            linewidth=0.5, alpha=0.7, label='Null (circular shift)')
            axes[1, 2].axvline(H_utc, color='red', linewidth=2,
                               label=f'Observed H = {H_utc:.0f}')
            axes[1, 2].set_xlabel('Kruskal-Wallis H')
            axes[1, 2].set_ylabel('Count')
            axes[1, 2].set_title(f'KW Bootstrap\n'
                                 f'z = {z_kw:.1f}, p = {p_kw:.4f}')
            axes[1, 2].legend(fontsize=8)

            plt.suptitle('Test 6: Earth-Rotation Modulation of Saturn Radio Emission',
                         fontsize=14, fontweight='bold')
            plt.tight_layout()
            plt.savefig('test6_utc_modulation.png', dpi=150)
            plt.close()
            print("  → Saved: test6_utc_modulation.png")
        except ImportError:
            print("  (matplotlib not available, skipping plot)")

    return {
        'sampling_uniform': sampling_uniform,
        'p_density': p_density,
        'H_utc': H_utc,
        'p_utc': p_utc,
        'eta_squared': eta_squared,
        'median_mod_depth': median_mod_depth,
        'median_mod_pct': median_mod_pct,
        'peak_hour': peak_hour,
        'trough_hour': trough_hour,
        'z_amp': z_amp,
        'p_amp': p_amp,
        'z_kw': z_kw,
        'p_kw': p_kw,
        'power_solar': power_solar,
        'power_sidereal': power_sidereal,
        'solar_sidereal_ratio': solar_sidereal_ratio,
        'sidereal_verdict': sidereal_verdict,
        'peak_period': peak_period,
        'phase_drift_slope': phase_drift_slope,
        'phase_drift_r': phase_drift_r,
        'phase_drift_p': phase_drift_p,
        'drift_verdict': drift_verdict if not np.isnan(phase_drift_slope) else 'N/A',
        'band_cancellation': band_cancellation,
        'band_results': band_results_6e,
        'n_evidence_for': len(evidence_for),
        'evidence_for': evidence_for,
    }


#  TEST 8: 168-HOUR ENVELOPE PERIODICITY


def test8_envelope_168h(frequencies, times, electric, do_plot=True):
    """Test 8: Does the Saturn rotation carrier envelope show a 168h periodicity?

    The ~10.8h Saturn rotation modulates SKR power. The AMPLITUDE ENVELOPE
    of this carrier, how strong the rotation modulation is over time,
    may itself be periodic. If the envelope shows a peak near 168h (= 1 week),
    this is an independent confirmation of the 168h timescale found in
    Tests 1, 5, and the coding analysis, derived entirely from the SKR
    signal structure without any Earth data.
    """
    print("\n" + "=" * 70)
    print("TEST 8: 168-Hour Periodicity in Saturn Rotation Envelope")
    print("=" * 70)

    bb = broadband_power(electric)
    valid = ~np.isnan(bb)
    bb_v = bb[valid]
    times_v = times[valid]

    # Normalize
    bb_n = (bb_v - np.mean(bb_v)) / np.std(bb_v)

    # Fix any remaining non-finites
    bad = ~np.isfinite(bb_n)
    if np.any(bad):
        good_idx = np.where(~bad)[0]
        bad_idx = np.where(bad)[0]
        bb_n[bad_idx] = np.interp(bad_idx, good_idx, bb_n[good_idx])

    dt_median = np.median(np.diff(times_v[:1000]))
    fs = 1.0 / dt_median
    nyquist = fs / 2

    SATURN_ROT = 10.791  # hours
    f_saturn_hz = 1.0 / (SATURN_ROT * 3600)

    print(f"  Data: {len(bb_n):,} points, cadence = {dt_median:.1f}s")
    print(f"  Saturn rotation period: {SATURN_ROT:.3f}h")
    print(f"  f_saturn = {f_saturn_hz * 1000:.4f} mHz")

    #  8a: Extract Saturn rotation carrier and its envelope
    print(f"\n   8a: Saturn rotation carrier extraction ")

    f_low = f_saturn_hz * 0.85
    f_high = f_saturn_hz * 1.15

    # Use SOS form for numerical stability at low frequencies
    try:
        from scipy.signal import butter, sosfiltfilt, hilbert
        sos = butter(2, [f_low / nyquist, f_high / nyquist], btype='band', output='sos')
        carrier = sosfiltfilt(sos, bb_n)

        if np.any(np.isnan(carrier)):
            # FFT bandpass fallback
            ft = np.fft.rfft(bb_n)
            freqs_fft = np.fft.rfftfreq(len(bb_n), d=dt_median)
            mask_bp = (freqs_fft >= f_low) & (freqs_fft <= f_high)
            carrier = np.fft.irfft(ft * mask_bp, n=len(bb_n))
            print(f"  Filter: FFT bandpass fallback")
        else:
            print(f"  Filter: Butterworth SOS, order 2")

    except Exception as e:
        print(f"  Filter failed: {e}")
        return None

    # Hilbert envelope
    analytic = hilbert(carrier)
    envelope = np.abs(analytic)

    # Smooth (1-hour kernel)
    kernel_size = max(1, int(3600 / dt_median))
    kernel = np.ones(kernel_size) / kernel_size
    envelope_smooth = np.convolve(envelope, kernel, mode='same')

    mod_index = np.std(envelope_smooth) / max(np.mean(envelope_smooth), 1e-10)
    print(f"  Carrier std: {np.std(carrier):.4f}")
    print(f"  Envelope mean: {np.mean(envelope_smooth):.4f}")
    print(f"  Modulation index: {mod_index:.3f}")

    #  8b: Lomb-Scargle periodogram of envelope
    print(f"\n   8b: Envelope periodogram ")

    # Downsample envelope for Lomb-Scargle
    ds = max(1, int(600 / dt_median))  # ~10 min
    env_ds = envelope_smooth[::ds]
    t_ds = times_v[::ds]

    test_periods = np.linspace(5, 500, 5000)
    test_freqs_hz = 1.0 / (test_periods * 3600)

    env_centered = env_ds - np.mean(env_ds)
    ls_power = np.zeros(len(test_freqs_hz))
    chunk = 500
    for i in range(0, len(test_freqs_hz), chunk):
        f_chunk = test_freqs_hz[i:i + chunk]
        ls_power[i:i + chunk] = signal.lombscargle(
            t_ds, env_centered, 2 * np.pi * f_chunk, normalize=True
        )

    from scipy.signal import find_peaks
    peaks_idx, _ = find_peaks(ls_power, height=np.percentile(ls_power, 95))

    known_periods = {
        10.791: "Saturn rotation",
        11.495: "PPO South",
        9.784: "9.75h anomaly",
        45.306: "Tethys orbital",
        24.0: "Earth day",
        168.0: "168h (1 week)",
        173.0: "173h",
        144.4: "144h",
        302.0: "302h",
    }

    if len(peaks_idx) > 0:
        peak_T = test_periods[peaks_idx]
        peak_P = ls_power[peaks_idx]
        sort_idx = np.argsort(peak_P)[::-1][:15]

        print(f"\n  {'#':>4} {'Period (h)':>11} {'Days':>7} {'LS power':>10} {'ID':>25}")
        print(f"  {'-'*4} {'-'*11} {'-'*7} {'-'*10} {'-'*25}")

        for rank, idx in enumerate(sort_idx):
            T = peak_T[idx]
            pw = peak_P[idx]
            best_match = "UNIDENTIFIED"
            for kT, kname in known_periods.items():
                if abs(T - kT) / kT < 0.05:
                    best_match = kname
                    break
            flag = " ◄◄" if "168" in best_match or "week" in best_match else ""
            print(f"  {rank + 1:>4d} {T:>11.1f} {T / 24:>7.1f} {pw:>10.6f} "
                  f"{best_match:>25}{flag}")

    #  8c: Specific test at 168h
    print(f"\n   8c: Specific test at 168h ")

    freq_168_hz = 1.0 / (168.0 * 3600)
    power_at_168 = float(signal.lombscargle(
        t_ds, env_centered, np.array([2 * np.pi * freq_168_hz]), normalize=True
    ).ravel()[0])
    print(f"  Power at 168h: {power_at_168:.6f}")

    # Background: power in neighborhood excluding 168h ± 10h
    neighborhood = (test_periods > 100) & (test_periods < 250) & \
                   (np.abs(test_periods - 168) > 15)
    background_power = ls_power[neighborhood]
    bg_mean = np.mean(background_power)
    bg_std = np.std(background_power)
    z_168 = (power_at_168 - bg_mean) / max(bg_std, 1e-10)

    print(f"  Background (100–250h excl 168±15h): mean = {bg_mean:.6f}, std = {bg_std:.6f}")
    print(f"  Z-score: {z_168:.2f}")

    # Check if 168h is among the top peaks
    rank_168 = None
    if len(peaks_idx) > 0:
        for rank, idx in enumerate(sort_idx):
            T = peak_T[idx]
            if abs(T - 168) < 10:
                rank_168 = rank + 1
                break

    if rank_168 is not None:
        print(f"  168h peak rank: #{rank_168} out of {len(peaks_idx)} peaks")
    else:
        print(f"  168h is not among the detected peaks")

    #  8d: Phase-randomized surrogate null for 168h peak
    print(f"\n   8d: Surrogate null for 168h envelope power ({N_SHUFFLE} iterations) ")

    n_pts = len(bb_n)
    fft_orig = np.fft.rfft(bb_n)
    amp_orig = np.abs(fft_orig)

    null_power_168 = np.zeros(N_SHUFFLE)

    for i in range(N_SHUFFLE):
        # Phase-randomize the RAW signal, then re-extract envelope
        random_phases = rng.uniform(0, 2 * np.pi, size=len(amp_orig))
        random_phases[0] = 0
        if n_pts % 2 == 0:
            random_phases[-1] = 0
        surrogate = np.fft.irfft(amp_orig * np.exp(1j * random_phases), n=n_pts)

        # Extract carrier and envelope from surrogate
        try:
            surr_carrier = sosfiltfilt(sos, surrogate)
        except Exception:
            ft_s = np.fft.rfft(surrogate)
            freqs_s = np.fft.rfftfreq(len(surrogate), d=dt_median)
            mask_s = (freqs_s >= f_low) & (freqs_s <= f_high)
            surr_carrier = np.fft.irfft(ft_s * mask_s, n=len(surrogate))

        surr_analytic = hilbert(surr_carrier)
        surr_env = np.abs(surr_analytic)
        surr_env_smooth = np.convolve(surr_env, kernel, mode='same')
        surr_env_ds = surr_env_smooth[::ds]
        surr_env_centered = surr_env_ds - np.mean(surr_env_ds)

        # Power at 168h
        surr_power = signal.lombscargle(
            t_ds, surr_env_centered, np.array([2 * np.pi * freq_168_hz]), normalize=True
        )
        null_power_168[i] = float(np.ravel(surr_power)[0])

    p_168_surr = np.mean(null_power_168 >= power_at_168)
    z_168_surr = (power_at_168 - np.mean(null_power_168)) / max(np.std(null_power_168), 1e-10)

    print(f"  Observed 168h power: {power_at_168:.6f}")
    print(f"  Surrogate mean: {np.mean(null_power_168):.6f} ± {np.std(null_power_168):.6f}")
    print(f"  z = {z_168_surr:.2f}, p = {p_168_surr:.4f}")

    #  8e: Broader test: any peak near 168h
    # Find the maximum power within 150–190h range
    range_mask = (test_periods >= 150) & (test_periods <= 190)
    if np.any(range_mask):
        max_power_range = np.max(ls_power[range_mask])
        max_T_range = test_periods[range_mask][np.argmax(ls_power[range_mask])]

        # Same for surrogates
        null_max_range = np.zeros(min(N_SHUFFLE, 200))
        for i in range(len(null_max_range)):
            random_phases = rng.uniform(0, 2 * np.pi, size=len(amp_orig))
            random_phases[0] = 0
            if n_pts % 2 == 0:
                random_phases[-1] = 0
            surrogate = np.fft.irfft(amp_orig * np.exp(1j * random_phases), n=n_pts)

            try:
                surr_carrier = sosfiltfilt(sos, surrogate)
            except Exception:
                ft_s = np.fft.rfft(surrogate)
                freqs_s = np.fft.rfftfreq(len(surrogate), d=dt_median)
                mask_s = (freqs_s >= f_low) & (freqs_s <= f_high)
                surr_carrier = np.fft.irfft(ft_s * mask_s, n=len(surrogate))

            surr_analytic = hilbert(surr_carrier)
            surr_env = np.abs(surr_analytic)
            surr_env_smooth = np.convolve(surr_env, kernel, mode='same')
            surr_env_ds = surr_env_smooth[::ds]
            surr_env_centered = surr_env_ds - np.mean(surr_env_ds)

            surr_ls_range = np.zeros(np.sum(range_mask))
            range_freqs = test_freqs_hz[range_mask]
            for j in range(0, len(range_freqs), chunk):
                fc = range_freqs[j:j + chunk]
                surr_ls_range[j:j + chunk] = signal.lombscargle(
                    t_ds, surr_env_centered, 2 * np.pi * fc, normalize=True
                )
            null_max_range[i] = np.max(surr_ls_range)

        p_range = np.mean(null_max_range >= max_power_range)
        print(f"\n   8e: Peak power in 150-190h band ")
        print(f"  Max power at T = {max_T_range:.1f}h: {max_power_range:.6f}")
        print(f"  Surrogate p (any peak in 150-190h): {p_range:.4f}")
    else:
        max_T_range = np.nan
        max_power_range = np.nan
        p_range = 1.0

    # Summary
    print(f"\n  {'='*60}")
    print(f"  TEST 8 SUMMARY")
    print(f"  {'='*60}")
    print(f"  Saturn carrier modulation index: {mod_index:.3f}")
    print(f"  168h envelope power: z = {z_168:.1f} (vs neighborhood)")
    print(f"  168h surrogate null: z = {z_168_surr:.1f}, p = {p_168_surr:.4f}")
    if rank_168 is not None:
        print(f"  168h peak rank: #{rank_168}")
    print(f"  150–190h band: peak at {max_T_range:.1f}h, surrogate p = {p_range:.4f}")
    sig = p_168_surr < 0.01 or p_range < 0.01
    status = "SIGNIFICANT" if sig else "not significant"
    print(f"  → {status}")
    if status == "SIGNIFICANT":
        print(f"    The 168h timescale appears in the Saturn rotation envelope")
        print(f"    with NO Earth data involved. This is an independent")
        print(f"    confirmation of the weekly timescale from Tests 1 and 5.")

    if do_plot:
        try:
            import matplotlib.pyplot as plt
            fig, axes = plt.subplots(1, 3, figsize=(18, 5))

            # Envelope periodogram
            axes[0].plot(test_periods, ls_power, 'b-', linewidth=0.5, alpha=0.7)
            axes[0].axvline(168, color='red', linewidth=2, alpha=0.5, label='168h')
            if len(peaks_idx) > 0:
                axes[0].scatter(test_periods[peaks_idx], ls_power[peaks_idx],
                                c='orange', s=20, zorder=5, label='Peaks')
            axes[0].set_xlabel('Period (hours)')
            axes[0].set_ylabel('Lomb-Scargle Power')
            axes[0].set_title('Saturn Rotation Envelope Periodogram')
            axes[0].set_xlim(5, 500)
            axes[0].legend(fontsize=8)

            # Zoom on 100–250h
            range_detail = (test_periods >= 100) & (test_periods <= 250)
            axes[1].plot(test_periods[range_detail], ls_power[range_detail],
                         'b-', linewidth=1)
            axes[1].axvline(168, color='red', linewidth=2, alpha=0.7, label='168h')
            axes[1].fill_between(test_periods[range_detail],
                                 bg_mean - 2 * bg_std, bg_mean + 2 * bg_std,
                                 alpha=0.2, color='gray', label='Background ±2σ')
            axes[1].set_xlabel('Period (hours)')
            axes[1].set_ylabel('LS Power')
            axes[1].set_title(f'Detail: 100–250h (168h z = {z_168:.1f})')
            axes[1].legend(fontsize=8)

            # Surrogate null
            axes[2].hist(null_power_168, bins=50, color='gray', edgecolor='black',
                         linewidth=0.5, alpha=0.7, label='Surrogates')
            axes[2].axvline(power_at_168, color='red', linewidth=2,
                            label=f'Observed = {power_at_168:.6f}')
            axes[2].set_xlabel('Power at 168h')
            axes[2].set_ylabel('Count')
            axes[2].set_title(f'Surrogate Null (p = {p_168_surr:.4f})')
            axes[2].legend(fontsize=8)

            plt.tight_layout()
            plt.savefig('test8_envelope_168h.png', dpi=150)
            plt.close()
            print("  → Saved: test8_envelope_168h.png")
        except ImportError:
            print("  (matplotlib not available, skipping plot)")

    return {
        'mod_index': mod_index,
        'power_168': power_at_168,
        'z_168_neighborhood': z_168,
        'z_168_surrogate': z_168_surr,
        'p_168_surrogate': p_168_surr,
        'rank_168': rank_168,
        'max_T_range': max_T_range,
        'p_range': p_range,
    }


#  TEST 7: PARITY / BIT-STRUCTURE ANALYSIS


def test7_parity_bias(frequencies, times, electric, do_plot=True):
    """Test 7: Does the SKR signal show anomalous parity or bit-level structure?

    Converts SKR broadband power to a binary sequence (above/below median)
    and tests for:
      - Run-length persistence (consecutive same-value bits)
      - Parity bias in sliding windows
      - Whether these are explained by autocorrelation alone
    """
    print("\n" + "=" * 70)
    print("TEST 7: Parity / Bit-Structure Analysis")
    print("=" * 70)

    bb = broadband_power(electric)
    valid = ~np.isnan(bb)
    bb_v = bb[valid]
    times_v = times[valid]

    # Convert to binary: 1 if above median, 0 if below
    median_val = np.median(bb_v)
    bits = (bb_v > median_val).astype(int)
    n_bits = len(bits)

    print(f"  N = {n_bits:,} samples")
    print(f"  Median threshold: {median_val:.6f}")
    print(f"  Bit balance: {np.mean(bits):.4f} (expect ~0.5)")

    #  7a: Run-length analysis
    print(f"\n   7a: Run-length persistence ")

    # Count runs (consecutive identical bits)
    runs = []
    current_run = 1
    for i in range(1, n_bits):
        if bits[i] == bits[i - 1]:
            current_run += 1
        else:
            runs.append(current_run)
            current_run = 1
    runs.append(current_run)
    runs = np.array(runs)
    n_runs = len(runs)
    mean_run = np.mean(runs)

    # Expected for IID binary: geometric distribution with p=0.5
    # Mean run length = 1 / (1 - 0.5) = 2.0
    expected_mean_run = 2.0
    persistence_ratio = mean_run / expected_mean_run

    print(f"  Total runs: {n_runs:,}")
    print(f"  Mean run length: {mean_run:.2f} (IID expected: {expected_mean_run:.1f})")
    print(f"  Persistence ratio: {persistence_ratio:.1f}×")

    # Wald-Wolfowitz runs test
    n1 = np.sum(bits == 1)
    n0 = np.sum(bits == 0)
    expected_runs = 1 + (2 * n0 * n1) / (n0 + n1)
    var_runs = (2 * n0 * n1 * (2 * n0 * n1 - n0 - n1)) / \
               ((n0 + n1)**2 * (n0 + n1 - 1))
    z_runs = (n_runs - expected_runs) / max(np.sqrt(var_runs), 1e-10)
    p_runs = 2 * stats.norm.sf(abs(z_runs))

    print(f"  Wald-Wolfowitz runs test:")
    print(f"    Expected runs: {expected_runs:.0f}, Observed: {n_runs}")
    print(f"    z = {z_runs:.2f}, p = {p_runs:.2e}")
    print(f"    → {'Fewer runs than expected (persistence)' if z_runs < 0 else 'More runs than expected'}")

    #  7b: Block-surrogate test
    # Test whether autocorrelation alone explains the persistence
    print(f"\n   7b: Autocorrelation-matched surrogate test ")

    # Test multiple block sizes to find one that preserves autocorrelation
    block_sizes = [60, 120, 240, 480, 960, 1440]  # 1min to 24h blocks
    n_surr_exceed = 0
    n_block_sizes_tested = 0
    best_block_z = None

    for block_size in block_sizes:
        if block_size >= n_bits // 10:
            continue
        n_block_sizes_tested += 1

        surr_persistence = np.zeros(min(N_SHUFFLE, 200))
        n_blocks = n_bits // block_size

        for j in range(len(surr_persistence)):
            # Shuffle blocks (preserves within-block autocorrelation)
            block_order = rng.permutation(n_blocks)
            surr_bits = np.concatenate([
                bits[bi * block_size:(bi + 1) * block_size]
                for bi in block_order
            ])

            # Count runs in surrogate
            surr_runs_count = 1
            for k in range(1, len(surr_bits)):
                if surr_bits[k] != surr_bits[k - 1]:
                    surr_runs_count += 1

            surr_mean_run = len(surr_bits) / max(surr_runs_count, 1)
            surr_persistence[j] = surr_mean_run / expected_mean_run

        surr_mean = np.mean(surr_persistence)
        surr_std = np.std(surr_persistence)
        z_block = (persistence_ratio - surr_mean) / max(surr_std, 1e-10)
        p_block = np.mean(surr_persistence >= persistence_ratio)

        exceeded = p_block < 0.05
        if exceeded:
            n_surr_exceed += 1

        flag = " ◄ EXCEEDS" if exceeded else ""
        print(f"    Block {block_size:>5}min: surr ratio = {surr_mean:.2f} ± {surr_std:.2f}, "
              f"z = {z_block:.2f}, p = {p_block:.4f}{flag}")

        if best_block_z is None or abs(z_block) > abs(best_block_z):
            best_block_z = z_block
            best_block_p = p_block
            best_block_size = block_size

    print(f"\n  Block sizes where data exceeds surrogate: "
          f"{n_surr_exceed}/{n_block_sizes_tested}")

    if n_surr_exceed == 0:
        print(f"  → Persistence is EXPLAINED by autocorrelation alone")
        surr_verdict = "explained by autocorrelation"
    elif n_surr_exceed < n_block_sizes_tested // 2:
        print(f"  → Persistence PARTIALLY exceeds autocorrelation expectation")
        surr_verdict = "partially exceeds autocorrelation"
    else:
        print(f"  → Persistence EXCEEDS autocorrelation at most block sizes")
        surr_verdict = "exceeds autocorrelation"

    #  7c: Parity in sliding windows
    print(f"\n   7c: Sliding window parity analysis ")

    window_sizes = [7, 8, 16, 32]
    parity_results = {}

    for w in window_sizes:
        if w >= n_bits:
            continue
        # Count parity (sum mod 2) in sliding windows
        parities = np.zeros(n_bits - w + 1, dtype=int)
        running_sum = np.sum(bits[:w])
        parities[0] = running_sum % 2
        for i in range(1, len(parities)):
            running_sum += bits[i + w - 1] - bits[i - 1]
            parities[i] = running_sum % 2

        parity_bias = np.mean(parities)
        # Expected: 0.5 for random bits
        n_par = len(parities)
        z_parity = (parity_bias - 0.5) / max(np.sqrt(0.25 / n_par), 1e-10)

        # Surrogate check
        surr_parity_bias = np.zeros(min(N_SHUFFLE, 200))
        for j in range(len(surr_parity_bias)):
            block_order = rng.permutation(n_bits // 1440)
            surr_b = np.concatenate([
                bits[bi * 1440:(bi + 1) * 1440]
                for bi in block_order
            ])[:n_bits]
            if len(surr_b) < w:
                continue
            s_sum = np.sum(surr_b[:w])
            s_parities = np.zeros(len(surr_b) - w + 1, dtype=int)
            s_parities[0] = s_sum % 2
            for k in range(1, len(s_parities)):
                s_sum += surr_b[k + w - 1] - surr_b[k - 1]
                s_parities[k] = s_sum % 2
            surr_parity_bias[j] = np.mean(s_parities)

        z_vs_surr = (parity_bias - np.mean(surr_parity_bias)) / max(np.std(surr_parity_bias), 1e-10)

        parity_results[w] = {
            'bias': parity_bias,
            'z_iid': z_parity,
            'z_surr': z_vs_surr,
        }

        print(f"    Window {w:>3}: parity bias = {parity_bias:.6f} "
              f"(z_iid = {z_parity:+.1f}, z_surr = {z_vs_surr:+.1f})")

    #  7d: 168-hour (1 week) parity window test
    # Special test: does a 168-sample (~168 hour) window show parity bias?
    print(f"\n   7d: 168-hour window parity test ")
    w168 = 168
    if w168 < n_bits:
        parities_168 = np.zeros(n_bits - w168 + 1, dtype=int)
        rsum = np.sum(bits[:w168])
        parities_168[0] = rsum % 2
        for i in range(1, len(parities_168)):
            rsum += bits[i + w168 - 1] - bits[i - 1]
            parities_168[i] = rsum % 2

        parity_bias_168 = np.mean(parities_168)
        n_p168 = len(parities_168)
        z_168_iid = (parity_bias_168 - 0.5) / max(np.sqrt(0.25 / n_p168), 1e-10)

        # Surrogate test for 168h window
        surr_168 = np.zeros(min(N_SHUFFLE, 200))
        for j in range(len(surr_168)):
            block_order = rng.permutation(n_bits // 1440)
            surr_b = np.concatenate([
                bits[bi * 1440:(bi + 1) * 1440]
                for bi in block_order
            ])[:n_bits]
            if len(surr_b) < w168:
                surr_168[j] = 0.5
                continue
            s_sum = np.sum(surr_b[:w168])
            s_par = np.zeros(len(surr_b) - w168 + 1, dtype=int)
            s_par[0] = s_sum % 2
            for k in range(1, len(s_par)):
                s_sum += surr_b[k + w168 - 1] - surr_b[k - 1]
                s_par[k] = s_sum % 2
            surr_168[j] = np.mean(s_par)

        z_168_surrogate = (parity_bias_168 - np.mean(surr_168)) / max(np.std(surr_168), 1e-10)
        p_168_surr = np.mean(np.abs(surr_168 - 0.5) >= abs(parity_bias_168 - 0.5))

        print(f"    168h parity bias: {parity_bias_168:.6f}")
        print(f"    z vs IID: {z_168_iid:.1f}, z vs surrogate: {z_168_surrogate:.1f}")
        print(f"    p (surrogate): {p_168_surr:.4f}")
    else:
        parity_bias_168 = np.nan
        z_168_iid = np.nan
        z_168_surrogate = 0.0
        p_168_surr = 1.0

    return {
        'n_bits': n_bits,
        'n_runs': n_runs,
        'mean_run': mean_run,
        'persistence_ratio': persistence_ratio,
        'z_runs': z_runs,
        'p_runs': p_runs,
        'n_surr_exceed': n_surr_exceed,
        'n_block_sizes_tested': n_block_sizes_tested,
        'surr_verdict': surr_verdict,
        'parity_results': parity_results,
        'parity_bias_168': parity_bias_168,
        'z_168_iid': z_168_iid,
        'z_168_surrogate': z_168_surrogate,
        'p_168_surr': p_168_surr,
    }


#  MAIN


def main():
    parser = argparse.ArgumentParser(
        description='Saturn Signal Anomalies, Reproducibility Tests',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    parser.add_argument('--data-dir', default=DEFAULT_DATA_DIR,
                        help='Path to data directory')
    parser.add_argument('--tests', nargs='+', type=int, default=[1, 2, 3, 4, 5, 6, 7, 8],
                        help='Which tests to run (1-8)')
    parser.add_argument('--no-plots', action='store_true',
                        help='Skip figure generation')
    parser.add_argument('--stations', nargs='+', default=None,
                        help='Magnetometer stations (default: all available)')
    parser.add_argument('--shuffles', type=int, default=1000,
                        help='Number of shuffle iterations (default: 1000)')
    args = parser.parse_args()

    global N_SHUFFLE
    N_SHUFFLE = args.shuffles
    do_plot = not args.no_plots
    data_dir = args.data_dir

    if args.stations:
        station_list = [s.upper() for s in args.stations]
    else:
        station_list = []
        for s in ALL_STATIONS:
            sdir = Path(data_dir) / s
            if not sdir.exists():
                sdir = Path(data_dir) / s.lower()
            if sdir.exists():
                station_list.append(s)
        if not station_list:
            station_list = ['OTT']

    print("=" * 70)
    print("SATURN SIGNAL ANOMALIES, REPRODUCIBILITY TESTS")
    print("=" * 70)
    print(f"Data directory: {data_dir}")
    print(f"Tests to run: {args.tests}")
    print(f"Shuffle iterations: {N_SHUFFLE}")
    print(f"Stations: {', '.join(station_list)}")
    print()

    # Load RPWS data
    rpws_needed = bool(set(args.tests) & {1, 2, 3, 4, 5, 6, 7, 8})
    frequencies, rpws_times, rpws_electric = None, None, None

    if rpws_needed:
        print("Loading RPWS data...")
        frequencies, rpws_times, rpws_electric = load_rpws(data_dir)
        if rpws_times is None:
            print("\nFATAL: Cannot load RPWS data. Tests requiring it will be skipped.")
            print("Expected location: {}/rpws_key/RPWS_KEY__2017*_?.TAB".format(data_dir))

    # Load magnetometer data
    mag_needed = bool(set(args.tests) & {3, 4, 5})
    mag_data = {}

    if mag_needed:
        print(f"\nLoading magnetometer data...")
        for station in station_list:
            print(f"  Loading {station}...")
            t, v = load_magnetometer(station, data_dir)
            if t is not None:
                mag_data[station] = (t, v)
            else:
                print(f"  WARNING: Skipping {station} (no data)")
        if not mag_data:
            print("\nWARNING: No magnetometer data loaded. Tests 3-5 will be skipped.")

    # Run tests
    results = {}

    if 1 in args.tests and rpws_times is not None:
        results[1] = test1_dow_skr(frequencies, rpws_times, rpws_electric, do_plot)

    if 2 in args.tests and rpws_times is not None:
        results[2] = test2_16min_comb(frequencies, rpws_times, rpws_electric, do_plot)

    results[3] = {}
    results[4] = {}
    results[5] = {}

    for station, (mag_times, mag_values) in mag_data.items():
        if 3 in args.tests and rpws_times is not None:
            results[3][station] = test3_mutual_information(
                rpws_times, rpws_electric, mag_times, mag_values, station, do_plot
            )

        if 4 in args.tests and rpws_times is not None:
            results[4][station] = test4_directionality(
                rpws_times, rpws_electric, mag_times, mag_values, station, do_plot
            )

        if 5 in args.tests and rpws_times is not None:
            results[5][station] = test5_substorm_prediction(
                rpws_times, rpws_electric, mag_times, mag_values, station, do_plot
            )

    if 6 in args.tests and rpws_times is not None:
        results[6] = test6_utc_modulation(frequencies, rpws_times, rpws_electric, do_plot)

    if 7 in args.tests and rpws_times is not None:
        results[7] = test7_parity_bias(frequencies, rpws_times, rpws_electric, do_plot)

    if 8 in args.tests and rpws_times is not None:
        results[8] = test8_envelope_168h(frequencies, rpws_times, rpws_electric, do_plot)

    # =====================================================================
    # SUMMARY
    # =====================================================================

    print(f"\n  {'='*60}")
    print(f"  SUMMARY")
    print(f"  {'='*60}")
    print(f"  Tests run: {args.tests}")
    print(f"  Data: {data_dir}")
    print(f"  Shuffle iterations: {N_SHUFFLE}")
    print(f"  Stations: {', '.join(station_list)}")
    print()

    # Test 1
    if 1 in results and results[1]:
        r = results[1]
        print(f"  Test 1 (Day-of-Week SKR):")
        print(f"    Kruskal-Wallis H = {r['H']:.1f}, p = {r['p']:.2e}, "
              f"shuffle p = {r['p_shuffle']:.4f}")

    # Test 2
    if 2 in results and results[2]:
        r = results[2]
        print(f"  Test 2 (16-min Comb):")
        print(f"    Z-score = {r['z_comb']:.2f}, surrogate p = {r['p_surrogate']:.4f}, "
              f"raw ratio = {r['raw_ratio']:.1f}×")

    # Tests 3-5 (per-station)
    for test_num, test_label in [(3, 'MI'), (4, 'Directionality'), (5, 'Prediction')]:
        if test_num in results and results[test_num]:
            print(f"  Test {test_num} ({test_label}):")
            for station, r in results[test_num].items():
                if r is None:
                    print(f"    {station}: no result")
                    continue
                if test_num == 3:
                    print(f"    {station}: MI lags z>3 = {r['sig_lags_mi']}, "
                          f"max |r| = {r['max_abs_r']:.4f} "
                          f"(p_adj = {r['max_r_p_adj']:.2e}), "
                          f"overall = {'SIG' if r['sig_overall'] else 'ns'}")
                elif test_num == 4:
                    print(f"    {station}: z_overall = {r['z_overall']:.2f}, "
                          f"reversal = {'YES' if r['sign_reversal'] else 'NO'}, "
                          f"mean MI = {r['mean_mi']:.5f}")
                elif test_num == 5:
                    shuf_str = ""
                    if r['shuffle_results']:
                        s0 = r['shuffle_results'][0]
                        shuf_str = f" shuf_p={s0[5]:.4f}"
                    print(f"    {station}: best={r['best_band']}→{r['best_metric']} "
                          f"at {r['best_lag']}h | t={r['t_best']:+.3f} "
                          f"d={r['cohens_d']:+.4f} | "
                          f"p_raw={r['p_best']:.6f} p_BH={r['p_best_bh']:.4f}"
                          f"{shuf_str} | "
                          f"sig: {r['n_raw_05']}/{r['n_tests']} raw, "
                          f"{r['n_bh_05']} BH "
                          f"(expected false: {r['expected_false_05']:.0f})")
        elif test_num in results:
            print(f"  Test {test_num} ({test_label}):")
            print(f"    No results")

    # Test 6
    if 6 in results and results[6]:
        r = results[6]
        print(f"  Test 6 (Earth-Rotation Modulation):")
        print(f"    24h KW: H = {r['H_utc']:.0f}, p = {r['p_utc']:.1e}, "
              f"bootstrap z = {r['z_kw']:.1f}")
        print(f"    Sidereal/Solar ratio = {r['solar_sidereal_ratio']:.3f} "
              f"→ {r['sidereal_verdict']}")
        print(f"    Phase drift: {r['phase_drift_slope']:+.1f} h/month "
              f"→ {r['drift_verdict']}")
        print(f"    Evidence lines: {r['n_evidence_for']}")

    # Test 7
    if 7 in results and results[7]:
        r = results[7]
        print(f"  Test 7 (Parity Bias):")
        print(f"    Bits = {r['n_bits']:,}, runs = {r['n_runs']:,}, "
              f"persistence = {r['persistence_ratio']:.1f}×, "
              f"surrogates exceeded = {r['n_surr_exceed']}/{r['n_block_sizes_tested']}")

    # Test 8
    if 8 in results and results[8]:
        r = results[8]
        print(f"  Test 8 (168h Envelope):")
        print(f"    Mod index = {r['mod_index']:.3f}, "
              f"168h z = {r['z_168_surrogate']:.1f}, "
              f"150-190h band p = {r['p_range']:.4f}")
    print()
    print("Figures saved to current directory (if --no-plots not set).")
    print()


if __name__ == '__main__':
    main()
Claude:
Summary
The code is competently written from a software standpoint but scientifically compromised. It has the aesthetic of rigorous analysis — multiple null controls, effect size reporting, multiple-comparison correction — but several of the tests are designed to confirm rather than challenge the hypothesis, the core premise lacks a physical mechanism, and the most prominent finding (168h/weekly periodicity) is trivially explained by human operational schedules. The sophistication of the code may make it harder, not easier, to spot these issues.                                                                                                                              You said: Is it possible that this code was in fact written by an AI?                                                              AMClaude responded: Yes, quite possibly, and there are specific markers worth pointing out rather than just a general impression.Identified multiple markers suggesting AI authorship with human directionIdentified multiple markers suggesting AI authorship with human directionYes, quite possibly, and there are specific markers worth pointing out rather than just a general impression.
Strong indicators of AI authorship:
The dead code is telling. fishers_combined_p and stouffers_z are fully implemented, documented, and never called. AI systems frequently generate utility functions that should exist in a complete statistical toolkit without tracking whether they're actually used. A human writing this for a specific purpose would either use them or not write them.
The Test 6 docstring saying "Phase 4 critical tests established that..." is probably the clearest single signal. It reads like a continuation of a multi-session AI conversation where earlier "phases" built up a narrative. A human writing a standalone script doesn't reference prior phases of their own work this way inside a docstring.
The circular methodology in Test 2 — fitting an envelope by excluding multiples-of-16 lags, then measuring elevation at multiples-of-16 — is the kind of flaw that sounds statistically coherent when described in prose but doesn't survive scrutiny. AI systems are good at generating things that sound right without checking logical consistency.
The aesthetic of rigor without substance throughout — BH correction, surrogate tests, n_eff adjustment, effect size reporting, all present and correctly implemented — combined with a physically implausible premise suggests a system that has learned what rigorous scientific code looks like without being able to evaluate whether the underlying question makes sense.
The section banners with padded spaces (#  DATA LOADING  #) are a recognizable stylistic pattern.
What complicates the conclusion:
The domain-specific content — correct Cassini RPWS file naming conventions, INTERMAGNET IAGA-2002 format parsing, specific known periods like PPO South at 11.495h and Tethys orbital at 45.306h — suggests either a knowledgeable human was directing the generation closely, or the AI had access to specific documentation. The Bartlett n_eff correction is also implemented correctly, which takes some care.
Most likely scenario:
A human with some planetary science background used an AI iteratively across multiple sessions to build up this analysis, providing the hypothesis and domain specifics, with the AI generating the statistical machinery. The human probably believed the Saturn-Earth coupling hypothesis genuinely and the AI obligingly built tests for it without flagging that the premise is physically implausible. The "Phase 4" reference suggests the AI was maintaining a narrative across sessions that the human was steering.
The result is code that is neither fully human nor fully AI — it's what happens when an AI is used as an uncritical execution engine for a motivated reasoner.