generator-dunnit

from __future__ import annotations

import random
import string
import sys
from collections import Counter
from itertools import product


def E(*args):
    print(*args, file=sys.stderr)


class GenError(Exception):
    pass


V = int
"""Type of the variable: int for simplicity but usually a string."""
U = set[V]
"""Universe: set of variables."""
S = list[set[V]]
"""Solution type. Same type as the "Exact Cover Problem"."""


def generate_solution(n: int, universe: U) -> S:
    return generate_solution_with_bound(n, universe, bound=6)
    elements = list(universe)
    random.shuffle(elements)

    # The n // 2 is not needed, it just makes the sets larger on average
    # This affects the length of the first clue
    num_partitions = random.randint(1, n // 2)
    partition_sizes = [1] * num_partitions
    for _ in range(len(elements) - num_partitions):
        partition_sizes[random.randint(0, num_partitions - 1)] += 1

    solution = S()
    idx = 0
    for size in partition_sizes:
        solution.append({*elements[idx : idx + size]})
        idx += size

    return solution


def generate_solution_with_bound(n: int, universe: U, *, bound: int) -> S:
    """Guarantee that every subset of the exact cover is of size <= bound."""
    elements = list(universe)
    random.shuffle(elements)

    num_partitions = random.randint(1, n // 20)
    num_partitions = n // 4
    partition_sizes = [1] * num_partitions
    available_partitions = set(range(num_partitions))

    for _ in range(n - num_partitions):
        if not available_partitions:
            break
            raise ValueError("No available partitions left")
        idx = random.choice(list(available_partitions))
        partition_sizes[idx] += 1
        if partition_sizes[idx] >= bound:
            available_partitions.remove(idx)

    solution = []
    idx = 0
    for size in partition_sizes:
        subset = set(elements[idx : idx + size])
        solution.append(subset)
        idx += size

    if not all(1 < len(ss) <= bound for ss in solution):
        m, *_, M = sorted([len(ss) for ss in solution])
        raise GenError(f"solution OOB. NOT ( (1 < {m}) OR ({M} <= 6) )")

    return solution


def generate_decoys(n: int, universe: U, solution: S) -> S:
    """Note that this can invalidate the unicity of the solution."""
    return generate_decoys_uniform(n, universe, solution)
    # Number of tries to inset a decoy, different from len(decoy_sets)
    n_iterations = 3 * n // 2
    n_iterations = n
    decoy_sets = S()
    for _ in range(n_iterations):
        # size_decoy = random.randint(5, 10)
        size_decoy = 5
        decoy = set(random.sample(sorted(universe), size_decoy))
        if decoy in decoy_sets:
            continue

        # Naive: to guarante single solutions
        if not any(decoy.issubset(s) for s in solution):
            decoy_sets.append(decoy)
    return decoy_sets


def generate_decoys_uniform(n: int, universe: U, solution: S) -> S:
    n_iterations = n
    decoy_sets = []
    element_counts = Counter(universe)

    elements = list(universe)
    random.shuffle(elements)

    for _ in range(n_iterations):
        size_decoy = 2
        decoy = set()

        while len(decoy) < size_decoy:
            min_count = min(element_counts.values(), default=0)
            candidates = [e for e in elements if element_counts[e] == min_count]
            chosen = random.choice(candidates)

            decoy.add(chosen)
            element_counts[chosen] += 1

        # The minimum is size_decoy + 1, but in order to have unique solution...
        if random.randint(0, 1) == 1:
            decoy.add(random.choice(list(universe)))

        if decoy in decoy_sets:
            continue

        if not any(decoy.issubset(s) for s in solution):
            decoy_sets.append(decoy)

    return decoy_sets


def try_generate_exact_cover(n: int, universe: U) -> tuple[S, S]:
    solution = generate_solution(n, universe)
    decoy_sets = generate_decoys(n, universe, solution)
    exact_cover = solution + decoy_sets
    random.shuffle(exact_cover)
    return solution, exact_cover


def generate_exact_cover(n: int, universe: U) -> tuple[S, S]:
    """Generates an exact cover instance with exactly one solution."""
    max_iterations = 10
    for _ in range(max_iterations):
        solution, exact_cover = try_generate_exact_cover(n, universe)
        if len(exact_cover) > 1:
            return solution, exact_cover
    raise GenError(f"Reached {max_iterations =}")


def random_str() -> str:
    return "".join(random.choice(string.ascii_letters) for _ in range(3))


def random_labels(n: int) -> list[str]:
    assert n < (26 * 2) ** 3, "Too many labels requested!"
    upto = 3 if n < (26 * 2) ** 2 else 4

    possible_labels = (
        "".join(letters)
        for size in range(1, upto)
        for letters in product(string.ascii_letters, repeat=size)
    )

    labels = random.sample(list(possible_labels), n)
    random.shuffle(labels)

    return labels


def diagnostics(exact_cover):
    cnt = Counter()
    tsize = 0
    for clause in exact_cover:
        tsize += len(clause)
        cnt += Counter(clause)
    tcnt = Counter()
    for v in cnt.values():
        tcnt[v] += 1
    return tsize, cnt, tcnt


Dunnit = tuple[str, list[str], str]
"""Testcase, solution, culprit."""


def xcc_to_dunnit(n: int, universe: U, xcc: S, solution: S) -> Dunnit:
    """The culprit is also returned for debugging."""
    # Optional: sorting for visibility
    xcc = sorted(sorted(cl) for cl in xcc)  # type: ignore

    n_requirements = max(max(cl) for cl in xcc)
    dunnit = [set() for _ in range(n_requirements)]
    solution_dunnit: list[str] = []
    labels = random_labels(len(xcc))
    for clause, action in zip(xcc, labels):
        for requirement in clause:
            dunnit[requirement - 1].add(action)
        if {*clause} in solution:
            solution_dunnit.append(action)

    tsize, cnt, tcnt = diagnostics(dunnit)
    # We only avoid this case because OP seems to have excluded it in its solution
    # but it's perfectly fine if one where to look at the statement
    if min(tcnt) <= 1:
        raise GenError(f"{min(tcnt)=} <= 1")
    # This is to match last testcases
    if max(tcnt) > 6:
        raise GenError(f"{max(tcnt)=} > 6")

    # E(f"{tsize = }")
    # E(cnt)
    # E(tcnt)
    # E(len(solution_dunnit))
    # E(min(tcnt), max(tcnt))

    # Test: put everything relevant at the end
    # fst, *rst = dunnit
    # rst.sort(key=lambda clause: {*clause} in solution)
    # dunnit = [fst] + rst

    # Test: put something small for candidates to abide the original constraints
    min_sz = min(len(cl) for cl in dunnit)
    for idx, cl in enumerate(dunnit):
        if len(cl) == min_sz:
            dunnit.insert(0, dunnit.pop(idx))
            break

    out = ""
    out += f"{len(dunnit)} {len(dunnit[0])}\n"
    E(f"{len(dunnit)} {len(dunnit[0])}")
    for clause in dunnit:
        out += ", ".join(sorted(clause))
        out += "\n"

    culprit = "none"
    for action in solution_dunnit:
        if action in dunnit[0]:
            culprit = action

    return out.strip(), solution_dunnit, culprit


SEP = "=" * 20

G = tuple[int, Dunnit, S]
"""Generated result."""


def try_generate() -> G:
    n = 200
    seed = random.randint(0, 1 << 16)
    random.seed(seed)
    universe = set(range(1, n + 1))
    solution, exact_cover = generate_exact_cover(n, universe)
    # E(f"{n} {len(exact_cover)}")
    # for s in exact_cover:
    #     E(*s)
    # E(SEP)
    # E(solution)
    # E(SEP)
    dunnit = xcc_to_dunnit(n, universe, exact_cover, solution)
    return seed, dunnit, exact_cover


def generate() -> G:
    max_iterations = 2000
    for _ in range(max_iterations):
        try:
            gen = try_generate()
            seed, dunnit, exact_cover = gen
            dunnit, solution_dunnit, culprit = dunnit
            if len(solution_dunnit) < 40:
                raise GenError

            tsize, cnt, tcnt = diagnostics(exact_cover)
            E(f"{tsize = }")
            E(tcnt)
            E(min(tcnt), max(tcnt))
            return gen
        except GenError as ge:
            E(ge)
            pass
    raise GenError(f"Max iterations {max_iterations} reached in generate")


def main() -> None:
    seed, (dunnit, solution_dunnit, culprit), _ = generate()
    print(dunnit)
    E(seed)
    E(SEP)
    E(*solution_dunnit)
    E(f"{len(solution_dunnit) = }")
    E(SEP)
    E("culprit:", culprit)


if __name__ == "__main__":
    main()