Source code for minisim.spec

"""Typed, serializable specification for the ``minisim`` pipeline.

This module defines the *contract* the simulator consumes: a tree of pydantic
v2 models describing the acquisition (a real, physical interface), an ordered
list of pipeline steps, and output formatting. It is the inverse of the minian
analysis pipeline expressed as data - the same ``Spec`` object a training
notebook walks through, a test parametrizes over, and a cache keys on.

Two layers, by design:

* **Layer 1 - what you read off a datasheet.** ``Optics.na``,
  ``Optics.magnification``, ``Tissue.scatter_mfp_emission_um`` and friends. Every knob a
  user touches here is a real, measurable property of a real scope or sample.
* **Layer 2 - what a step consumes.** Pixel size, PSF sigma, attenuation, noise
  variance - *derived* from Layer 1 by small, documented, individually-testable
  helpers.

This file defines the full Layer-1 surface, the unit conversions, the static
``AnyStep`` union, and the Layer-2 physics helpers -
``Optics.diffraction_sigma_um``/``defocus_sigma_um``, ``Tissue.attenuation``/
``scatter_sigma_um``, the combined ``Acquisition.cell_optics``, and the
``ImageSensor.photons_to_counts`` sensor model. ``StepSpec.build()``
turns a spec into its executable step by looking its ``kind`` up in the
declarative :data:`minisim.steps.STEP_FOR_KIND` table; the step bodies
themselves live in :mod:`minisim.steps`.

Units convention: **everything physical is in seconds and µm/mm - never frames
or pixels.** ``Acquisition`` owns every conversion to pixels/frames.
"""

from __future__ import annotations

import hashlib
import math
import warnings
from collections import Counter
from collections.abc import Iterable, Mapping
from typing import TYPE_CHECKING, Annotated, ClassVar, Literal, TypeVar

import numpy as np
from pydantic import (
    BaseModel,
    ConfigDict,
    Discriminator,
    Field,
    field_validator,
    model_validator,
)

if TYPE_CHECKING:
    from minisim.steps.base import Step



[docs]
class SpecWarning(UserWarning):
    """Advisory warning for unusual-but-legal spec configurations.

    The simulator distinguishes *invalid* configs (which raise) from *unusual*
    ones (which warn but still run) - e.g. a focal plane outside the cell depth
    range, or motion larger than the configured FOV margin.
    """



class _Base(BaseModel):
    """Common config for every spec model.

    ``extra="forbid"`` turns a mistyped field name into a construction-time
    error instead of a silently-ignored value. ``frozen=True`` makes specs
    immutable, which is what lets a recording be identified by its spec: the
    cache keys off the canonical JSON form (see ``Spec.cache_key``).
    """

    model_config = ConfigDict(extra="forbid", frozen=True)


# ---------------------------------------------------------------------------
# Physical interface - Acquisition / Optics / Tissue (Layer 1 + unit conversions)
# ---------------------------------------------------------------------------


# Immersion/tissue refractive index used to derive the diffraction depth of field
# from NA (≈ n·λ/NA²); ~1.33 for the watery cortex a miniscope images into.
_DOF_IMMERSION_N = 1.33



[docs]
class Optics(_Base):
    """Objective optics - the measurable lens properties of a 1-photon scope.

    Layer-2 phenomenological quantities (diffraction sigma, defocus blur) are
    *derived* from these fields by the helper methods below. Pixel
    size is a joint optics×sensor quantity (sensor pitch / magnification) and so
    lives on ``Acquisition``, not here.

    Typical 1-photon miniscope ranges: NA 0.3–0.6, magnification ~5–10×, GCaMP
    emission ~510–540 nm.
    """

    na: float = Field(gt=0, default=0.45, description="Numerical aperture of the GRIN objective.")
    magnification: float = Field(gt=0, default=8.0, description="Optical magnification (sensor side / object side).")
    emission_nm: float = Field(gt=0, default=525.0, description="Fluorophore emission wavelength, nm (GCaMP ≈ 525).")
    depth_of_field_um: float | Literal["auto"] = Field(
        default="auto",
        description="±in-focus half-depth around the focal plane, µm. 'auto' (default) "
        "derives it from NA as ≈ n·λ/NA² (the diffraction depth of field - the physical "
        "behavior, since DOF is set by the optics, not chosen); a number overrides it.",
    )
    field_curvature_radius_um: float | None = Field(
        default=None,
        description="Petzval field-curvature radius, µm (typical miniscope ≈ 2000–3000). "
        "Off-axis cells focus *shallower* by the spherical sagitta; None = ideal flat "
        "field. A miniscope has no room for a field flattener, so this is usually finite.",
    )

    @field_validator("field_curvature_radius_um")
    @classmethod
    def _check_curvature(cls, v: float | None) -> float | None:
        if v is not None and v <= 0:
            raise ValueError(
                f"field_curvature_radius_um ({v}) must be > 0, or None for a flat field."
            )
        return v

    @field_validator("depth_of_field_um")
    @classmethod
    def _check_dof(cls, v: float | Literal["auto"]) -> float | Literal["auto"]:
        if v != "auto" and v <= 0:
            raise ValueError(f"depth_of_field_um ({v}) must be > 0, or 'auto'.")
        return v

    @property
    def resolved_depth_of_field_um(self) -> float:
        """The in-focus half-depth, µm, resolving ``"auto"`` to ≈ n·λ/NA².

        A numeric ``depth_of_field_um`` is used as-is. ``"auto"`` derives the
        diffraction depth of field from the aperture: ``σ_z ≈ n·λ/NA²`` (immersion
        index ``n`` ≈ tissue), the same half-depth the in-focus check uses. Higher
        NA ⇒ shallower focus (DOF falls as 1/NA²), so a real scope's DOF is set by
        its optics rather than picked by hand. At NA 0.30, λ ≈ 525 nm this is
        ≈ 7.8 µm; at NA 0.45, ≈ 3.4 µm."""
        if self.depth_of_field_um != "auto":
            return float(self.depth_of_field_um)
        return _DOF_IMMERSION_N * (self.emission_nm / 1000.0) / self.na**2

    # ---- Layer-2 helpers: small, documented, individually-testable approximations ----

    @property
    def diffraction_sigma_um(self) -> float:
        """Diffraction-limited PSF width (Gaussian σ), µm.

        A Gaussian stand-in for the Airy disk: the diffraction FWHM is
        ``≈ 0.51·λ/NA`` and ``σ = FWHM / 2.355 ≈ 0.21·λ/NA``. Ignores
        aberrations and the finite Airy tails - adequate for showing how NA and
        emission wavelength set the resolution floor. Smaller NA ⇒ larger σ
        (blurrier). At NA 0.45, λ ≈ 525 nm this is σ ≈ 0.24 µm.

        Note - pixel-limited, not diffraction-limited. Across realistic 1-photon
        miniscope NAs (~0.1–0.6) and green emission this σ is only ~0.2–1.1 µm,
        i.e. at or below the *object-space pixel size* (sensor pitch ÷
        magnification, typically 1–2 µm). So lateral resolution is set by the
        pixel sampling, not by diffraction - the diffraction PSF is real but
        rarely the limiting blur (defocus and scatter usually dominate it too).
        A practical consequence: a cell's intrinsic shape can be generated on a
        fine, sub-pixel grid *independent of the sensor*, then resampled to
        whatever pixel size the sensor implies, because the optics never resolve
        anything finer than the pixel grid anyway. (The teaching notebook relies
        on exactly this so that changing magnification/pitch only rescales a cell
        rather than re-rasterizing - and re-randomizing - its shape.)
        """
        return 0.21 * (self.emission_nm / 1000.0) / self.na

    def defocus_sigma_um(self, z_um: float, focal_um: float) -> float:
        """Out-of-focus blur (Gaussian σ), µm, for a cell at depth ``z_um``.

        Geometric defocus broadens linearly with the distance from the focal
        plane: ``σ ≈ NA·|z − z_focal|``. Symmetric about the focal plane (zero
        at ``z == focal_um``) and larger for higher NA (shallower depth of
        field). INTENSITY-CONSERVING: defocus spreads light without losing it,
        so the matching peak drop is applied in
        :meth:`Acquisition.cell_optics`; that is what separates defocus
        (spreads) from scatter (attenuates). ``focal_um`` is passed explicitly
        because :attr:`Acquisition.focal_depth_in_tissue_um` may be ``"auto"``, resolved to a
        concrete depth by the optics step before this is called.
        """
        return self.na * abs(z_um - focal_um)

    @property
    def collection_efficiency(self) -> float:
        """Fraction of a cell's emitted light the objective collects - ``∝ NA²``.

        A lens gathers light over a collection cone whose solid angle grows with
        ``NA²`` (small-angle ``Ω ∝ sin²θ = NA²``), so a low-NA miniscope objective
        is *fundamentally* dimmer than a high-NA one - independently of focus or
        depth. This is a flat multiplicative light-loss applied alongside scatter
        :meth:`Tissue.attenuation`; the absolute proportionality constant
        (``1/4n²`` etc.) is absorbed into the ``sensor`` step's
        ``photons_per_unit`` exposure scale, so what matters here is the ``NA²``
        scaling. At NA 0.18 vs 0.45 this is a ~6× brightness difference."""
        return self.na**2

    def focal_curvature_shift_um(self, r_um: float) -> float:
        """Field-curvature focal shift at field radius ``r_um`` from the axis, µm.

        Without a field flattener - which a miniscope has no room for - off-axis
        points focus on a curved (≈spherical) surface, not a plane. A point at
        radius ``r`` from the optical axis comes into best focus *shallower*
        (nearer the objective) than the on-axis focal plane, by the spherical
        sagitta ``R − √(R² − r²) ≈ r²/(2R)``. Returns a **non-negative** shift to
        be *subtracted* from the central focal depth (the in-focus surface bows
        toward the objective at the edges, always, for miniscope/standard optics).
        Zero when :attr:`field_curvature_radius_um` is ``None`` (an ideal flat
        field). Typical radii are 2–3 mm - large vs a soma, so a cell can be
        evaluated at its center rather than warping the footprint across the field.
        """
        radius = self.field_curvature_radius_um
        if radius is None:
            return 0.0
        r = min(abs(r_um), radius)  # clamp keeps the sqrt real for pathological r > R
        return radius - math.sqrt(radius * radius - r * r)




[docs]
class ImageSensor(_Base):
    """Physical and noise properties of the bare image sensor (the detector).

    Named *image sensor*, not *camera*, on purpose: a camera bundles optics on
    top of a sensor, whereas this is only the photosensitive array and its
    readout chain. The optics live separately on ``Optics``. Together with the
    exposure scale on the ``sensor`` step, the fields here fully specify the
    photons→counts conversion.

    Typical CMOS miniscope sensors: ~2–6 µm pixel pitch, QE 0.6–0.9, read noise
    1–5 e⁻ RMS, 8–12-bit ADC.
    """

    n_px_height: int = Field(gt=0, default=256, description="Sensor height, pixels.")
    n_px_width: int = Field(gt=0, default=256, description="Sensor width, pixels.")
    pixel_pitch_um: float = Field(gt=0, default=3.0, description="Physical sensor pixel pitch, µm.")
    quantum_efficiency: float = Field(gt=0, le=1, default=0.7, description="Photon → electron conversion efficiency.")
    read_noise_e: float = Field(ge=0, default=2.0, description="Read noise, electrons RMS.")
    gain_adu_per_e: float = Field(gt=0, default=1.0, description="Camera gain, ADU per electron.")
    bit_depth: int = Field(gt=0, default=8, description="ADC bit depth; counts clipped to [0, 2^bit_depth − 1].")

    def photons_to_counts(self, photons: np.ndarray, rng: np.random.Generator) -> np.ndarray:
        """Forward sensor model: incident photons → digitized ADC counts.

        The only place fluorescence becomes integer counts::

            λ      = photons · quantum_efficiency            # expected detected e⁻
            e⁻     = Normal(λ, √(λ + read_noise_e²))         # shot + read noise
            adu    = e⁻ · gain_adu_per_e
            counts = clip(floor(adu), 0, 2**bit_depth − 1)

        The two physical noise sources are combined into a **single Gaussian**. Shot
        noise is Poisson on the detected electrons (variance ``λ``); read noise is
        additive Gaussian in electrons (variance ``read_noise_e²``). Because the two
        are independent their variances add, and Poisson(λ) ≈ Normal(λ, λ) to high
        accuracy once ``λ`` is more than ~10 - which holds across essentially every
        pixel of a 1-photon recording, where a substantial background (out-of-focus
        fluorescence, neuropil, leakage glow) keeps ``λ`` well above that floor. So
        the electron count is drawn as ``Normal(λ, √(λ + read_noise_e²))``: one
        ``standard_normal`` draw that carries *both* the signal-dependent shot term
        (the ``√λ``, so brighter pixels are noisier) and the constant read term. This
        is the standard sensor-noise model, and it is far cheaper than sampling a
        large-λ Poisson; the only thing dropped is the discrete-Poisson shape at
        near-zero λ, the regime a 1p recording's background keeps it out of. Read
        noise dominates anyway in the dim, high-analog-gain corner.

        Quantization is ``floor`` (an ADC truncates), and counts are clipped to the
        converter's representable range. ``photons`` is the per-pixel expected photon
        count - the ``Sensor`` step produces it from scene intensity × its
        ``photons_per_unit`` exposure scale. Returns a float array holding
        integer-valued counts (the float container is set by ``Output.store_dtype``).
        """
        photons = np.asarray(photons, dtype=float)
        lam = photons * self.quantum_efficiency  # expected detected electrons
        # Shot (variance λ) + read (variance read_noise_e²) as one Gaussian: a single
        # standard_normal draw scaled by the combined std. One draw per call keeps the
        # streaming writer's per-frame replay in lock-step with simulate().
        std = np.sqrt(lam + self.read_noise_e**2)
        electrons = lam + std * rng.standard_normal(photons.shape)
        counts = np.floor(electrons * self.gain_adu_per_e)
        return np.clip(counts, 0.0, 2**self.bit_depth - 1)




[docs]
class Tissue(_Base):
    """Light-scattering properties of the imaged tissue, as a function of depth.

    The fields parametrize Layer-2 helpers (``attenuation``, ``scatter_sigma``).
    Scattering has two separable consequences on a cell's image, modelled by two
    knobs: it *dims* the sharp signal (light scattered out of the collection cone
    is lost → :meth:`attenuation`, the ``scatter_mfp_*`` fields) and it *blurs*
    the footprint (forward-scattered light, ``g ≈ 0.88``, is recollected as a
    growing halo → :meth:`scatter_sigma_um`, ``scatter_blur_per_um``).

    Round-trip scattering, asymmetric. The signal makes two passes through
    tissue, but they attenuate very differently:

    * **Excitation in (≈470 nm)** is delivered by *widefield* illumination, so it
      reaches a cell as a *diffuse* fluence, not a ballistic beam. Diffuse light
      penetrates far (transport length ≈ 800 µm) and its fluence actually peaks a
      few hundred µm deep before falling (Ma et al. 2020, Neurophotonics
      7:031208), so over the depths a 1-photon scope images, excitation barely
      dims cells - a *long* effective MFP (``scatter_mfp_excitation_um``).
    * **Emission out (≈525 nm)** is the *image-forming* sharp signal, which
      decays at roughly the scattering MFP (``scatter_mfp_emission_um``). This leg
      dominates the depth-dimming.

    Modelling excitation as ballistic (a short, symmetric leg) double-counts the
    loss and makes deep cells unrealistically dim; the asymmetric split above is
    both more honest and what keeps the round trip from over-attenuating.

    Literature anchors (mouse cortex / gray matter). The *ballistic* scattering
    mean free path at blue/green is ≈ 40–50 µm (μ_s ≈ 200 cm⁻¹, g ≈ 0.86–0.89):
    ≈ 47 µm at 473 nm (Al-Juboori et al. 2013, PLoS ONE 8:e67626) and ≈ 38 µm at
    515 nm (Azimipour et al. 2014, Biomed. Opt. Express). That ballistic length is
    what sets the *blur* rate. The light an objective actually *collects* decays
    more slowly, because the strong forward scattering (g ≈ 0.88) is largely
    recollected - so the emission leg uses the *high end* of the scattering-MFP
    literature (~100 µm), and the diffuse excitation leg is longer still; their
    round trip gives an effective ≈ 85 µm (see :attr:`scatter_mfp_um`).
    """

    scatter_mfp_excitation_um: float = Field(
        gt=0,
        default=600.0,
        description="Effective attenuation MFP for the excitation leg (≈470 nm, in) - long, diffuse fluence, µm.",
    )
    scatter_mfp_emission_um: float = Field(
        gt=0,
        default=100.0,
        description="Effective attenuation MFP for the emission leg (≈525 nm, out) - the image-forming scattering MFP, µm.",
    )
    scatter_blur_per_um: float = Field(
        ge=0,
        default=0.05,
        description="Linear broadening of the footprint per µm of depth (µm sigma per µm depth).",
    )

    # ---- Layer-2 helpers: scattering as a function of absolute depth ----

    @property
    def scatter_mfp_um(self) -> float:
        """Effective round-trip (excitation × emission) attenuation MFP, µm.

        The two exponential legs multiply, ``exp(−z/mfp_ex)·exp(−z/mfp_em) =
        exp(−z/mfp_eff)``, so the combined length is the harmonic-style
        reciprocal sum ``1/mfp_eff = 1/mfp_ex + 1/mfp_em``. With the defaults
        (excitation 600 µm, emission 100 µm) this is ≈ 85.7 µm - dominated by the
        emission leg, since the diffuse excitation leg attenuates little over
        imaging depths (see the class docstring for why the legs are asymmetric).
        """
        return 1.0 / (1.0 / self.scatter_mfp_excitation_um + 1.0 / self.scatter_mfp_emission_um)

    def attenuation(self, z_um: float) -> float:
        """Fraction of light surviving the round-trip scatter from depth ``z_um`` - in (0, 1].

        Beer–Lambert decay applied on *both* legs the signal travels: excitation
        in (≈470 nm) then emission out (≈525 nm). The product collapses to a
        single exponential over the effective MFP, ``exp(−z / scatter_mfp_um)``
        (see :attr:`scatter_mfp_um`). Monotonically decreasing in depth and equal
        to 1 at the surface (``z = 0``). Genuinely *removes* light - unlike
        defocus - so a deep cell is irreversibly dimmer, the irreducible limit
        the module teaches.
        """
        return math.exp(-z_um / self.scatter_mfp_um)

    def scatter_sigma_um(self, z_um: float) -> float:
        """Scatter-induced footprint broadening (Gaussian σ), µm, at depth ``z_um``.

        Linear phenomenological model ``σ = scatter_blur_per_um · z``: deeper
        cells scatter more and so appear both larger and dimmer (see
        :meth:`attenuation`). Monotonically increasing in depth and zero at the
        surface. Unlike defocus this is not intensity-conserving - it co-occurs
        with attenuation. The rate is set by the ballistic scattering MFP
        (~40–50 µm at blue/green; Al-Juboori et al. 2013): with the default 0.05,
        a cell at 100 µm picks up σ ≈ 5 µm (FWHM ≈ 12 µm, about a soma diameter),
        so deep cells read as the blurry halos seen in real 1-photon data.
        """
        return self.scatter_blur_per_um * z_um




[docs]
class Acquisition(_Base):
    """The physical acquisition: optics, image sensor, tissue, and sampling.

    Owns *all* unit conversions between the physical world (µm, seconds) and the
    sampled world (pixels, frames). Pixel size is the joint optics×sensor
    quantity ``image_sensor.pixel_pitch_um / optics.magnification``; FOV is then
    derived from the sensor's pixel count - any two of {FOV, pixel size, pixel
    count} fix the third.
    """

    optics: Optics = Field(default_factory=Optics)
    image_sensor: ImageSensor = Field(default_factory=ImageSensor)
    tissue: Tissue = Field(default_factory=Tissue)
    fps: float = Field(gt=0, default=20.0, description="Frame rate, frames per second.")
    duration_s: float = Field(gt=0, default=150.0, description="Recording duration, seconds.")
    focal_depth_in_tissue_um: float | Literal["auto"] = Field(
        default="auto",
        description="Depth of the focal plane below the tissue surface, µm (0 = surface), "
        "in the same coordinate as each cell's depth z. Cells above or below it defocus; "
        "'auto' resolves to the median realized cell depth at the optics step.",
    )
    front_working_distance_um: float | None = Field(
        default=None,
        description="Front working distance (lens front → focal point), µm - Miniscope V4 ≈ "
        "700. Informational only: it does NOT affect the simulation (the optics math uses "
        "focal_depth_in_tissue_um), but it's a physically relevant number for surgery/implant "
        "planning, so it's recorded here.",
    )

    @field_validator("focal_depth_in_tissue_um")
    @classmethod
    def _check_focal_depth(cls, v: float | Literal["auto"]) -> float | Literal["auto"]:
        if v != "auto" and v < 0:
            raise ValueError(f"focal_depth_in_tissue_um ({v}) must be ≥ 0, or 'auto'.")
        return v

    @field_validator("front_working_distance_um")
    @classmethod
    def _check_fwd(cls, v: float | None) -> float | None:
        if v is not None and v <= 0:
            raise ValueError(f"front_working_distance_um ({v}) must be > 0, or None.")
        return v

    @property
    def pixel_size_um(self) -> float:
        """Object-space size of one pixel, µm (sensor pitch / magnification)."""
        return self.image_sensor.pixel_pitch_um / self.optics.magnification

    @property
    def n_frames(self) -> int:
        """Number of frames in the recording (duration × fps, rounded)."""
        return round(self.duration_s * self.fps)

    @property
    def fov_um(self) -> tuple[float, float]:
        """Field of view (height, width) in µm - derived from pixels × pixel size."""
        return (
            self.image_sensor.n_px_height * self.pixel_size_um,
            self.image_sensor.n_px_width * self.pixel_size_um,
        )

    def um_to_px(self, um: float) -> float:
        """Convert a physical distance (µm) to pixels."""
        return um / self.pixel_size_um

    def s_to_frame(self, s: float) -> float:
        """Convert a duration (seconds) to a (fractional) frame count."""
        return s * self.fps

    def cell_optics(self, z_um: float, focal_um: float) -> tuple[float, float]:
        """Combined per-cell optical degradation: ``(sigma_px, brightness)``.

        Folds the three Layer-2 effects into the two numbers the optics step
        applies to a footprint - a blur width (pixels) and a brightness scale.
        Blurs add in quadrature; brightness factors multiply::

            σ_0   = hypot(diffraction_sigma_um, scatter_sigma_um(z))   # all but defocus
            σ_tot = hypot(σ_0, defocus_sigma_um(z, focal))
            brightness = (σ_0² / σ_tot²) · attenuation(z) · collection_efficiency
            sigma_px   = σ_tot / pixel_size_um

        The ``σ_0²/σ_tot²`` factor is the peak drop that makes defocus
        intensity-conserving (a 2-D Gaussian's peak × area is constant); the two
        light-loss factors that actually remove signal are scatter
        ``attenuation(z)`` (depth) and ``collection_efficiency`` (``∝ NA²``, the
        objective's light-gathering power). Both are independent of the focal
        plane, so ``sigma_px² · brightness`` remains independent of the focal
        plane - the invariant the conservation test asserts. ``focal_um`` is the
        resolved (numeric) focal depth; ``diffraction_sigma_um > 0`` always, so
        ``σ_tot`` is never zero.

        Two distinct quantities come out, consumed separately by the optics
        step: ``sigma_px`` is the PSF width the footprint is *convolved* with,
        and the footprint's brightness then scales by ``attenuation(z)`` **alone**
        - the convolution itself produces the defocus peak drop, so applying
        ``brightness`` to the footprint too would double-count it. The returned
        ``brightness`` is instead the *point-source peak* scalar: how far a
        cell's peak signal sits above the noise, i.e. the per-cell effective
        brightness used (with the illumination field and sensor floor) to decide
        detectability at ``finalize()``.
        """
        sigma_0 = math.hypot(self.optics.diffraction_sigma_um, self.tissue.scatter_sigma_um(z_um))
        sigma_total = math.hypot(sigma_0, self.optics.defocus_sigma_um(z_um, focal_um))
        brightness = (
            (sigma_0**2 / sigma_total**2)
            * self.tissue.attenuation(z_um)
            * self.optics.collection_efficiency
        )
        sigma_px = sigma_total / self.pixel_size_um
        return sigma_px, brightness



# ---------------------------------------------------------------------------
# Output formatting
# ---------------------------------------------------------------------------



[docs]
class Output(_Base):
    """Final-array formatting - formatting only, never rescaling (honest radiometry)."""

    save_intermediates: bool = Field(
        default=False,
        description="Retain a snapshot after every step (test oracle + teaching visuals). "
        "Default False to keep memory flat for the programmatic and sweep paths; the "
        "two notebook presets opt in explicitly. When False, only `observed` + "
        "`ground_truth` are kept and stage() raises.",
    )
    store_dtype: Literal["float32", "float64"] = Field(
        default="float32",
        description="Float container for the integer-valued sensor counts.",
    )



# ---------------------------------------------------------------------------
# Step framework - StepSpec base + registry + the static AnyStep union
# ---------------------------------------------------------------------------


# The canonical execution order of the pipeline as a total order over step kinds.
# It follows the physical domains (cell -> tissue -> motion -> sensor) and fixes
# the within-domain order too. This is THE order steps run in, so the order a caller
# lists them in does not matter - :func:`order_steps` (used by both ``simulate``
# and :class:`Spec`) sorts any list into this one. It is a topological extension
# of every step's ``requires`` (a step always follows the kinds it consumes); the
# test suite asserts that, and that this is a permutation of the step catalog, so
# the order cannot silently drift as kinds are added.
_PIPELINE_ORDER: tuple[str, ...] = (
    "place_neurons",
    "cell_activity",
    "bleaching",
    "optics",
    "composite",
    "neuropil",
    "vasculature",
    "brain_motion",
    "illumination_profile",
    "vignette",
    "leakage",
    "sensor",
)
_KIND_RANK: dict[str, int] = {kind: i for i, kind in enumerate(_PIPELINE_ORDER)}

_StepT = TypeVar("_StepT", bound="StepSpec")


def order_steps(steps: Iterable[_StepT]) -> list[_StepT]:
    """Sort steps into the canonical pipeline order (:data:`_PIPELINE_ORDER`).

    :class:`Spec` runs every step list through this, so the order steps are listed
    in never matters. A *stable* sort: steps of equal rank keep their input order,
    and a kind absent from :data:`_PIPELINE_ORDER` (a future/unknown step) sorts to
    the end rather than raising, so no step is dropped. Returns a new list; the step
    specs themselves are not copied or mutated.
    """
    return sorted(steps, key=lambda s: _KIND_RANK.get(s.kind, len(_PIPELINE_ORDER)))



[docs]
class StepSpec(_Base):
    """Base class for a single pipeline step's configuration.

    A concrete step spec carries its physical parameters and a literal ``kind``
    discriminator, and declares its ``domain`` (a class attribute). ``build()``
    turns the spec into the executable step that mutates a ``Scene``, resolving
    ``kind`` through the :data:`minisim.steps.STEP_FOR_KIND` table.

    ``requires`` declares the step kinds whose output this step consumes through
    the shared ``Scene`` (e.g. ``composite`` reads the footprints ``place_neurons``
    makes and the traces ``cell_activity`` makes). It is about *presence-order*, not
    completeness: a present requirement is placed before this step by the canonical
    ordering (:data:`_PIPELINE_ORDER`), but it may be absent entirely. Partial
    pipelines are first-class - a spec of ``[place_neurons, cell_activity,
    composite]`` with no sensor is valid, so targeted test data for a downstream
    calcium pipeline can exercise just a few stages.
    """

    domain: ClassVar[Literal["cell", "tissue", "motion", "sensor"]]
    requires: ClassVar[tuple[str, ...]] = ()
    kind: str

    def build(self, acq: Acquisition, rng) -> Step:
        """Return the executable step (a callable that mutates a Scene).

        Resolves this spec's ``kind`` through the declarative
        :data:`minisim.steps.STEP_FOR_KIND` table and constructs the matching
        step. The table is imported lazily because the step modules depend
        (through ``recording``) back on this module, so it cannot be imported at
        load time.
        """
        from minisim.steps import STEP_FOR_KIND

        return STEP_FOR_KIND[self.kind](self, acq, rng)



# ---------------------------------------------------------------------------
# Step catalog (cell → tissue → motion → sensor). Fields define the v1 surface;
# the executable `build()` bodies live in `minisim.steps`.
# ---------------------------------------------------------------------------



[docs]
class NeuronPopulation(_Base):
    """One homogeneous group of neurons to place: a morphology + a 3-D distribution.

    A *population* is the unit ``place_neurons`` actually samples - one cell shape
    (soma-only or cytosolic) at one soma size, scattered at one volumetric density
    across one depth range. A single population is the common case; list several on
    :attr:`PlaceNeurons.populations` to build layered anatomy - e.g. a thin
    soma-targeted band over a deep cytosolic volume. The cell *count* is derived
    volumetrically from ``density_per_mm3`` and the depth thickness (see
    :func:`~minisim.steps.cell.sample_neurons`); brightness is biology and is drawn
    later in ``cell_activity``, never here.
    """

    density_per_mm3: float = Field(
        gt=0,
        default=25000.0,
        description="Cell volumetric density (cells/mm³); count = density × FOV area "
        "× depth thickness, the thickness floored at one soma diameter so a thin or "
        "planar layer still yields cells.",
    )
    soma_radius_um: float = Field(gt=0, default=7.0, description="Soma radius, µm (typical cortical neuron ≈ 5–10).")
    irregularity: float = Field(
        ge=0,
        le=1,
        default=0.3,
        description="0 = smooth disk; higher = lumpier soma (low-pass-noise threshold).",
    )
    morphology: Literal["soma", "cytosolic"] = Field(
        default="soma",
        description="GCaMP targeting variant: 'soma' = soma-targeted (lumpy disk "
        "only); 'cytosolic' = standard GCaMP (soma + branched proximal dendrites).",
    )
    dendrite_length_um: float = Field(
        gt=0,
        default=24.0,
        description="Nominal proximal-dendrite reach, µm (cytosolic only); kept "
        "proximal so the arbor shapes the blurred footprint without blowing up the "
        "sparse-patch bounding box. The per-cell count is drawn randomly (not a "
        "spec input, so cells differ), and each dendrite's length jitters around "
        "this value and may branch.",
    )
    dendrite_width_um: float = Field(
        gt=0,
        default=3.0,
        description="Proximal-dendrite base width (diameter), µm; tapers to a "
        "~1 px thread at the tip (cytosolic only).",
    )
    depth_range_um: tuple[float, float] = Field(
        default=(0.0, 200.0), description="(min, max) depth into tissue, µm."
    )
    min_distance_um: float = Field(
        ge=0, default=0.0, description="3-D center-to-center minimum (Poisson-disk if > 0)."
    )
    positions_um: list[tuple[float, float, float]] | None = Field(
        default=None,
        description="Explicit soma centers as (z, y, x) µm tuples - depth, row, column - "
        "in the tissue frame (origin = canvas top-left, the same coordinates the "
        "sampled centers and ground-truth positions use; note the depth-first order, "
        "matching Cell.center_um rather than x,y,z). When given, these exact positions "
        "are placed instead of density-sampling, so the distribution fields "
        "(density_per_mm3, depth_range_um, min_distance_um) are ignored; the shape "
        "fields (soma_radius_um, irregularity, morphology, dendrites) still apply to "
        "each placed cell.",
    )

    @field_validator("depth_range_um")
    @classmethod
    def _check_depth_range(cls, v: tuple[float, float]) -> tuple[float, float]:
        lo, hi = v
        if lo < 0:
            raise ValueError(f"depth_range_um min ({lo}) must be ≥ 0.")
        if hi < lo:
            raise ValueError(f"depth_range_um max ({hi}) must be ≥ min ({lo}).")
        return v




[docs]
class PlaceNeurons(StepSpec, NeuronPopulation):
    """Place generic neurons in a 3-D µm volume, soma-only or with dendrites.

    'Place' is the verb - this *positions neurons in space* (anchored at the cell
    body); it is unrelated to hippocampal *place cells*. v1 models one generic
    excitable cell type (an irregular soma blob) with two GCaMP targeting variants
    via ``morphology``: ``"soma"`` (soma-targeted, body only) or ``"cytosolic"``
    (standard GCaMP, the soma plus a few tapering proximal dendrites). There is no
    further cell-type distinction and no spatial/behavioral tuning. Footprints are
    2-D masks carrying a scalar depth ``z``; out-of-focus neurons that become
    background emerge for free downstream from ``z`` + ``optics``.

    The step *is* a single :class:`NeuronPopulation`: its inherited fields
    (``morphology``, ``soma_radius_um``, ``depth_range_um``, …) describe that one
    group. To place several distinct groups together - a thin soma-targeted layer
    over a deep cytosolic volume, say - set :attr:`populations` to a list of
    ``NeuronPopulation`` instead; the step then samples each in turn (its own
    step-level population fields are ignored, and mixing the two raises).
    """

    domain: ClassVar[str] = "cell"
    kind: Literal["place_neurons"] = "place_neurons"
    populations: list[NeuronPopulation] | None = Field(
        default=None,
        description="Distinct neuron populations to place together (e.g. a thin "
        "layer + a deep volume). None (default) = the step is a single population "
        "described by its own fields; a list = sample each entry in turn and ignore "
        "the step-level population fields.",
    )

    @property
    def resolved_populations(self) -> list[NeuronPopulation]:
        """The population(s) to place: explicit ``populations`` or the step-as-one.

        When ``populations`` is None the step is itself one population, so rebuild a
        plain :class:`NeuronPopulation` from the inherited fields - that way callers
        (the step, :func:`~minisim.steps.cell.sample_neurons`) iterate one uniform
        list either way.
        """
        if self.populations is not None:
            return self.populations
        return [
            NeuronPopulation(**{f: getattr(self, f) for f in NeuronPopulation.model_fields})
        ]

    @model_validator(mode="after")
    def _check_populations(self) -> PlaceNeurons:
        if self.populations is None:
            return self
        if not self.populations:
            raise ValueError("populations must list at least one NeuronPopulation, or be None.")
        clash = sorted(self.model_fields_set & set(NeuronPopulation.model_fields))
        if clash:
            raise ValueError(
                f"place_neurons sets both populations and step-level population "
                f"field(s) {clash}; when using populations, set those on each "
                "NeuronPopulation entry instead (the step-level fields are ignored)."
            )
        return self




[docs]
class CellActivity(StepSpec):
    """Calcium activity: 2-state Markov gate → Poisson spikes → double-exp kernel.

    Modeled on the CaLab web simulator: spikes are generated on a high-resolution
    grid (``spike_sim_hz``, ~300 Hz), convolved with the double-exponential kernel
    ``k(t) = exp(-t/τ_d) − exp(-t/τ_r)`` at that rate, then bin-averaged down to the
    camera frame rate (exposure integration). One spike per fine bin respects the
    ~3 ms refractory period. The ground-truth ``S`` is the per-frame spike *count*
    (the fine train is binned away - nothing recovers spikes faster than the frame
    rate). Indicator saturation and per-cell τ jitter are deferred to v1.1.

    Amplitude is biology and lives here as a single per-cell gain: ``brightness_cv``
    is the cell-to-cell spread of an overall expression/response gain that scales
    each cell's *whole* trace (baseline and transients together). The emitted trace
    is the **clean ground truth** ``C``; measurement noise is deliberately *not*
    added here. Photon shot noise and read noise enter at the ``sensor``, background
    fluctuations at ``neuropil`` - so any SNR is an emergent property of the physical
    chain, computable downstream, never an input.
    """

    domain: ClassVar[str] = "cell"
    kind: Literal["cell_activity"] = "cell_activity"
    requires: ClassVar[tuple[str, ...]] = ("place_neurons",)  # needs cells to animate
    spike_sim_hz: float = Field(gt=0, default=300.0, description="High-res spike-simulation rate, Hz (~300 = a ~3 ms refractory); binned to the frame rate.")
    # Defaults = CaLab's "moderate" SPIKE_ACTIVITY level; see spike_activity_params.
    p_quiescent_to_active: float = Field(gt=0, default=0.005, description="Per-frame quiescent→active transition prob.")
    p_active_to_quiescent: float = Field(gt=0, default=0.3, description="Per-frame active→quiescent transition prob.")
    active_rate_hz: float = Field(gt=0, default=150.0, description="Instantaneous firing rate while active, Hz (the in-burst rate).")
    quiescent_rate_hz: float = Field(ge=0, default=0.6, description="Instantaneous firing rate while quiescent, Hz (the intrinsic background).")
    tau_rise_s: float = Field(gt=0, default=0.05, description="Calcium rise time constant, s.")
    tau_decay_s: float = Field(gt=0, default=0.5, description="Calcium decay time constant, s.")
    brightness_cv: float = Field(
        ge=0,
        default=0.3,
        description="Cell-to-cell brightness spread: lognormal CV (mean 1) of the "
        "per-cell expression/response gain that scales the whole trace. 0 = every "
        "cell equally bright.",
    )
    f0: float = Field(ge=0, default=1.0, description="Baseline fluorescence.")
    trace_noise: float = Field(
        ge=0,
        default=0.0,
        description="Non-physical additive trace noise (default 0). An advanced "
        "override only; real noise enters at sensor/neuropil, not here.",
    )




[docs]
class CellOptics(StepSpec):
    """Per-cell diffraction + defocus ``|z − z_f|`` + scatter(z) blur & attenuation.

    No tunable fields: blur and attenuation are fully determined by each cell's
    ``z`` plus the physical ``Optics``/``Tissue`` constants on ``Acquisition``.
    Writes the observed (degraded) footprint alongside the planted (sharp) one,
    sets the geometric ``in_focus`` flag, and stores the per-cell
    ``optical_brightness`` peak scalar. ``detectable`` is *not* set here - it is
    a whole-pipeline flag (optics × illumination vs the sensor noise floor)
    assembled in ``finalize()``.
    """

    domain: ClassVar[str] = "cell"
    kind: Literal["optics"] = "optics"
    requires: ClassVar[tuple[str, ...]] = ("place_neurons",)  # degrades planted footprints




[docs]
class Composite(StepSpec):
    """Composite ``Σ_i degraded_footprint_i × trace_i`` into the movie.

    The built step's snapshot name is ``"cells_only"``. The planted (sharp)
    ``A``/``C`` remain the ideal CNMF target in ground truth.
    """

    domain: ClassVar[str] = "tissue"
    kind: Literal["composite"] = "composite"
    # Composites footprint × trace. optics is an optional enhancer (composite falls
    # back to the planted footprint), so it is not required.
    requires: ClassVar[tuple[str, ...]] = ("place_neurons", "cell_activity")




[docs]
class Neuropil(StepSpec):
    """Additive diffuse background from the dendritic/axonal felt around the cells.

    A smooth spatial field (mesh-density variation on ``spatial_sigma_um``)
    modulated by a temporal envelope that is **biologically driven**: the haze is
    the aggregate calcium of the surrounding neural processes, so its time course
    is the local population activity, lagged and smoothed by the felt's
    integration (``population_tau_s``, short). Each component's envelope mixes
    that population driver with an independent slow drift (the unmodeled
    out-of-FOV/out-of-plane tissue, ``temporal_tau_s``, slow) at
    ``population_coupling``. This is the modeled diffuse mesh only - out-of-focus
    somata are a *separate* background that emerges for free from
    ``place_neurons`` + ``optics``.
    """

    domain: ClassVar[str] = "tissue"
    kind: Literal["neuropil"] = "neuropil"
    # Couples to the local population's calcium; falls back to independent drift if
    # absent, so cell_activity is order-only (must precede when present), not required.
    requires: ClassVar[tuple[str, ...]] = ("cell_activity",)
    spatial_sigma_um: float = Field(gt=0, default=40.0, description="Spatial smoothness of the mesh, µm.")
    temporal_tau_s: float = Field(gt=0, default=10.0, description="OU correlation time of the independent slow-drift leg, s (slow).")
    population_tau_s: float = Field(gt=0, default=1.5, description="Low-pass time constant of the population-coupled leg, s: the felt's integration/lag, short relative to the drift.")
    amplitude: float = Field(gt=0, default=0.5, description="Background amplitude relative to cell signal.")
    n_components: int = Field(ge=1, default=3, description="Number of independent diffuse components.")
    population_coupling: float = Field(ge=0, le=1, default=0.7, description="Fraction of the temporal envelope driven by local population activity vs independent slow drift (0=pure drift, 1=pure population).")




[docs]
class VesselLayer(_Base):
    """One depth's worth of randomly-grown blood vessels - a dark absorbing tree.

    A vessel absorbs both the excitation going in and the emission coming out
    (haemoglobin), so a layer is a *shadow* cast on the tissue behind it, not a
    light source. Each layer grows ``n_roots`` branching trees (see
    :func:`~minisim.steps.tissue.grow_vessel_tree`) entering from the field edges,
    tapering from ``root_radius_um`` trunks down to ``min_radius_um`` capillaries by
    Murray's law, then rasterizes them to a Beer-Lambert transmission mask and blurs
    it by the **defocus + scatter** at this layer's ``depth_um`` (reusing the optics
    model): a vessel near the focal plane is a crisp dark thread, one far from focus
    a broad soft shadow. List several layers on :class:`Vasculature` to stack scales
    and depths (e.g. a shallow large-caliber cortical layer above the cells, plus a
    deeper fine-capillary bed).

    All lengths are µm; ``depth_um`` shares the cell-``z`` / focal-plane coordinate
    (0 = surface), so a layer can sit above the cells (shallower) or, less commonly,
    below them.
    """

    depth_um: float = Field(ge=0, default=30.0, description="Depth of this vessel layer below the surface, µm (same coord as cell z / focal plane).")
    n_roots: int = Field(ge=1, default=4, description="Number of vessel trees entering this layer from the field edges.")
    root_radius_um: float = Field(gt=0, default=22.0, description="Trunk (thickest) vessel radius, µm.")
    min_radius_um: float = Field(gt=0, default=2.0, description="Capillary floor: a branch terminates once Murray-law tapering drops below this, µm.")
    opacity: float = Field(gt=0, le=1, default=0.85, description="Peak absorption of a thick trunk in (0, 1]; transmission floors at 1 − opacity (real vessels are never fully black).")
    absorption_per_um: float = Field(gt=0, default=0.04, description="Beer-Lambert absorption per µm of blood path length: sets the capillary-vs-trunk contrast.")
    branch_prob: float = Field(ge=0, lt=1, default=0.2, description="Per-step bifurcation probability (a side branch peels off, the main vessel continues).")
    tortuosity_deg: float = Field(ge=0, default=8.0, description="Std of the per-step heading perturbation, degrees (0 = ruler-straight vessels).")
    branch_angle_deg: float = Field(ge=0, default=30.0, description="Base heading deviation at a bifurcation, degrees; the thinner child turns more.")
    branch_area_main: float = Field(gt=0.5, lt=1.0, default=0.7, description="Main child's share of the conserved Murray cube-sum at a bifurcation (0.5 = symmetric, →1 = a thin twig off a barely-thinned trunk).")
    step_per_radius: float = Field(gt=0, default=1.5, description="Growth step length as a multiple of the current radius.")

    @model_validator(mode="after")
    def _check_radii(self) -> VesselLayer:
        if self.min_radius_um >= self.root_radius_um:
            raise ValueError(
                f"min_radius_um ({self.min_radius_um}) must be < root_radius_um "
                f"({self.root_radius_um}); the capillary floor is below the trunk."
            )
        return self




[docs]
class Vasculature(StepSpec):
    """Dark, spatially-static absorbing vessels - a multiplicative shadow on the movie.

    Grows the ``layers`` of branching vessel trees, composites their Beer-Lambert
    transmission masks multiplicatively, and multiplies the result into the
    brain-frame movie (``movie *= M``). Because it is a *tissue*-domain effect
    applied before ``brain_motion``, the vessel pattern is fixed in the brain frame
    and rides the motion crop exactly with the cells - the high-contrast, temporally
    static landmark that motion-correction leans on (unlike the cells, whose
    brightness flickers with activity). It is also a tunable *confound*: a vessel
    crossing a soma corrupts its footprint and trace, so dialing vasculature up
    stress-tests footprint/dynamics extraction. That occlusion is scored, not
    hidden: each cell's footprint-weighted vessel burden is recorded as
    ``GroundTruth.vessel_overlap_fraction``, and a vessel over a soma dims its peak
    in the ``detectable`` test - but the footprints (``A_observed``) stay
    vessel-free, the single-cell optical truth the confound is measured against.

    **v1 simplification:** a single multiplicative attenuation of the already
    composited movie. Excitation and emission absorption are lumped into one static
    transmission, and light generated in front of the vessel is not exempted from
    the shadow (real out-of-plane / in-front fluorescence fills it in); the
    ``opacity`` floor stands in for that fill rather than modeling it. Good enough
    for a landmark and an occlusion confound; not a radiative-transfer model.

    Off by default: ``enabled=False`` and an empty ``layers`` both make the step a
    no-op, so it must be explicitly turned on with at least one
    :class:`VesselLayer`. The static absorbing mask is recorded to
    ``GroundTruth.vasculature_mask`` (cropped to the FOV) so the confound is
    scoreable. Temporal pulsation (cardiac / vasomotion) is deliberately deferred -
    the vessels are static here; only the brightness, not the position, would ever
    breathe, so the landmark property is unaffected either way.
    """

    domain: ClassVar[str] = "tissue"
    kind: Literal["vasculature"] = "vasculature"
    enabled: bool = Field(default=False, description="Master switch; with no layers the step is a no-op regardless.")
    layers: list[VesselLayer] = Field(default_factory=list, description="Vessel layers to grow, each at its own depth/caliber. Empty = no vessels.")




[docs]
class Bleaching(StepSpec):
    """Per-cell, activity-driven photobleaching, opposed by protein turnover.

    Photobleaching is a per-photon hazard, so each cell loses intact fluorophore in
    proportion to how much it emits (its calcium activity × excitation intensity),
    while turnover replenishes it toward full expression. The realized envelope is a
    *cell-domain* effect computed before ``composite`` (see
    :class:`~minisim.steps.tissue.BleachingStep`), not a global movie multiply: busy
    or brightly-lit cells fade faster and to a lower floor, and with the light off
    the pool recovers, so the same model spans single recordings and repeated
    sessions. Defaults are calibrated to measured CA1 GCaMP6f bleaching curves
    across a wide range of excitation powers (bleaching linear in excitation;
    effective recovery ≈5.5 h, so darkness restores the pool within a couple of days).
    """

    domain: ClassVar[str] = "cell"
    kind: Literal["bleaching"] = "bleaching"
    requires: ClassVar[tuple[str, ...]] = ("cell_activity",)  # bleaches each cell's emission
    bleach_susceptibility: float = Field(
        ge=0, default=6.3e-6,
        description="Bleach rate per second at unit excitation and baseline emission (the "
        "per-photon hazard); 0 disables bleaching. Calibrated to CA1 GCaMP6f.",
    )
    turnover_tau_s: float = Field(
        gt=0, default=20000.0,
        description="Effective fluorophore-recovery time constant, s (≈5.5 h, from the "
        "measured replenish rate). Restores the intact pool toward 1, opposing bleaching.",
    )
    excitation_intensity: float = Field(
        ge=0, default=1.0,
        description="Excitation level, dimensionless (1 = a typical continuous miniscope "
        "level). Deliberately unitless - absolute irradiance depends on the rig, depth, and "
        "optics. Scales the bleach rate linearly: the brighter-but-faster-fading trade-off.",
    )




[docs]
class BrainMotion(StepSpec):
    """Rigid x,y translation of the whole tissue frame - the tissue→sensor boundary.

    The built step shifts the brain-frame canvas per frame and crops the sensor
    FOV from its center; it therefore requires a scene whose tissue canvas carries
    a margin ≥ the maximum shift (``Scene.zeros(acq, margin_px=…)``, sized
    automatically by ``simulate()``), so real off-FOV tissue moves into view
    instead of a fabricated fill. Ground truth records the per-frame ``(dy, dx)``
    displacement in **pixels**.

    Three sources of the trajectory, selected by ``model``:

    * ``"physical"`` (default): a 2-D damped harmonic oscillator. The brain is a
      damped mass elastically tethered to the (rigid) skull, driven on the dominant
      ``locomotion_axis`` by an always-on locomotion rhythm at ``locomotion_freq_hz``
      (mice/rats run at ~6-8 Hz) and on both axes by broadband sloshing noise. The
      restoring force bounds the motion physically; ``motion_amplitude_um`` sets the
      typical excursion and ``max_shift_um`` is the hard safety clamp (and the margin
      size). This is the realistic model the teaching notebook uses.
    * ``"walk"``: a bounded random walk (``walk_step_um`` per frame, clamped to the
      ``max_shift_um`` disk). Cheap and rhythm-free; kept for simple tests/fixtures.
    * an explicit ``trajectory_um`` overrides both, regardless of ``model``.

    Axial focus-drift motion is a deferred placeholder.
    """

    domain: ClassVar[str] = "motion"
    kind: Literal["brain_motion"] = "brain_motion"
    model: Literal["physical", "walk"] = Field(
        default="physical",
        description="Trajectory generator: 'physical' (driven damped oscillator) or 'walk' (bounded random walk). An explicit trajectory_um overrides both.",
    )
    trajectory_um: list[tuple[float, float]] | None = Field(
        default=None, description="Explicit per-frame (dy, dx) in µm; overrides model."
    )
    max_shift_um: float = Field(
        gt=0, default=15.0,
        description="Hard safety clamp on cumulative shift magnitude, µm (also sizes the tissue margin).",
    )
    # --- physical model ---
    locomotion_freq_hz: float = Field(
        gt=0, default=7.0, description="Locomotion (stride) drive frequency, Hz; mice/rats run at ~6-8 Hz."
    )
    motion_amplitude_um: float = Field(
        gt=0, default=10.0, description="Extreme excursion (99th-percentile displacement radius), µm; most frames move less."
    )
    locomotion_axis: Literal["y", "x"] = Field(
        default="y", description="Dominant motion axis the locomotion rhythm drives (y = height; the cross axis gets noise only)."
    )
    resonance_freq_hz: float = Field(
        gt=0, default=6.0, description="Natural frequency of the brain-on-skull oscillator, Hz."
    )
    damping_ratio: float = Field(
        gt=0, default=0.5, description="Damping ratio ζ of the oscillator (<1 under-damped, sloshy; ≥1 over-damped)."
    )
    locomotion_fraction: float = Field(
        ge=0, le=1, default=0.25,
        description="Share of motion amplitude carried by the locomotion rhythm vs broadband sloshing noise (noise-dominated by default).",
    )
    # --- walk model ---
    walk_step_um: float = Field(ge=0, default=0.3, description="Random-walk step size, µm/frame (model='walk').")

    @model_validator(mode="after")
    def _amplitude_within_clamp(self) -> BrainMotion:
        if self.model == "physical" and self.motion_amplitude_um > self.max_shift_um:
            raise ValueError(
                f"motion_amplitude_um ({self.motion_amplitude_um}) exceeds the max_shift_um "
                f"clamp ({self.max_shift_um}); raise max_shift_um so the calibrated motion is "
                "not crushed by the safety clamp."
            )
        return self




[docs]
class IlluminationProfile(StepSpec):
    """Static excitation-illumination falloff - the LED lights the FOV unevenly.

    A single excitation LED illuminates the tissue brightest at the center and
    dimmer toward the edges, so peripheral cells fluoresce less to begin with.
    Modeled as a multiplicative radial falloff (``1`` at the bright center, dropping
    to ``falloff`` at the farthest corner, ``exponent`` shaping the rolloff) - fixed
    to the scope, so it does **not** move with the brain. Typically a gentle, broad
    rolloff (vs the sharper emission ``Vignette``). Being on the *excitation* side,
    this falloff also drives photobleaching faster at the bright center: that
    coupling is wired in ``Bleaching`` (which evaluates this field at each cell's
    rest position), the one way it differs from the collection-side vignette.
    """

    domain: ClassVar[str] = "sensor"
    kind: Literal["illumination_profile"] = "illumination_profile"
    falloff: float = Field(
        ge=0, le=1, default=0.7, description="Edge excitation relative to center (1 = uniform)."
    )
    exponent: float = Field(gt=0, default=2.0, description="Radial falloff exponent (gentle/broad by default).")
    center_offset_um: tuple[float, float] = Field(
        default=(0.0, 0.0), description="(dy, dx) offset of the bright center from FOV center, µm."
    )




[docs]
class Vignette(StepSpec):
    """Static radial vignette on the emission / return path (collection light loss).

    The physical return path trims light rays toward the field edges (aperture and
    relay clipping, compounded by poorer off-axis optical performance), so corners
    read dimmer regardless of how brightly the tissue was lit. Same multiplicative
    radial-falloff shape as the ``IlluminationProfile`` but on the *collection* side
    - so it does not drive bleaching - and typically a sharper edge rolloff. Also
    fixed to the scope (does not move with the brain). Off-axis blur is a separate
    concern, deferred to a future optical-aberration step.
    """

    domain: ClassVar[str] = "sensor"
    kind: Literal["vignette"] = "vignette"
    falloff: float = Field(
        ge=0, le=1, default=0.5, description="Corner brightness relative to center (1 = none)."
    )
    exponent: float = Field(gt=0, default=2.0, description="Radial falloff exponent.")
    center_offset_um: tuple[float, float] = Field(
        default=(0.0, 0.0), description="(dy, dx) offset of the bright center from FOV center, µm."
    )




[docs]
class Leakage(StepSpec):
    """Static additive baseline (stray excitation light on the detector).

    One *additive* contributor to the smooth, low-frequency background that minian's
    'glow removal' estimates and subtracts - not its sole target: that removal also
    strips the *multiplicative* illumination falloff and vignette (see
    ``IlluminationProfile`` / ``Vignette``), since all three are smooth and static
    while the cells are sharp and moving."""

    domain: ClassVar[str] = "sensor"
    kind: Literal["leakage"] = "leakage"
    profile: Literal["uniform", "gaussian"] = Field(default="gaussian", description="Spatial baseline shape.")
    level: float = Field(ge=0, default=0.1, description="Additive baseline level.")
    sigma_um: float | None = Field(
        default=None,
        description="Spatial sigma for the gaussian profile, µm; None defaults to a "
        "quarter of the smaller FOV dimension. Ignored by the uniform profile.",
    )




[docs]
class Sensor(StepSpec):
    """Photons → e⁻ → Poisson shot + read noise → ×gain → quantize → clip.

    The only step that produces integer-valued counts. The sensor *hardware*
    (QE, read noise, gain, bit depth, pixel pitch) lives on
    ``Acquisition.image_sensor`` and is read from there. The single field below
    is the exposure/flux scale - a scene property, not sensor hardware - which is
    why it stays on the step rather than the image-sensor spec.
    """

    domain: ClassVar[str] = "sensor"
    kind: Literal["sensor"] = "sensor"
    photons_per_unit: float = Field(
        gt=0,
        default=100.0,
        description="Photons per fluorescence intensity unit (exposure/flux scale); sets the "
        "shot-noise regime. A scene/illumination property, not sensor hardware.",
    )



# The v1 catalog is closed and known, so AnyStep is a hand-written static union:
# native pydantic, trivially debuggable, no import-order hazards. It is the
# single source of truth for the step catalog - adding a component means
# defining its StepSpec subclass and adding it here. Pydantic's Discriminator
# handles kind→class dispatch for deserialization, so no separate registry is
# needed; if later tooling wants an explicit map, derive it from this union.
AnyStep = Annotated[
    PlaceNeurons
    | CellActivity
    | CellOptics
    | Composite
    | Neuropil
    | Vasculature
    | Bleaching
    | BrainMotion
    | IlluminationProfile
    | Vignette
    | Leakage
    | Sensor,
    Discriminator("kind"),
]


# ---------------------------------------------------------------------------
# Top-level Spec + cross-field validation
# ---------------------------------------------------------------------------



[docs]
class Spec(_Base):
    """A complete, reproducible recording specification.

    ``acquisition`` + ``steps`` (the ordered pipe) + ``seed`` + ``output`` fully
    determine a recording. The cross-field validators below catch genuinely
    invalid configs (raise) and flag unusual-but-legal ones (``SpecWarning``).
    """

    acquisition: Acquisition = Field(default_factory=Acquisition)
    seed: int = Field(default=42, description="RNG seed for full reproducibility.")
    steps: list[AnyStep]
    output: Output = Field(default_factory=Output)

    def cache_key(self) -> str:
        """SHA256 (first 16 hex chars) of the canonical JSON form. Stable across runs."""
        return hashlib.sha256(self.model_dump_json().encode()).hexdigest()[:16]

    @field_validator("steps")
    @classmethod
    def _canonicalize_order(cls, steps: list[AnyStep]) -> list[AnyStep]:
        """Store steps in canonical order (:func:`order_steps`), whatever order they
        were given, so every downstream consumer - ``simulate``, ``until=``, the
        snapshot keys, ``cache_key``, sweeps - sees the same sequence and two specs
        that differ only in listing order compare and cache as equal.
        """
        return order_steps(steps)

    @model_validator(mode="after")
    def _validate(self) -> Spec:
        self._check_unique_kinds()  # hard fail; everything below assumes unique kinds
        self._check_step_dependencies()
        by_kind = {s.kind: s for s in self.steps}
        self._check_footprint_vs_fov(by_kind)
        self._check_sampling_vs_kinetics(by_kind)
        self._warn_focal_plane(by_kind)
        self._warn_motion_magnitude(by_kind)
        return self

    # -- hard fails ---------------------------------------------------------

    def _check_unique_kinds(self) -> None:
        """Rule 4: each ``kind`` appears at most once - lets sweeps address a step
        by kind and keeps the snapshot dict (keyed by step name) collision-free."""
        dupes = sorted(k for k, n in Counter(s.kind for s in self.steps).items() if n > 1)
        if dupes:
            raise ValueError(f"Duplicate step kind(s) in spec: {dupes}. Each kind must be unique.")

    def _check_step_dependencies(self) -> None:
        """Rule 4b: a step's present ``requires`` kinds must precede it. Steps are
        already canonicalized and :data:`_PIPELINE_ORDER` is a topological extension
        of ``requires``, so this is a drift guard (an assertion that the two stay in
        sync) rather than a reachable error for known kinds."""
        present = {s.kind for s in self.steps}
        seen: set[str] = set()
        for s in self.steps:
            late = [r for r in s.requires if r in present and r not in seen]
            if late:
                raise ValueError(
                    f"Step {s.kind!r} resolves after {late}, which it consumes through "
                    "the shared Scene; the canonical pipeline order is inconsistent with "
                    f"its declared requires={s.requires!r}."
                )
            seen.add(s.kind)

    def _check_footprint_vs_fov(self, by_kind: Mapping[str, StepSpec]) -> None:
        """Rule 2: a soma larger than the entire FOV is a misconfiguration."""
        pc = by_kind.get("place_neurons")
        if not isinstance(pc, PlaceNeurons):
            return
        min_fov = min(self.acquisition.fov_um)
        max_radius = max(p.soma_radius_um for p in pc.resolved_populations)
        if 2 * max_radius > min_fov:
            raise ValueError(
                f"soma_radius_um={max_radius} µm gives a soma diameter larger than "
                f"the FOV ({min_fov:.3g} µm). Reduce the soma or enlarge the FOV."
            )

    def _check_sampling_vs_kinetics(self, by_kind: Mapping[str, StepSpec]) -> None:
        """Rule 3: ``tau_decay_s · fps`` must be ≳ 1, else the decay is unresolvable."""
        act = by_kind.get("cell_activity")
        if not isinstance(act, CellActivity):
            return
        samples_per_decay = act.tau_decay_s * self.acquisition.fps
        if samples_per_decay < 1.0:
            raise ValueError(
                f"tau_decay_s={act.tau_decay_s} s at fps={self.acquisition.fps} gives "
                f"{samples_per_decay:.3g} samples per decay (< 1); the calcium decay is "
                "unresolvable. Raise fps or tau_decay_s."
            )

    # -- advisory warnings --------------------------------------------------

    def _warn_focal_plane(self, by_kind: Mapping[str, StepSpec]) -> None:
        """Rule 6: a numeric focal depth outside the cell depth range is unusual."""
        focal = self.acquisition.focal_depth_in_tissue_um
        pc = by_kind.get("place_neurons")
        if focal == "auto" or not isinstance(pc, PlaceNeurons):
            return
        lo, hi = pc.depth_range_um
        if not (lo <= focal <= hi):
            warnings.warn(
                f"focal_depth_in_tissue_um={focal} µm is outside the cell depth range ({lo}, {hi}) µm.",
                SpecWarning,
                stacklevel=2,
            )

    def _warn_motion_magnitude(self, by_kind: Mapping[str, StepSpec]) -> None:
        """Rule 7: shifts beyond ~5% of the FOV likely indicate a misconfig."""
        mot = by_kind.get("brain_motion")
        if not isinstance(mot, BrainMotion):
            return
        min_fov = min(self.acquisition.fov_um)
        if mot.trajectory_um is not None:
            extent = max((max(abs(dy), abs(dx)) for dy, dx in mot.trajectory_um), default=0.0)
        elif mot.model == "physical":
            extent = mot.motion_amplitude_um
        else:
            extent = mot.max_shift_um
        if extent > 0.05 * min_fov:
            warnings.warn(
                f"Motion extent {extent:.3g} µm exceeds 5% of the FOV ({min_fov:.3g} µm); "
                "likely a misconfiguration.",
                SpecWarning,
                stacklevel=2,
            )