"""High-level "run a job" convenience — classic QC-program workflow.

A single call writes the output text file, the molden orbital file, and
(for geometry optimization) a trajectory animation — the kind of shape
users expect from Gaussian / ORCA / NWChem:

    from vibeqc import Atom, Molecule
    from vibeqc.runner import run_job

    mol = Molecule.from_xyz("h2o.xyz")
    run_job(mol, basis="6-31g*", method="rhf", output="h2o")
    # -> h2o.out, h2o.molden

    run_job(mol, basis="6-31g*", method="rks", functional="PBE",
            optimize=True, output="h2o_opt")
    # -> h2o_opt.out, h2o_opt.molden, h2o_opt.traj

All text output goes to ``{output}.out``. The molden file is written iff
the method produces molecular orbitals (RHF / UHF / RKS / UKS) and
``write_molden=True``. Geometry optimization uses ASE's BFGS and writes
one frame per step to ``{output}.traj``.
"""

from __future__ import annotations

import os
import time
from pathlib import Path
from typing import Any, Literal, Optional, Union

from ._vibeqc_core import (
    BasisSet,
    D3BJParams,
    Molecule,
    RHFOptions,
    RKSOptions,
    UHFOptions,
    UKSOptions,
    get_num_threads,
    run_rhf,
    run_rks,
    run_uhf,
    run_uks,
    set_num_threads,
)
from .banner import VIBEQC_VERSION, banner, library_versions
from .composites import (
    Availability,
    CompositeRecipe,
    CompositeUnavailable,
    resolve_composite,
)
from .crash_dump import crash_dump_context, dump_on_failure
from .dispersion import compute_d3bj, d3bj_params_for
from .dispersion_d4 import compute_d4, dftd4_available
from .gcp import GCPDataMissing, compute_gcp
from .io.molden import write_molden
from .memory import check_memory, estimate_memory, format_memory_report
from .output import (
    OutputPlan,
    OutputWriter,
    dry_run_manifest,
    is_dry_run_requested,
    parse_write_cube_kwarg,
    requested_mo_indices,
    write_cube_density_for_run_job,
    write_cube_mo_for_run_job,
    write_population,
    write_xyz,
)
from .output.citations import (
    format_references_block,
    load_default_database,
    write_bibtex,
    write_references,
)
from .perf import PerfScope
from .perf import perf_log as _perf_log_ctx
from .progress import ProgressLogger, resolve_progress
from .scf_log import format_scf_trace
from .solvers import (
    DMRGOptions,
    Hamiltonian,
    SelectedCIOptions,
    SolverResult,
    TranscorrelatedOptions,
    V2RDMOptions,
    build_hamiltonian_mo,
    build_transcorrelated_hamiltonian,
    get_hf_orbital_provider,
    solve_dmrg,
    solve_selected_ci,
    solve_v2rdm,
)
from .structured_log import (
    StructuredLog,
    run_fingerprint,
)
from .structured_log import (
    structured_log as _structured_log_ctx,
)

DispersionSpec = Optional[object]  # str functional name | D3BJParams | None


def _resolve_dispersion(
    dispersion: DispersionSpec,
    functional: Optional[str],
) -> Optional[D3BJParams]:
    """Normalize the ``dispersion=`` argument into a D3BJParams object,
    or None if no dispersion correction is requested.

    * ``None`` or empty string → no dispersion.
    * ``True`` / ``"d3bj"`` → use D3-BJ params for the current functional.
    * ``"pbe" / "b3lyp" / ...`` → use D3-BJ params for that functional
      explicitly (useful for HF + D3-BJ).
    * :class:`D3BJParams` instance → used as-is.
    """
    if dispersion is None or dispersion is False:
        return None
    if isinstance(dispersion, D3BJParams):
        return dispersion
    if isinstance(dispersion, str):
        key = dispersion.strip().lower()
        if key in ("", "none", "false"):
            return None
        if key in ("d3bj", "true", "yes", "on"):
            if not functional:
                raise ValueError(
                    "dispersion='d3bj' requires a DFT functional so we can "
                    "look up damping parameters; pass functional='pbe', "
                    "etc., or give a functional name directly as the "
                    "dispersion argument."
                )
            lookup = functional
        else:
            lookup = dispersion
        p = d3bj_params_for(lookup)
        if p is None:
            raise ValueError(
                f"dispersion={dispersion!r}: no D3-BJ parameters found "
                f"for functional {lookup!r} (neither builtin nor dftd3 "
                f"backend knows this name). Install the dftd3 backend "
                f"for the full Grimme parameter set: "
                f"`pip install dftd3` into your vibe-qc venv, or "
                f"`pip install -e '.[dispersion]'` from the repo checkout."
            )
        return p
    if dispersion is True:
        if not functional:
            raise ValueError(
                "dispersion=True requires a DFT functional; pass functional='pbe', etc."
            )
        return d3bj_params_for(functional)
    raise TypeError(
        f"dispersion must be None, bool, str, or D3BJParams; got "
        f"{type(dispersion).__name__}"
    )


class _DispersionAugmented:
    """Transparent wrapper around an SCF result that exposes the
    dispersion contribution as extra attributes without overriding the
    raw SCF ``.energy``. Users opt in to the total energy via
    :attr:`energy_total`; everything else (mo_energies, density, fock,
    converged, n_iter, scf_trace, …) forwards to the underlying
    pybind11 SCF-result struct.

    We wrap rather than mutate because the C++ result classes have
    ``def_readonly`` fields — they can't be extended in place from
    Python.
    """

    __slots__ = ("_scf", "e_dispersion", "dispersion_params")

    def __init__(self, scf_result, e_dispersion: float, params: D3BJParams):
        object.__setattr__(self, "_scf", scf_result)
        object.__setattr__(self, "e_dispersion", e_dispersion)
        object.__setattr__(self, "dispersion_params", params)

    def __getattr__(self, name):
        return getattr(self._scf, name)

    @property
    def energy_total(self) -> float:
        """SCF + dispersion energy (Hartree)."""
        return float(self._scf.energy) + float(self.e_dispersion)

    def __repr__(self) -> str:
        return (
            f"{type(self._scf).__name__}(energy={self._scf.energy:.10f}, "
            f"e_dispersion={self.e_dispersion:+.6e}, "
            f"energy_total={self.energy_total:.10f})"
        )


Method = Literal[
    "rhf",
    "rks",
    "uhf",
    "uks",
    "auto",
    "selected_ci",
    "dmrg",
    "v2rdm",
    "transcorrelated_ci",
    "fci",
    "ccsd",
    "ccsd(t)",
    # Composite 3c keywords (v0.9.0 — see vibeqc.composites). The
    # dispatcher resolves these via _apply_composite before any of the
    # other Method literal branches see them.
    "hf-3c",
    "pbeh-3c",
    "b97-3c",
    "b3lyp-3c",
    "r2scan-3c",
    "wb97x-3c",
    "hse-3c",
]


def _select_method(
    method: Method, molecule: Molecule, functional: Optional[str]
) -> str:
    """Resolve ``method='auto'`` against molecule.multiplicity + functional.

    Heuristic wavefunction routing by electron count:
    - ≤4 electrons → FCI (exact for tiny systems)
    - ≤8 electrons → Selected-CI (systematically improvable)
    - >8 electrons → HF (via SCF)

    ``ccsd`` and ``ccsd(t)`` run RHF SCF first, then post-SCF coupled-cluster."""
    if method == "auto":
        if functional:
            return "uks" if molecule.multiplicity > 1 else "rks"
        n_elec = molecule.n_electrons()
        if n_elec <= 4:
            return "fci"
        if n_elec <= 8:
            return "selected_ci"
        return "uhf" if molecule.multiplicity > 1 else "rhf"
    if method in ("ccsd", "ccsd(t)"):
        return "rhf"
    return method


def _apply_composite(
    method: str,
    *,
    basis: Optional[str],
    functional: Optional[str],
    dispersion,
) -> tuple[str, str, Optional[str], object, Optional[CompositeRecipe]]:
    """Resolve a composite-3c ``method=`` keyword (``"hf-3c"``,
    ``"pbeh-3c"``, …) into the (method, basis, functional, dispersion,
    recipe) tuple the rest of :func:`run_job` consumes.

    Non-composite ``method`` values pass through unchanged with
    ``recipe = None``. For a composite:

    * basis / functional / dispersion are taken from the recipe when
      the caller left them at defaults; an explicit caller basis wins
      with a warning (deliberate-experiment escape hatch).
    * D3-BJ recipes carry their own re-fit damping (``d3bj_damping``);
      it is passed straight through as a :class:`D3BJParams` so the
      dispersion module doesn't fall back to a wrong per-functional
      lookup.
    * Recipes flagged ``PENDING_*`` raise :class:`CompositeUnavailable`
      with a pointer at the gating roadmap item.
    """
    recipe = resolve_composite(method)
    if recipe is None:
        return method, basis or "", functional, dispersion, None

    if recipe.availability in (
        Availability.PENDING_F1,
        Availability.PENDING_F3,
    ):
        raise CompositeUnavailable(
            f"method={method!r}: {recipe.availability.value}. "
            f"This composite depends on infrastructure not yet on "
            f"main. See docs/user_guide/composites.md § Availability.\n"
            f"  Notes: {recipe.notes}"
        )

    resolved_basis = basis if basis else recipe.basis
    if basis and basis.lower() != recipe.basis.lower():
        import warnings

        warnings.warn(
            f"method={method!r} specifies basis={recipe.basis!r}, but "
            f"caller passed basis={basis!r}. Honouring the explicit "
            f"basis — the published 3c parameters were fit at the "
            f"composite's native basis and may not transfer.",
            stacklevel=2,
        )
    resolved_functional = functional if functional else recipe.functional

    if dispersion is None:
        if recipe.dispersion == "d3bj" and recipe.d3bj_damping is not None:
            resolved_dispersion: object = D3BJParams(
                s6=recipe.d3bj_damping.s6,
                s8=recipe.d3bj_damping.s8,
                a1=recipe.d3bj_damping.a1,
                a2=recipe.d3bj_damping.a2,
            )
        else:
            resolved_dispersion = recipe.dispersion
    else:
        resolved_dispersion = dispersion

    # The downstream _select_method refines restricted vs unrestricted
    # from multiplicity; we just hand it "auto".
    return "auto", resolved_basis, resolved_functional, resolved_dispersion, recipe


def _detect_direct_scf(
    resolved_method: str,
    rhf_options: Optional[RHFOptions],
    uhf_options: Optional[UHFOptions],
    rks_options: Optional[RKSOptions],
    uks_options: Optional[UKSOptions],
    basis_name: str,
    molecule: Molecule,
) -> bool:
    """Return True when the SCF will use the direct (integral-driven)
    Fock build path — either because ``scf_mode=DIRECT`` was explicitly
    set, or because AUTO resolved to DIRECT (basis larger than the
    auto threshold, and density_fit/cosx are off).

    Conservative: returns False when we can't determine the resolved
    mode (no options struct, or density_fit/cosx supersedes).
    """
    opts = {
        "rhf": rhf_options,
        "uhf": uhf_options,
        "rks": rks_options,
        "uks": uks_options,
    }.get(resolved_method)
    if opts is None:
        return False

    # density_fit + cosx supersede the four-index path — direct SCF
    # screening is not used, so the direct-SCF references don't apply.
    if getattr(opts, "density_fit", False):
        return False

    scf_mode = getattr(opts, "scf_mode", None)
    if scf_mode is None:
        return False

    from ._vibeqc_core import SCFMode

    if scf_mode == SCFMode.DIRECT:
        return True
    if scf_mode == SCFMode.AUTO:
        threshold = getattr(opts, "scf_mode_auto_threshold", 200)
        n_bf = BasisSet(molecule, basis_name).nbasis
        return n_bf > threshold
    # CONVENTIONAL — not direct.
    return False


def _run_single_point(
    method: str,
    molecule: Molecule,
    basis: BasisSet,
    *,
    functional: Optional[str],
    rhf_options: Optional[RHFOptions] = None,
    uhf_options: Optional[UHFOptions] = None,
    rks_options: Optional[RKSOptions] = None,
    uks_options: Optional[UKSOptions] = None,
    selected_ci_options: Optional[SelectedCIOptions] = None,
    dmrg_options: Optional[DMRGOptions] = None,
    v2rdm_options: Optional[V2RDMOptions] = None,
    transcorrelated_options: Optional[TranscorrelatedOptions] = None,
    active_space: Optional[tuple[int, int]] = None,
    solvent: object = None,
):
    """Single-point dispatcher. ``solvent`` (v0.9.0) reroutes through
    :func:`vibeqc.solvation.run_cpcm_scf` and returns the underlying
    SCF result with an ``e_solv`` / ``solvent_result`` attribute
    attached for downstream output writers."""
    method_opts = {
        "rhf": rhf_options or RHFOptions(),
        "uhf": uhf_options or UHFOptions(),
    }
    if method in ("rks", "uks"):
        opts = (rks_options if method == "rks" else uks_options) or (
            RKSOptions() if method == "rks" else UKSOptions()
        )
        opts.functional = functional or opts.functional or "lda"
        method_opts[method] = opts

    if solvent is not None and method in ("rhf", "uhf", "rks", "uks"):
        # CPCM macro-iteration wrapper. The C++ result types
        # (RHFResult / UHFResult / RKSResult / UKSResult) are
        # pybind11 ``def_readonly`` — Python attribute set on them
        # silently fails inside try/except. Wrap the inner result in
        # an attribute-forwarding proxy so callers can still write
        # ``result.energy`` / ``result.density`` / etc., and *also*
        # read the new solvation diagnostics (``result.solvent_result``,
        # ``result.e_solv``, ``result.energy_in_solvent``).
        from .solvation import run_cpcm_scf
        from .solvation.driver import _solvent_aware_scf_result

        sol = run_cpcm_scf(
            molecule,
            basis,
            method=method,
            solvent=solvent,
            options=method_opts[method],
        )
        return _solvent_aware_scf_result(sol)

    if method == "rhf":
        return run_rhf(molecule, basis, method_opts["rhf"])
    if method == "uhf":
        return run_uhf(molecule, basis, method_opts["uhf"])
    if method == "rks":
        return run_rks(molecule, basis, method_opts["rks"])
    if method == "uks":
        return run_uks(molecule, basis, method_opts["uks"])

    # ── Non-mean-field wavefunction methods ──
    if method in ("selected_ci", "dmrg", "v2rdm", "transcorrelated_ci"):
        import numpy as np

        hf_method = "uhf" if molecule.multiplicity > 1 else "rhf"
        C = get_hf_orbital_provider(molecule, basis, method=hf_method)
        H = build_hamiltonian_mo(molecule, basis, C)

        # Optional active-space truncation
        if active_space is not None:
            n_active, n_elec = active_space
            n_frozen = max(0, H.norb - n_active)
            C_act = np.asarray(C)[:, n_frozen : n_frozen + n_active]
            H = build_hamiltonian_mo(molecule, basis, C_act)
            H.nelec = n_elec
            H.ms2 = 0

        if method == "transcorrelated_ci":
            if transcorrelated_options is None:
                transcorrelated_options = TranscorrelatedOptions()
            H = build_transcorrelated_hamiltonian(H, transcorrelated_options)
            return solve_selected_ci(H, selected_ci_options or SelectedCIOptions())

        if method == "selected_ci":
            return solve_selected_ci(H, selected_ci_options or SelectedCIOptions())

        if method == "dmrg":
            return solve_dmrg(H, dmrg_options or DMRGOptions())

        if method == "v2rdm":
            return solve_v2rdm(H, v2rdm_options or V2RDMOptions())

    if method == "fci":
        import numpy as np
        from scipy.linalg import eigh

        from .solvers import (
            build_hamiltonian_matrix_unrestricted,
            diagonal_matrix_element_unrestricted,
            generate_determinants,
        )

        hf_method = "uhf" if molecule.multiplicity > 1 else "rhf"
        C = get_hf_orbital_provider(molecule, basis, method=hf_method)
        ham = build_hamiltonian_mo(molecule, basis, C)

        if active_space is not None:
            n_active, n_elec = active_space
            n_frozen = max(0, ham.norb - n_active)
            C_act = np.asarray(C)[:, n_frozen : n_frozen + n_active]
            ham = build_hamiltonian_mo(molecule, basis, C_act)
            ham.nelec = n_elec

        norb = ham.norb
        nelec = ham.nelec
        nalpha = (nelec + ham.ms2) // 2
        nbeta = (nelec - ham.ms2) // 2

        all_dets = generate_determinants(norb, nalpha, nbeta)
        H = build_hamiltonian_matrix_unrestricted(all_dets, ham.h1e, ham.h2e)
        evals, evecs = eigh(H)
        e_fci = evals[0] + ham.nuclear_repulsion

        return SolverResult(
            energy=e_fci,
            method=f"fci(ndet={len(all_dets)})",
            converged=True,
            n_iter=1,
            energy_trace=[e_fci],
            ci_coeffs=evecs[:, 0],
            ci_labels=all_dets,
        )

    raise ValueError(
        f"Unknown method {method!r}; use 'rhf', 'uhf', 'rks', 'uks', "
        f"'selected_ci', 'dmrg', 'v2rdm', 'transcorrelated_ci', 'fci', or 'auto'"
    )


def _format_solver_trace(result: SolverResult) -> str:
    """Format a non-mean-field solver result for the .out file."""
    import numpy as np

    lines = []
    lines.append(f"  Method:            {result.method}")
    lines.append(f"  Total energy:      {result.energy:16.10f} Ha")
    lines.append(f"  Converged:         {result.converged}")
    lines.append(f"  Iterations/sweeps: {result.n_iter}")
    if result.pt2_correction is not None and abs(result.pt2_correction) > 1e-12:
        e_var = result.energy - result.pt2_correction
        lines.append(f"  E(variational):    {e_var:16.10f} Ha")
        lines.append(f"  E(PT2 correction): {result.pt2_correction:16.10f} Ha")
    if result.bond_dim is not None:
        lines.append(f"  Bond dimension:    {result.bond_dim}")
    if result.truncation_error is not None:
        lines.append(f"  Truncation error:  {result.truncation_error:.2e}")
    if result.constraint_residual is not None:
        lines.append(f"  Constraint resid.: {result.constraint_residual:.2e}")
    if result.ci_labels is not None and result.ci_coeffs is not None:
        lines.append(f"  Determinants:      {len(result.ci_labels)}")
        c_abs = np.abs(result.ci_coeffs)
        n_show = min(8, len(c_abs))
        idx = np.argsort(-c_abs)
        lines.append("  Leading configurations:")
        norb = 0
        for rank in range(n_show):
            i = idx[rank]
            label = result.ci_labels[i]
            # Format SpinDet as α|...⟩ β|...⟩, Det as |...⟩
            if (
                isinstance(label, tuple)
                and len(label) == 2
                and isinstance(label[0], tuple)
            ):
                a_str = "".join(
                    "1" if j in label[0] else "0"
                    for j in range(max(label[0]) + 1 if label[0] else 0)
                )
                b_str = "".join(
                    "1" if j in label[1] else "0"
                    for j in range(max(label[1]) + 1 if label[1] else 0)
                )
                lines.append(f"    |c_{rank}| = {c_abs[i]:.6f}  α|{a_str}⟩ β|{b_str}⟩")
            else:
                norb = max(norb, max(label) + 1 if label else 0)
                occ_str = "".join("1" if j in label else "0" for j in range(norb))
                lines.append(f"    |c_{rank}| = {c_abs[i]:.6f}  |{occ_str}⟩")
    if result.energy_trace:
        lines.append("  Energy trace:")
        for i, e in enumerate(result.energy_trace):
            lines.append(f"    iter {i + 1:4d}:  E = {e:16.10f} Ha")
    return "\n".join(lines)


def _geom_summary(molecule: Molecule) -> str:
    """Three-column per-atom geometry listing (bohr)."""
    lines = ["  Atoms (bohr)", "  " + "-" * 54]
    for i, atom in enumerate(molecule.atoms, start=1):
        lines.append(
            f"  {i:4d}  Z={atom.Z:3d}   "
            f"{atom.xyz[0]:14.8f}  {atom.xyz[1]:14.8f}  {atom.xyz[2]:14.8f}"
        )
    lines.append(
        f"  charge={molecule.charge}  multiplicity={molecule.multiplicity}  "
        f"n_electrons={molecule.n_electrons()}"
    )
    return "\n".join(lines)


def _job_header(method: str, basis_name: str, functional: Optional[str]) -> str:
    label = f"{method.upper()}"
    if method in ("rks", "uks") and functional:
        label = f"{label} / {functional}"
    if method in ("ccsd", "ccsd(t)"):
        label = f"RHF + {method.upper()}"
    return f"  Job: {label}  basis={basis_name}"


def _make_wavefunction_ase_calculator(
    molecule: Molecule,
    basis_name: str,
    method: str,
    *,
    functional=None,
    selected_ci_options=None,
    dmrg_options=None,
    v2rdm_options=None,
    transcorrelated_options=None,
    active_space=None,
):
    """Return an ASE Calculator that computes energy via the wavefunction
    solver and forces via central finite differences (h = 0.001 bohr)."""
    try:
        from ase.calculators.calculator import Calculator
        from ase.units import Bohr, Hartree
    except ImportError:
        raise ImportError("Geometry optimization requires ASE: pip install ase")

    import numpy as np

    from ._vibeqc_core import Atom, BasisSet
    from ._vibeqc_core import Molecule as _Molecule

    h = 0.001  # bohr — central-finite-difference step

    class _WavefunctionCalculator(Calculator):
        implemented_properties = ["energy", "forces"]

        def calculate(self, atoms, properties, system_changes):
            # Convert ASE Atoms → vibeqc Molecule
            positions_bohr = atoms.positions / Bohr
            mol = _Molecule(
                [
                    Atom(int(z), list(xyz))
                    for z, xyz in zip(atoms.numbers, positions_bohr)
                ],
                charge=molecule.charge,
                multiplicity=molecule.multiplicity,
            )
            basis = BasisSet(mol, basis_name)

            # Compute energy at current geometry
            result = _run_single_point(
                method,
                mol,
                basis,
                functional=functional,
                selected_ci_options=selected_ci_options,
                dmrg_options=dmrg_options,
                v2rdm_options=v2rdm_options,
                transcorrelated_options=transcorrelated_options,
                active_space=active_space,
            )
            energy_ha = float(getattr(result, "energy", 0.0))
            self.results["energy"] = energy_ha * Hartree

            # Numerical forces via central differences
            if "forces" in properties:
                forces_ha_bohr = np.zeros((len(atoms), 3))
                for i in range(len(atoms)):
                    for c in range(3):
                        # +h displacement
                        pos_plus = positions_bohr.copy()
                        pos_plus[i, c] += h
                        mol_plus = _Molecule(
                            [
                                Atom(int(z), list(xyz))
                                for z, xyz in zip(atoms.numbers, pos_plus)
                            ],
                            charge=molecule.charge,
                            multiplicity=molecule.multiplicity,
                        )
                        basis_plus = BasisSet(mol_plus, basis_name)
                        result_plus = _run_single_point(
                            method,
                            mol_plus,
                            basis_plus,
                            functional=functional,
                            selected_ci_options=selected_ci_options,
                            dmrg_options=dmrg_options,
                            v2rdm_options=v2rdm_options,
                            transcorrelated_options=transcorrelated_options,
                            active_space=active_space,
                        )
                        e_plus = float(getattr(result_plus, "energy", 0.0))

                        # -h displacement
                        pos_minus = positions_bohr.copy()
                        pos_minus[i, c] -= h
                        mol_minus = _Molecule(
                            [
                                Atom(int(z), list(xyz))
                                for z, xyz in zip(atoms.numbers, pos_minus)
                            ],
                            charge=molecule.charge,
                            multiplicity=molecule.multiplicity,
                        )
                        basis_minus = BasisSet(mol_minus, basis_name)
                        result_minus = _run_single_point(
                            method,
                            mol_minus,
                            basis_minus,
                            functional=functional,
                            selected_ci_options=selected_ci_options,
                            dmrg_options=dmrg_options,
                            v2rdm_options=v2rdm_options,
                            transcorrelated_options=transcorrelated_options,
                            active_space=active_space,
                        )
                        e_minus = float(getattr(result_minus, "energy", 0.0))

                        # Central difference: F = -dE/dR
                        forces_ha_bohr[i, c] = -(e_plus - e_minus) / (2 * h)

                self.results["forces"] = forces_ha_bohr * (Hartree / Bohr)

    return _WavefunctionCalculator()


def _optimize_geometry(
    molecule: Molecule,
    basis_name: str,
    *,
    functional: Optional[str],
    trajectory_path: Path,
    fmax: float,
    max_steps: int,
    dispersion_params: Optional[D3BJParams] = None,
    method: str = "rhf",
    selected_ci_options=None,
    dmrg_options=None,
    v2rdm_options=None,
    transcorrelated_options=None,
    active_space=None,
    rhf_options: Optional[RHFOptions] = None,
    uhf_options: Optional[UHFOptions] = None,
    rks_options: Optional[RKSOptions] = None,
    uks_options: Optional[UKSOptions] = None,
) -> Molecule:
    """Run an ASE/BFGS geometry optimization, writing frames to a .traj file.

    Returns a new Molecule at the optimized geometry. The SCF options
    apply to every intermediate geometry, not just the final point — use
    them to bump ``max_iter`` / add damping for systems where BFGS
    sometimes lands on a hard-to-converge geometry (H-bonded clusters,
    near-degenerate states).

    For wavefunction methods (``selected_ci``, ``dmrg``, ``v2rdm``,
    ``transcorrelated_ci``), forces are computed via central finite
    differences because the solvers do not yet provide analytic gradients.
    """
    try:
        from ase import Atoms
        from ase.io.trajectory import Trajectory
        from ase.optimize import BFGSLineSearch
        from ase.units import Bohr
    except ImportError as exc:
        raise ImportError(
            "Geometry optimization requires ASE. Install it with "
            "`pip install ase` into your vibe-qc venv."
        ) from exc

    from .ase import VibeQC

    positions_ang = [[coord * Bohr for coord in atom.xyz] for atom in molecule.atoms]
    atoms = Atoms(
        numbers=[atom.Z for atom in molecule.atoms],
        positions=positions_ang,
    )

    wavefunction_methods = {"selected_ci", "dmrg", "v2rdm", "transcorrelated_ci"}
    if method in wavefunction_methods:
        atoms.calc = _make_wavefunction_ase_calculator(
            molecule,
            basis_name,
            method,
            functional=functional,
            selected_ci_options=selected_ci_options,
            dmrg_options=dmrg_options,
            v2rdm_options=v2rdm_options,
            transcorrelated_options=transcorrelated_options,
            active_space=active_space,
        )
    else:
        atoms.calc = VibeQC(
            basis=basis_name,
            charge=molecule.charge,
            multiplicity=molecule.multiplicity,
            functional=functional,
            dispersion=dispersion_params,
            rhf_options=rhf_options,
            uhf_options=uhf_options,
            rks_options=rks_options,
            uks_options=uks_options,
        )

    with Trajectory(str(trajectory_path), "w", atoms) as traj:
        # BFGSLineSearch refuses uphill steps by construction — essential
        # on flat / weakly-bound PESs (H-bonded clusters, dispersion-dominated
        # complexes) where plain BFGS's Hessian extrapolation routinely
        # pushes atoms apart. Works on covalent minima too, so we use it
        # uniformly. Default ASE line-search parameters are well-tuned;
        # don't clamp maxstep here or the line search can't converge.
        opt = BFGSLineSearch(atoms, logfile=None)
        opt.attach(traj)
        opt.run(fmax=fmax, steps=max_steps)

    # Re-build a vibe-qc Molecule at the optimized geometry.
    from ._vibeqc_core import Atom as _Atom

    new_positions_bohr = atoms.positions / Bohr
    optimized = Molecule(
        [_Atom(int(z), list(xyz)) for z, xyz in zip(atoms.numbers, new_positions_bohr)],
        molecule.charge,
        molecule.multiplicity,
    )
    return optimized


def _resolve_optimizer_backend(requested: str) -> str:
    """Resolve ``optimizer_backend`` to ``"ase"`` or ``"native"``."""
    if requested == "ase":
        try:
            import ase  # noqa: F401
        except ImportError:
            raise ImportError(
                "optimizer_backend='ase' requires ASE. Install with "
                "`pip install ase` or use optimizer_backend='native'."
            ) from None
        return "ase"
    if requested == "native":
        return "native"
    if requested != "auto":
        raise ValueError(
            f"optimizer_backend={requested!r} — expected 'auto', 'ase', or 'native'."
        )
    # "auto": prefer ASE if installed, otherwise native.
    try:
        import ase  # noqa: F401
    except ImportError:
        return "native"
    return "ase"


def run_job(
    molecule: Molecule,
    *,
    basis: Optional[str] = None,
    method: Method = "auto",
    functional: Optional[str] = None,
    output: str | os.PathLike = "output",
    optimize: bool = False,
    write_molden_file: bool = True,
    write_xyz_file: bool = True,
    write_population_file: bool = True,
    write_cube: Union[bool, str, int, list, tuple, None] = False,
    cube_spacing: float = 0.2,
    cube_padding: float = 4.0,
    citations: bool = True,
    dry_run: bool = False,
    fmax: float = 0.05,
    max_opt_steps: int = 200,
    optimizer_backend: str = "auto",
    memory_override: bool = False,
    num_threads: Optional[int] = None,
    dispersion: DispersionSpec = None,
    solvent: object = None,
    record_hostname: bool = True,
    rhf_options: Optional[RHFOptions] = None,
    uhf_options: Optional[UHFOptions] = None,
    rks_options: Optional[RKSOptions] = None,
    uks_options: Optional[UKSOptions] = None,
    selected_ci_options: Optional[SelectedCIOptions] = None,
    dmrg_options: Optional[DMRGOptions] = None,
    v2rdm_options: Optional[V2RDMOptions] = None,
    transcorrelated_options: Optional[TranscorrelatedOptions] = None,
    active_space: Optional[tuple[int, int]] = None,
    progress: Union[bool, ProgressLogger, None] = None,
    verbose: Optional[int] = None,
    use_logging: bool = False,
    perf_log: Optional[Union[str, os.PathLike, bool]] = None,
    structured_log: Union[bool, str, os.PathLike, None] = False,
    crash_dump: Union[bool, str, os.PathLike, None] = True,
    # QVF visualisation archive (v1).
    output_qvf: bool = False,
    hessian: bool = False,
) -> object:
    """Run a vibe-qc SCF job and write the standard output files.

    Parameters
    ----------
    molecule
        The :class:`Molecule` describing the system (bohr coordinates).
    basis
        libint-recognized basis-set name.
    method
        ``"rhf"``, ``"uhf"``, ``"rks"``, ``"uks"``, ``"auto"``, ``"ccsd"``,
        or ``"ccsd(t)"`` (picks
        restricted vs unrestricted from ``molecule.multiplicity`` and
        switches between HF/KS based on whether ``functional`` is set).
    functional
        XC functional for RKS / UKS (e.g. ``"PBE"``, ``"B3LYP"``). Ignored
        for HF.
    output
        Path stem for the generated files. ``{output}.out`` always; also
        ``{output}.molden`` unless disabled; and ``{output}.traj`` when
        ``optimize=True``.
    optimize
        Run a BFGS geometry optimization first (via ASE), then the final
        SCF on the optimized geometry. The trajectory is written for
        animation (openable with ASE-aware viewers).
    write_molden_file
        Emit ``{output}.molden`` at the converged geometry. Default True.
    fmax, max_opt_steps
        Optimizer tolerance (eV/Å) and iteration limit. Ignored unless
        ``optimize=True``.
    memory_override
        If ``False`` (default), the driver estimates peak memory and
        aborts with :class:`InsufficientMemoryError` when the estimate
        exceeds the machine's available RAM. Set to ``True`` to
        proceed anyway — at the risk of swap-thrashing or a system
        freeze.
    num_threads
        If set, pin the OpenMP thread count for the duration of the
        calculation. ``None`` (default) leaves the current setting in
        place — which is usually "all cores" unless the environment
        variable ``OMP_NUM_THREADS`` is set or
        :func:`vibeqc.set_num_threads` was called earlier. The actual
        thread count used is recorded in the output log for
        reproducibility.
    dispersion
        Post-SCF D3(BJ) dispersion correction. Accepts:

        * ``None`` (default) — no dispersion.
        * ``True`` or ``"d3bj"`` — use D3-BJ params for the current DFT
          functional.
        * A functional name (``"pbe"``, ``"b3lyp"``, …) — use its D3-BJ
          params (useful for ``method="rhf"`` + ``"hf"`` dispersion, or
          for overriding the SCF functional in the damping lookup).
        * A :class:`D3BJParams` instance — used directly.

        The energy correction is written to the ``.out`` file, added to
        the returned object as ``e_dispersion`` / ``energy_total`` (the
        raw SCF ``.energy`` is preserved untouched), and, when
        ``optimize=True``, added to the forces the optimizer sees.
        Routes through :func:`vibeqc.compute_d3bj` with ``backend="auto"``
        — the reference ``dftd3`` backend is used when installed,
        otherwise the D1a framework stub. See
        :mod:`vibeqc.dispersion` for details.
    solvent
        v0.9.0 CPCM / COSMO implicit solvation. Accepts:

        * ``None`` (default) — gas-phase SCF.
        * A preset name (``"water"``, ``"dmso"``, ``"acetonitrile"``,
          ``"chloroform"``, ``"benzene"``, …) — looks up the static
          dielectric ε from :data:`vibeqc.SOLVENT_PRESETS`.
        * A numeric ε (e.g. ``78.39``) — custom dielectric.
        * A dict (``{"epsilon": 25.0, "variant": "cosmo", ...}``) or a
          :class:`vibeqc.SolventModel` instance for full control over
          cavity construction (Bondi radii, Lebedev order, switching
          width, max macro-iterations).

        Routes through :func:`vibeqc.run_cpcm_scf`; macro-iterates the
        apparent surface charge against the SCF density until
        ΔE_solv < 1e-6 Ha (typically 3–5 outer cycles). The total
        energy is the in-solvent value; the gas-phase reference is
        retained on ``result.solvent_result.e_gas``. See
        :doc:`/user_guide/solvation` for the full theory and the
        cavity / preset table.
    record_hostname
        If ``False``, the per-job ``{output}.system`` manifest writes
        ``hostname = "<redacted>"`` instead of the live hostname. The
        ``VIBEQC_NO_HOSTNAME=1`` environment variable does the same
        thing globally — engineering's bundled docs runs use the env
        var so example outputs in ``docs/_static/examples/`` don't
        leak machine names. Other manifest fields (CPU model, OS,
        memory, library versions) are not redacted; the redaction is
        scoped to the hostname only.
    rhf_options / uhf_options / rks_options / uks_options
        Optional override for the respective SCF options struct.
    progress
        Live progress logger for long-running jobs. Default behavior
        is **ON** — the job emits a banner, per-stage milestones, and
        a final summary to stdout (line-flushed) so the canonical
        ``nohup python LiH.py > LiH.log 2>&1 &`` + ``tail -f LiH.log``
        workflow shows progress in real time. The ``.out`` file is
        also line-buffered so ``tail -f output-LiH.out`` works without
        any extra setup.

        Pass ``progress=False`` to silence stdout (the ``.out`` file
        is still written normally — only the live mirror is
        suppressed). Pass a :class:`vibeqc.ProgressLogger` instance
        for fully custom routing (tee to a persistent file, mute,
        thread one logger through nested calls).

        Set ``VIBEQC_LIVE_LOGGING=0`` in the environment to disable
        live progress globally — useful for batch scripts that don't
        want to edit every input file. The env var only takes effect
        when ``progress`` is left at its default (``None``); explicit
        ``progress=True`` / ``progress=False`` / a ``ProgressLogger``
        instance always wins, so a debugging session can re-enable
        progress for one shell.

        Per-iteration progress for periodic SCFs (which run in
        Python) is streamed live through this same logger via the
        lower-level ``run_*_periodic_*`` entry points; molecular SCFs
        run in C++ and only emit a pre-SCF banner + post-SCF summary
        live, with the per-iteration trace landing in the ``.out``
        when the SCF returns.
    verbose
        Integer verbosity level (PySCF convention, 0..9, default
        4). Each level is a strict superset of the one below, so
        bumping ``verbose`` only adds output:

        * ``0`` — silent (nothing live; ``.out`` is still written)
        * ``1`` — banner + warnings + final SCF summary only
        * ``2`` — add per-stage milestones + ``info()`` lines
        * ``3`` — add per-stage timing on stage exit
        * ``4`` — add per-iteration SCF rows (DEFAULT)
        * ``5`` — add inline RSS-memory snapshots
        * ``6+`` — phase-level wall-clock breakdown live (overlaps
          the post-mortem ``.perf`` log on purpose)

        Pass ``verbose=None`` (the default) to read the
        ``VIBEQC_VERBOSE`` env var; if unset, falls back to 4.
        Ignored when ``progress`` is a :class:`ProgressLogger`
        instance — that logger's own level wins.
    use_logging
        If ``True``, route progress through
        ``logging.getLogger("vibeqc.run_job")`` instead of bare
        ``stdout`` writes. Banner / stage milestones land at
        ``INFO``; per-iteration SCF rows at ``DEBUG``; warnings at
        ``WARNING``. Composes naturally with stdlib handlers
        (``RotatingFileHandler``, syslog, ``dictConfig``)::

            import logging
            logging.basicConfig(level=logging.INFO)
            vq.run_job(..., use_logging=True)

        ``progress=False`` still wins as a hard kill switch — the
        verbose-level gate runs *before* the logging call, so a
        silent run stays silent regardless of the active logging
        config. Ignored when ``progress`` is a pre-built
        :class:`ProgressLogger` instance.
    perf_log
        Optional path (or ``True`` to use ``{output}.perf``) to write
        a post-mortem performance / debug breakdown — phase-level
        wall + CPU times, memory snapshots, parallelism flags. The
        live ``progress=`` log shows progress *during* the run; the
        perf log shows where the time went *afterwards*. Off by
        default. Pass an explicit path, ``True`` to emit alongside
        ``{output}.out``, or set the ``VIBEQC_PERFLOG=path`` env
        var (which wins when ``perf_log`` is left at ``None``).
        See :mod:`vibeqc.perf` for the full instrumentation surface.
    structured_log
        Optional NDJSON (one-JSON-record-per-line) log capturing
        every SCF transition — banner, job_start, memory_estimate,
        per-iter rows, scf_converged, properties, job_end. Off by
        default. Pass ``True`` to emit ``{output}.scf.jsonl``,
        a path-like to write there explicitly, or set the
        ``VIBEQC_STRUCTURED_LOG=path`` env var (which wins when
        ``structured_log`` is left at ``False``). The format is
        stable: events are append-only, fields are never renamed
        or removed. See :mod:`vibeqc.structured_log` for the full
        event catalog.
    crash_dump
        Write a snapshot to ``{output}.dump`` (TOML) plus binary
        attachments (``.dump.density.npy``, ``.dump.fock.npy``,
        ``.dump.mo.npy``) when the SCF fails ungracefully — raised
        exception (NaN, linear dependence, memory error). Default
        ``True``: post-mortem reproducibility costs nothing on
        success and saves a re-run on failure. Pass ``False`` (or
        set ``VIBEQC_NO_CRASH_DUMP=1`` in the environment) to
        disable. The dump is written alongside ``{output}.out`` in
        the standard ``output.*`` family — re-attach the ``.dump``
        + ``.dump.density.npy`` to a bug report and the maintainer
        can reconstruct the failing state via
        :func:`vibeqc.load_dump`. See :mod:`vibeqc.crash_dump` for
        the dump format. The exception is always re-raised after
        the dump is written; ``crash_dump=True`` does *not* swallow
        failures.
    hessian
        When True, compute harmonic vibrational frequencies via
        finite-difference Hessian. Default False. Cost: ~6N SCF evals.
        Results printed to .out and embedded in QVF for vibe-view.

    Returns
    -------
    The SCF result object (RHFResult / UHFResult / RKSResult / UKSResult).
    """
    output_stem = Path(os.fspath(output))
    out_path = output_stem.with_suffix(".out")
    molden_path = output_stem.with_suffix(".molden")
    traj_path = output_stem.with_suffix(".traj")

    # Resolve perf_log target. ``True`` → emit to {output}.perf;
    # str/Path → use that path verbatim; None → defer to
    # VIBEQC_PERFLOG env var inside perf_log() (or no-op if unset).
    if perf_log is True:
        _perf_target: Optional[Path] = output_stem.with_suffix(".perf")
    elif perf_log is False or perf_log is None:
        _perf_target = None
    else:
        _perf_target = Path(os.fspath(perf_log))

    # Resolve structured_log target. Default OFF (False) — only emit
    # when the caller opts in. Resolution mirrors perf_log:
    #   True             → {output}.scf.jsonl
    #   str/PathLike     → use verbatim
    #   False / None     → defer to VIBEQC_STRUCTURED_LOG env var (or
    #                       leave disabled if the env var is unset)
    if structured_log is True:
        _structured_target: Optional[Path] = output_stem.with_suffix(".scf.jsonl")
    elif structured_log is False or structured_log is None:
        _structured_target = None
    else:
        _structured_target = Path(os.fspath(structured_log))

    # Resolve crash_dump target. Default ON: dump on failure unless the
    # caller explicitly opts out (or VIBEQC_NO_CRASH_DUMP=1). The
    # post-mortem snapshot is cheap on success (zero bytes written),
    # and saves a re-run on failure. Resolution:
    #   True             → {output}.dump (default)
    #   str/PathLike     → use verbatim as the stem
    #   False            → disabled
    #   None             → also default-on (treated as True)
    _crash_off_env = os.environ.get(
        "VIBEQC_NO_CRASH_DUMP",
        "",
    ).strip().lower() in ("1", "true", "yes", "on")
    if crash_dump is False or _crash_off_env:
        _crash_stem: Optional[Path] = None
    elif crash_dump is True or crash_dump is None:
        _crash_stem = output_stem
    else:
        _crash_stem = Path(os.fspath(crash_dump))

    # Default-ON progress: when the caller didn't pass a value,
    # check the env-var opt-out (VIBEQC_LIVE_LOGGING=0) and otherwise
    # turn it on. This lets every backgrounded ``python input.py >
    # log 2>&1 &`` show progress without the user having to learn
    # a new kwarg — answering the "is my job stuck or actually
    # running?" question by default. Explicit progress= (True/False/
    # logger) bypasses the env var entirely.
    if progress is None:
        env = os.environ.get("VIBEQC_LIVE_LOGGING", "").strip().lower()
        if env in ("0", "false", "no", "off"):
            progress = False
        else:
            progress = True

    # Verbose level resolution (v0.5.3). When the caller leaves
    # ``verbose=None`` we read VIBEQC_VERBOSE; otherwise the
    # explicit argument wins. Falls back to the package default
    # (level 4 — banner + stages + per-iter SCF) when neither
    # is set. The env var is a parsing best-effort: a junk value
    # silently falls back to the default rather than raising,
    # since a typo in $VIBEQC_VERBOSE shouldn't kill someone's
    # overnight run.
    if verbose is None:
        env_verbose = os.environ.get("VIBEQC_VERBOSE", "").strip()
        if env_verbose:
            try:
                verbose_level = int(env_verbose)
            except ValueError:
                verbose_level = None  # type: ignore[assignment]
            else:
                verbose_level = max(0, verbose_level)
        else:
            verbose_level = None  # type: ignore[assignment]
    else:
        verbose_level = max(0, int(verbose))

    plog = resolve_progress(
        progress,
        verbose=verbose_level,
        use_logging=use_logging,
    )

    # Composite 3c keyword resolution (e.g. ``method="hf-3c"``). For
    # non-composite ``method`` values this is a pass-through. Composite
    # values get their (basis, functional, dispersion) tuple inferred
    # from the :mod:`vibeqc.composites` registry; raises
    # :class:`CompositeUnavailable` for recipes whose prerequisites
    # have not yet landed.
    method, basis, functional, dispersion, composite_recipe = _apply_composite(
        method,
        basis=basis,
        functional=functional,
        dispersion=dispersion,
    )
    if not basis:
        raise ValueError(
            "run_job: basis is required. Pass basis='def2-svp' (or "
            "another supported basis), or use a composite keyword "
            "(method='hf-3c', …) which carries its own basis. "
            "Composite catalogue: "
            f"{[r.name for r in __import__('vibeqc').list_composites()]}"
        )

    resolved_method = _select_method(method, molecule, functional)

    # Composite recipes may request D4 dispersion; the D4 path is
    # wired through compute_d4 in the post-SCF block below. A D4
    # request short-circuits the D3-BJ resolution.
    use_d4 = isinstance(dispersion, str) and dispersion.strip().lower() == "d4"

    # Build the declarative OutputPlan once. It drives BOTH the
    # dry-run pre-flight (below) and the real run's manifest
    # lifecycle (the OutputWriter constructed before the SCF). One
    # source of truth for "what files will this job produce".
    _cube_req_plan = parse_write_cube_kwarg(write_cube)
    _output_plan = OutputPlan.from_run_job_kwargs(
        output=output_stem,
        method=resolved_method,
        basis=basis,
        functional=functional,
        optimize=optimize,
        write_molden_file=write_molden_file,
        write_xyz=write_xyz_file,
        citations=citations,
        write_population=write_population_file,
        write_cube_density=_cube_req_plan.density,
        cube_mo_labels=tuple(str(lbl) for lbl in _cube_req_plan.mo_labels),
        perf_log=perf_log,
        structured_log=structured_log,
        crash_dump=crash_dump,
        output_qvf=output_qvf,
    )

    # Dry-run short-circuit (Phase O3). When ``dry_run=True`` or the
    # ``VIBEQC_DRY_RUN`` env var is set, write a one-shot
    # ``{output}.system`` with ``[outputs].status = "dry_run"``,
    # print the declared-artefacts summary to stdout, and return
    # ``None`` without running the SCF. This is the pre-flight path
    # that ``vq submit`` uses to learn which files a job will
    # produce. Run before any heavyweight setup (basis-set
    # construction, memory estimate, perf tracker) so the
    # short-circuit is genuinely cheap.
    if dry_run or is_dry_run_requested():
        # Make sure the parent directory exists so we don't fail on a
        # freshly-typed `-o /tmp/new-subdir/foo` path.
        output_stem.parent.mkdir(parents=True, exist_ok=True)
        dry_run_manifest(
            _output_plan,
            record_hostname=record_hostname,
        )
        return None

    # Resolve dispersion up front so a bad spec fails before we touch
    # the filesystem. d3_params is None iff dispersion is disabled.
    # A D4 request (composite recipes r²SCAN-3c / ωB97X-3c) short-
    # circuits the D3-BJ path — D4 is applied in the post-SCF block.
    if use_d4:
        d3_params = None
    else:
        d3_params = _resolve_dispersion(dispersion, functional)

    # Pin the thread count if the caller asked for one; otherwise leave
    # the current state alone. We always query at the end so the output
    # reports what was actually used (which may differ from what the
    # user asked for — e.g. on a single-core build or OpenMP-disabled
    # environment).
    if num_threads is not None:
        set_num_threads(int(num_threads))
    threads_in_use = get_num_threads()

    # Ensure the parent directory exists so users can give names like
    # "runs/water" without pre-creating "runs/".
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # Construct the OutputWriter — the per-job coordinator that owns
    # ``{output}.system`` for the rest of the run. This writes the
    # manifest immediately with the ``[plan]`` section + ``[outputs]
    # .status = "running"`` *before* the SCF starts, so a ``vq``
    # daemon polling the workspace sees the declared output set and a
    # liveness signal from the first moment. ``finish()`` /
    # ``crash()`` at the exit paths flip the status; the end-of-job
    # record-sweep fills ``[[outputs.files]]`` with the artefacts
    # that actually landed. Replaces the bare end-of-job
    # ``write_system_manifest`` call (pre-v1.0 dispatch-overhaul).
    _output_writer = OutputWriter(
        _output_plan,
        record_hostname=record_hostname,
    )

    t_job_start = time.perf_counter()

    # ``buffering=1`` requests line-buffered I/O so a `tail -f` against
    # the .out file shows new lines as the SCF / property pipeline emits
    # them. We also call ``f.flush()`` explicitly after major blocks for
    # the same reason — line buffering only kicks in on newlines.
    #
    # The perf-log context manager wraps the whole body so PerfScope
    # entries from anywhere downstream — periodic SCF stages, gradient
    # calls, anything that calls ``with PerfScope("..."): ...`` —
    # accumulate into the same tracker. When ``_perf_target`` is None
    # (and ``$VIBEQC_PERFLOG`` isn't set), the context manager is a
    # cheap no-op: PerfScope.__enter__/__exit__ become a single
    # ContextVar lookup with no I/O.
    hessian_result = None
    with (
        _perf_log_ctx(_perf_target),
        _structured_log_ctx(_structured_target) as _slog,
        crash_dump_context(_crash_stem) as _crash_target,
        open(out_path, "w", encoding="utf-8", buffering=1) as f,
        PerfScope("run_job.total"),
    ):
        # Structured log: banner + job_start records. Always safe to
        # emit — _slog.emit is a no-op when the log is disabled.
        _libs = library_versions()
        _fp = run_fingerprint(
            method=resolved_method,
            basis=basis,
            functional=functional,
            molecule=molecule,
        )
        _slog.emit(
            "banner",
            vibeqc_version=VIBEQC_VERSION,
            libint=_libs.get("libint", "unknown"),
            libxc=_libs.get("libxc", "unknown"),
            spglib=_libs.get("spglib", "unknown"),
            run_fingerprint=_fp,
        )
        _slog.emit(
            "job_start",
            method=resolved_method,
            basis=basis,
            functional=functional,
            optimize=bool(optimize),
            threads=int(threads_in_use),
            n_atoms=int(len(list(molecule.atoms))),
            charge=int(molecule.charge),
            multiplicity=int(molecule.multiplicity),
            n_electrons=int(molecule.n_electrons()),
            output_stem=str(output_stem),
        )
        f.write(banner() + "\n\n")
        f.write(_job_header(resolved_method, basis, functional) + "\n")
        f.write(_geom_summary(molecule) + "\n")
        f.write(f"  Threads: {threads_in_use}  (OpenMP shared-memory parallelism)\n\n")
        f.flush()
        plog.banner(
            f"run_job  {_job_header(resolved_method, basis, functional).strip()}"
        )
        plog.info(f"Threads: {threads_in_use}  (OpenMP shared-memory parallelism)")
        plog.info(f"Output file: {out_path}")

        t_opt = 0.0
        _traj_frames = []
        _traj_energies = []
        if optimize:
            _opt_backend = _resolve_optimizer_backend(optimizer_backend)

            if _opt_backend == "ase":
                f.write(f"  Geometry optimization (ASE/BFGS) -> {traj_path.name}\n")
                f.write(f"  fmax = {fmax} eV/A, max_steps = {max_opt_steps}\n\n")
                f.flush()
                with (
                    plog.stage(
                        "geometry_optimization",
                        detail=f"BFGS, fmax={fmax} eV/A, max_steps={max_opt_steps}",
                    ),
                    PerfScope("geometry_optimization"),
                ):
                    t0 = time.perf_counter()
                    molecule = _optimize_geometry(
                        molecule,
                        basis,
                        functional=functional,
                        trajectory_path=traj_path,
                        fmax=fmax,
                        max_steps=max_opt_steps,
                        dispersion_params=d3_params,
                        method=resolved_method,
                        selected_ci_options=selected_ci_options,
                        dmrg_options=dmrg_options,
                        v2rdm_options=v2rdm_options,
                        transcorrelated_options=transcorrelated_options,
                        active_space=active_space,
                        rhf_options=rhf_options,
                        uhf_options=uhf_options,
                        rks_options=rks_options,
                        uks_options=uks_options,
                    )
                    t_opt = time.perf_counter() - t0
                f.write("  Optimized geometry\n")
                f.write(_geom_summary(molecule) + "\n\n")
                f.flush()

                # Read trajectory frames for QVF archive.
                _traj_frames = []
                _traj_energies = []
                try:
                    from ase.io.trajectory import Trajectory
                    from ase.units import Bohr, Hartree

                    traj = Trajectory(str(traj_path))
                    for atoms in traj:
                        pos_bohr = atoms.positions / Bohr
                        frame_mol = Molecule(
                            [
                                Atom(int(z), list(xyz))
                                for z, xyz in zip(atoms.numbers, pos_bohr)
                            ],
                            molecule.charge,
                            molecule.multiplicity,
                        )
                        _traj_frames.append(frame_mol)
                        try:
                            e_eV = atoms.get_potential_energy()
                            if e_eV is not None and e_eV == e_eV:
                                _traj_energies.append(float(e_eV) / Hartree)
                        except Exception:
                            pass
                except Exception:
                    _traj_frames = []
                    _traj_energies = []
            else:
                # Native scipy L-BFGS-B backend.
                from .molecular_optimize import optimize_molecule as _native_opt

                # Convert fmax from eV/Å to Ha/bohr.  1 eV/Å ≈ 0.045 Ha/bohr.
                _conv_grad = fmax * 0.045
                f.write(
                    f"  Geometry optimization (native L-BFGS-B)\n"
                    f"  gtol = {_conv_grad:.2e} Ha/bohr, max_steps = {max_opt_steps}\n\n"
                )
                f.flush()
                with (
                    plog.stage(
                        "geometry_optimization",
                        detail=f"L-BFGS-B, gtol={_conv_grad:.1e} Ha/bohr",
                    ),
                    PerfScope("geometry_optimization"),
                ):
                    t0 = time.perf_counter()
                    _opt_result = _native_opt(
                        molecule,
                        basis,
                        method=resolved_method,
                        functional=functional,
                        rhf_options=rhf_options,
                        uhf_options=uhf_options,
                        rks_options=rks_options,
                        uks_options=uks_options,
                        max_iter=max_opt_steps,
                        conv_tol_grad=_conv_grad,
                        dispersion_params=d3_params,
                        solvent=solvent,
                        record_trajectory=bool(output_qvf),
                        progress=False,
                    )
                    t_opt = time.perf_counter() - t0
                molecule = _opt_result.system
                _traj_frames = _opt_result.trajectory_frames or None
                _traj_energies = _opt_result.trajectory_energies or None

                f.write("  Optimized geometry\n")
                f.write(_geom_summary(molecule) + "\n\n")
                f.flush()

        with PerfScope("basis_set_construction"):
            basis_obj = BasisSet(molecule, basis)

        # Memory pre-flight: render the estimate into the log, then
        # (unless the user has overridden) abort if we'd exceed
        # available RAM. Doing the abort AFTER writing to the file
        # leaves the user with a readable explanation on disk.
        if resolved_method in ("rhf", "uhf", "rks", "uks"):
            est_opts = {
                "rhf": rhf_options,
                "uhf": uhf_options,
                "rks": rks_options,
                "uks": uks_options,
            }.get(resolved_method)
        else:
            est_opts = None
        estimate = estimate_memory(
            molecule,
            basis_obj,
            method=resolved_method,
            options=est_opts,
        )
        f.write(
            "  "
            + format_memory_report(
                estimate,
                override_requested=memory_override,
            ).replace("\n", "\n  ")
            + "\n\n"
        )
        f.flush()
        _slog.emit(
            "memory_estimate",
            total_gb=float(estimate.total_gb),
            raw_total_bytes=int(estimate.raw_total_bytes),
            headroom_factor=float(estimate.headroom_factor),
            by_category={k: int(v) for k, v in estimate.by_category.items()},
        )
        check_memory(estimate, allow_exceed=memory_override)

        plog.info(
            f"Starting molecular SCF ({resolved_method.upper()}"
            + (f" / {functional}" if functional else "")
            + "). Per-iteration progress runs in compiled C++ and is "
            "not streamed live; the trace will be written when the SCF "
            "returns."
        )
        # Memory snapshot before SCF: lets the perf log show RSS
        # growth caused by the integral / Fock build, separate from
        # the basis-set / pre-flight overhead.
        from .perf import active_tracker as _at

        _tracker = _at()
        if _tracker is not None:
            _tracker.snapshot_memory("start_of_scf")
        t0 = time.perf_counter()
        # Crash-dump scope: if the C++ SCF raises (NaN, linear
        # dependence, memory error, …), we want a snapshot of the
        # last known state on disk before the exception bubbles up
        # and the user has nothing to debug from. The state we can
        # capture from outside the C++ call is limited (no live
        # density / Fock) — we record the geometry, options, and
        # phase. Any deeper introspection needs a future C++ hook.
        try:
            with PerfScope(f"scf.{resolved_method}"):
                result = _run_single_point(
                    resolved_method,
                    molecule,
                    basis_obj,
                    functional=functional,
                    rhf_options=rhf_options,
                    uhf_options=uhf_options,
                    rks_options=rks_options,
                    uks_options=uks_options,
                    selected_ci_options=selected_ci_options,
                    dmrg_options=dmrg_options,
                    v2rdm_options=v2rdm_options,
                    transcorrelated_options=transcorrelated_options,
                    active_space=active_space,
                    solvent=solvent,
                )
        except BaseException as _scf_exc:
            t_scf = time.perf_counter() - t0
            f.write(f"\n  SCF FAILED: {type(_scf_exc).__name__}: {_scf_exc}\n")
            f.flush()
            _slog.emit(
                "scf_failed",
                exception_type=type(_scf_exc).__name__,
                exception=str(_scf_exc),
                wall_s=float(t_scf),
            )
            if _crash_target is not None:
                _opts_for_dump = (
                    {
                        "rhf": rhf_options,
                        "uhf": uhf_options,
                        "rks": rks_options,
                        "uks": uks_options,
                    }.get(resolved_method)
                    if resolved_method in ("rhf", "uhf", "rks", "uks")
                    else None
                )
                dump_on_failure(
                    _crash_target,
                    _scf_exc,
                    {"phase": f"scf.{resolved_method}", "n_iters_completed": 0},
                    options=_opts_for_dump,
                    molecule=molecule,
                )
                f.write(
                    f"  Crash dump written to "
                    f"{_crash_target.with_suffix('.dump').name}\n"
                )
                f.flush()
            # Flip the manifest status to "crashed" before the
            # exception propagates — a ``vq`` daemon polling
            # ``{output}.system`` then sees the job aborted rather
            # than mistaking a stale "running" for a live job.
            try:
                _output_writer.crash(wall_seconds=t_scf)
            except Exception:
                # Manifest bookkeeping must never mask the real SCF
                # exception we're about to re-raise.
                pass
            # Re-raise: crash_dump=True does NOT swallow the failure;
            # it just makes the failure debuggable.
            raise
        t_scf = time.perf_counter() - t0
        if _tracker is not None:
            _tracker.snapshot_memory("end_of_scf")
            # Also record the per-iteration trace so the perf log
            # carries the same info as the .out's SCF trace, in
            # tabular form. The C++ result.scf_trace is the canonical
            # source for these numbers.
            for step in getattr(result, "scf_trace", []) or []:
                _tracker.add_scf_iter(
                    iter=int(step.iter),
                    energy=float(step.energy),
                    dE=float(step.delta_e) if step.iter > 1 else None,
                    grad=float(step.grad_norm),
                    diis=int(step.diis_subspace),
                )
        # Structured log: per-iter rows sourced from the C++ trace,
        # which is the canonical record of what the SCF did. Periodic
        # SCFs (Python-driven) emit ``scf_iter`` live via
        # ProgressLogger.iteration() — see vibeqc.progress; the
        # molecular C++ path runs as a black box so we replay its
        # trace here. The matching ``scf_converged`` record is emitted
        # below via plog.converged() so periodic and molecular paths
        # share the funnel.
        for step in getattr(result, "scf_trace", []) or []:
            _slog.emit(
                "scf_iter",
                iter=int(step.iter),
                energy=float(step.energy),
                dE=(float(step.delta_e) if step.iter > 1 else None),
                grad_norm=float(step.grad_norm),
                diis_subspace=int(step.diis_subspace),
            )
        # Non-convergence path: write a crash dump too, since "ran
        # to max_iter" is a real failure mode. We don't raise — the
        # SCF returned a result and the user can inspect it — but we
        # leave the .dump alongside the .out so the diagnostic recipe
        # is the same as the exception path.
        if not bool(getattr(result, "converged", False)) and _crash_target is not None:
            dump_state = {
                "phase": f"scf.{resolved_method} (max_iter)",
                "scf_trace": list(getattr(result, "scf_trace", []) or []),
                "n_iters_completed": int(getattr(result, "n_iter", 0)),
            }
            for k_state, k_attr in (
                ("density", "density"),
                ("density_alpha", "density_alpha"),
                ("density_beta", "density_beta"),
                ("fock", "fock"),
                ("fock_alpha", "fock_alpha"),
                ("fock_beta", "fock_beta"),
                ("mo_coeffs", "mo_coeffs"),
                ("mo_coeffs_alpha", "mo_coeffs_alpha"),
                ("mo_coeffs_beta", "mo_coeffs_beta"),
            ):
                v = getattr(result, k_attr, None)
                if v is not None:
                    dump_state[k_state] = v
            _opts_for_dump = (
                {
                    "rhf": rhf_options,
                    "uhf": uhf_options,
                    "rks": rks_options,
                    "uks": uks_options,
                }.get(resolved_method)
                if resolved_method in ("rhf", "uhf", "rks", "uks")
                else None
            )
            dump_on_failure(
                _crash_target,
                None,
                dump_state,
                phase=f"scf.{resolved_method} (max_iter)",
                options=_opts_for_dump,
                molecule=molecule,
            )
            f.write(
                f"\n  SCF did not converge — crash dump written to "
                f"{_crash_target.with_suffix('.dump').name}\n"
            )
            f.flush()

        # Check if we got a SolverResult (non-mean-field method)
        if isinstance(result, SolverResult):
            f.write("\n  " + "-" * 60 + "\n")
            f.write("  Non-mean-field solver result\n")
            f.write("  " + "-" * 60 + "\n")
            f.write(_format_solver_trace(result) + "\n")
        else:
            f.write(
                format_scf_trace(
                    result, molecule=molecule, basis=basis_obj, include_banner=False
                )
                + "\n"
            )
        f.flush()
        plog.converged(
            n_iter=int(getattr(result, "n_iter", 0)),
            energy=float(getattr(result, "energy", 0.0)),
            converged=bool(getattr(result, "converged", False)),
        )

        # Post-SCF Coupled-Cluster (CCSD / CCSD(T)).
        if method in ("ccsd", "ccsd(t)") and bool(getattr(result, "converged", False)):
            # Guard: CCSD numerics are still under validation (see
            # docs/handover_ccsd.md).  Set VIBEQC_EXPERIMENTAL_CCSD=1
            # to proceed despite known issues.
            if not os.environ.get("VIBEQC_EXPERIMENTAL_CCSD", "").strip() == "1":
                raise RuntimeError(
                    "CCSD / CCSD(T) numerics are still under validation. "
                    "See docs/handover_ccsd.md for current status. "
                    "Set VIBEQC_EXPERIMENTAL_CCSD=1 to proceed anyway."
                )
            from .cc import CCSDOptions
            from .cc import run_ccsd as _run_ccsd_py

            cc_opts = CCSDOptions()
            cc_opts.compute_triples = method == "ccsd(t)"
            # Default frozen-core for first-row atoms (freeze 1s)
            has_first_row = any(atom.Z >= 3 and atom.Z <= 10 for atom in molecule.atoms)
            if has_first_row:
                cc_opts.n_frozen_core = 1
            cc_opts.resolve_aux_basis(basis)

            with (
                plog.stage(
                    "ccsd",
                    detail="DF-CCSD" + ("(T)" if cc_opts.compute_triples else ""),
                ),
                PerfScope("ccsd"),
            ):
                cc_result = _run_ccsd_py(molecule, basis_obj, result, cc_opts)

            _slog.emit(
                "ccsd_converged",
                converged=bool(cc_result.converged),
                n_iter=int(cc_result.n_iter),
                e_ccsd_correlation=float(cc_result.e_ccsd_correlation),
                e_ccsd=float(cc_result.e_ccsd),
                e_t=float(cc_result.e_t) if cc_opts.compute_triples else None,
                e_total=float(cc_result.e_total),
                t1_norm=float(cc_result.t1_norm),
                t2_norm=float(cc_result.t2_norm),
            )

            # Write CCSD iteration table
            triples_label = " CCSD(T)" if cc_opts.compute_triples else " CCSD"
            f.write("\n  Coupled-Cluster" + triples_label + "\n  " + "-" * 78 + "\n")
            if cc_opts.compute_triples:
                hdr = "  {:<5s}  {:>16s}  {:>12s}  {:>12s}  {:>12s}  {:>6s}".format(
                    "Iter", "E(CCSD)", "dE", "|R1|", "|R2|", "DIIS"
                )
            else:
                hdr = "  {:<5s}  {:>16s}  {:>12s}  {:>12s}  {:>12s}  {:>6s}".format(
                    "Iter", "E(CCSD)", "dE", "|R1|", "|R2|", "DIIS"
                )
            f.write(hdr + "\n")
            f.write("  " + "-" * 78 + "\n")
            for step in cc_result.cc_trace:
                f.write(
                    "  {:<5d}  {:>16.10f}  {:>12.6e}  {:>12.6e}  {:>12.6e}  {:>6d}\n".format(
                        int(step.iter),
                        float(step.energy),
                        float(step.delta_e) if step.iter > 1 else 0.0,
                        float(step.r1_norm),
                        float(step.r2_norm),
                        int(step.diis_subspace),
                    )
                )
            f.write("  " + "-" * 78 + "\n")
            if cc_result.converged:
                f.write(f"  CCSD converged in {cc_result.n_iter} iterations\n")
            else:
                f.write("  CCSD DID NOT CONVERGE\n")
            f.write(
                f"  E(CCSD correlation) = {cc_result.e_ccsd_correlation:16.10f} Ha\n"
                f"  E(CCSD)             = {cc_result.e_ccsd:16.10f} Ha\n"
            )
            if cc_opts.compute_triples:
                f.write(
                    f"  E(T) correction      = {cc_result.e_t:16.10f} Ha\n"
                    f"  E(CCSD(T))           = {cc_result.e_ccsd_t:16.10f} Ha\n"
                )
            f.write(
                f"  T1 norm              = {cc_result.t1_norm:16.10f}\n"
                f"  T2 norm              = {cc_result.t2_norm:16.10f}\n"
            )
            f.write("\n")
            f.flush()

            # Store CC result on the SCF result for downstream use
            result._cc_result = cc_result

        # Post-SCF dispersion (D3-BJ) as an additive correction.
        if d3_params is not None:
            disp = compute_d3bj(molecule, d3_params)
            e_scf = float(getattr(result, "energy", 0.0))
            e_total = e_scf + float(disp.energy)
            f.write(
                "\n  Dispersion correction (D3-BJ)\n"
                "  " + "-" * 52 + "\n"
                f"  {'s6':>10s} {d3_params.s6:14.6f}\n"
                f"  {'s8':>10s} {d3_params.s8:14.6f}\n"
                f"  {'a1':>10s} {d3_params.a1:14.6f}\n"
                f"  {'a2':>10s} {d3_params.a2:14.6f}\n"
                f"  {'E_disp':>10s} {disp.energy:14.8f} Ha"
                f"  ({disp.energy * 627.5094740631:+.4f} kcal/mol)\n"
                f"  {'E_SCF':>10s} {e_scf:14.8f} Ha\n"
                f"  {'E_total':>10s} {e_total:14.8f} Ha\n"
            )
            # Wrap the SCF result so callers can access both components
            # without breaking existing accesses to mo_coeffs, fock, etc.
            if not isinstance(result, SolverResult):
                result = _DispersionAugmented(result, float(disp.energy), d3_params)

        # ---- Composite 3c post-SCF corrections (D4 + gCP + SRB) -------
        # Only runs when a composite recipe is active. Each piece is
        # additive and logged separately; the composite total is the
        # SCF energy + every correction.
        e_d4 = 0.0
        e_gcp = 0.0
        e_srb = 0.0
        if use_d4:
            if not dftd4_available():
                raise ImportError(
                    "composite recipe requests D4 dispersion but the "
                    "optional 'dftd4' package is not installed.\n  "
                    "Install via `pip install -e '.[dispersion]'` or "
                    "`pip install dftd4`."
                )
            # dftd4 ships per-composite damping keyed on the composite
            # name (e.g. 'r2scan-3c', 'wb97x-3c') — distinct from the
            # parent-functional damping. Prefer the composite name.
            d4_func = (
                composite_recipe.name
                if composite_recipe is not None
                else (functional or "hf")
            )
            d4_result = compute_d4(
                molecule,
                d4_func,
                charge=float(molecule.charge),
            )
            e_d4 = float(d4_result.energy)
            f.write(
                "\n  Dispersion correction (D4)\n"
                "  " + "-" * 52 + "\n"
                f"  {'functional':>10s} {d4_result.functional!s:>14s}\n"
                f"  {'E_disp':>10s} {e_d4:14.8f} Ha"
                f"  ({e_d4 * 627.5094740631:+.4f} kcal/mol)\n"
            )

        if composite_recipe is not None and composite_recipe.gcp_basis:
            try:
                gcp_result = compute_gcp(molecule, composite_recipe.gcp_basis)
                e_gcp = float(gcp_result.energy)
                f.write(
                    "\n  Geometric counterpoise correction (gCP)\n"
                    "  " + "-" * 52 + "\n"
                    f"  {'basis':>10s} {gcp_result.basis_name!s:>14s}\n"
                    f"  {'E_gCP':>10s} {e_gcp:14.8f} Ha"
                    f"  ({e_gcp * 627.5094740631:+.4f} kcal/mol)\n"
                )
            except GCPDataMissing as exc:
                # gCP data table not bundled for this basis — log and
                # continue with E_gCP = 0 so the composite total is
                # explicitly marked incomplete rather than silently
                # wrong. Same posture as the dispersion builtin-stub.
                f.write(
                    "\n  Geometric counterpoise correction (gCP)\n"
                    "  " + "-" * 52 + "\n"
                    f"  SKIPPED — {exc}\n"
                )

        if composite_recipe is not None and composite_recipe.sr_mod is not None:
            srb = composite_recipe.sr_mod
            atomic_numbers = [int(a.Z) for a in molecule.atoms]
            positions = [list(a.xyz) for a in molecule.atoms]
            e_srb = float(srb.evaluate(atomic_numbers, positions))
            f.write(
                f"\n  {srb.name}\n"
                "  " + "-" * 52 + "\n"
                f"  {'E_SRB':>10s} {e_srb:14.8f} Ha"
                f"  ({e_srb * 627.5094740631:+.4f} kcal/mol)\n"
            )

        if composite_recipe is not None:
            composite_total = (
                float(getattr(result, "energy", 0.0))
                + float(getattr(result, "e_dispersion", 0.0))
                + e_d4
                + e_gcp
                + e_srb
            )
            f.write(
                f"\n  Composite total ({composite_recipe.name})\n"
                "  " + "-" * 52 + "\n"
                f"  {'E_SCF':>10s} "
                f"{float(getattr(result, 'energy', 0.0)):14.8f} Ha\n"
                f"  {'E_disp':>10s} "
                f"{float(getattr(result, 'e_dispersion', 0.0)) + e_d4:14.8f} Ha\n"
                f"  {'E_gCP':>10s} {e_gcp:14.8f} Ha\n"
                f"  {'E_SRB':>10s} {e_srb:14.8f} Ha\n"
                f"  {'E_total':>10s} {composite_total:14.8f} Ha\n"
            )

        has_mos = hasattr(result, "mo_energies") or hasattr(result, "mo_energies_alpha")
        if write_molden_file and has_mos:
            with (
                plog.stage("write_molden", detail=str(molden_path.name)),
                PerfScope("write_molden"),
            ):
                write_molden(
                    molden_path,
                    molecule,
                    basis_obj,
                    result,
                    title=str(output_stem.name),
                )
            f.write(f"\n  Molecular orbitals written to {molden_path.name}\n")
            f.flush()

        # Population dump (Phase O6). Always-on for molecular runs by
        # default — cheap to compute (the SCF density is in memory),
        # cheap to write, and downstream tooling shouldn't have to
        # screen-scrape the .out for charges + bond orders + dipole.
        # The matching block stays in .out for human reading; the
        # .txt + .json siblings are the machine-readable form.
        if write_population_file and has_mos:
            pop_txt = output_stem.parent / (output_stem.name + ".population.txt")
            try:
                with (
                    plog.stage("write_population", detail=str(pop_txt.name)),
                    PerfScope("write_population"),
                ):
                    write_population(
                        output_stem,
                        result,
                        basis_obj,
                        molecule,
                    )
                f.write(
                    f"  Population dump written to "
                    f"{pop_txt.name} + "
                    f"{output_stem.name}.population.json\n"
                )
                f.flush()
            except Exception as _pop_exc:
                f.write(
                    f"  (warning: population dump failed: "
                    f"{type(_pop_exc).__name__}: {_pop_exc})\n"
                )
                f.flush()

        # Volumetric cubes (Phase O6). Opt-in via write_cube=: True /
        # "density" / "homo" / "lumo" / list-of-those-or-int-indices.
        # Each cube is wrapped in its own try/except so a single
        # grid-evaluation failure on one MO doesn't block the others.
        _cube_req = parse_write_cube_kwarg(write_cube)
        if _cube_req and has_mos:
            if _cube_req.density:
                try:
                    with (
                        plog.stage("write_cube_density"),
                        PerfScope("write_cube_density"),
                    ):
                        cube_path = write_cube_density_for_run_job(
                            output_stem,
                            result,
                            basis_obj,
                            molecule,
                            spacing=cube_spacing,
                            padding=cube_padding,
                        )
                    f.write(f"  Density cube written to {cube_path.name}\n")
                    f.flush()
                except Exception as _cube_exc:
                    f.write(
                        f"  (warning: density cube write failed: "
                        f"{type(_cube_exc).__name__}: {_cube_exc})\n"
                    )
                    f.flush()
            if _cube_req.mo_labels:
                try:
                    _mo_targets = requested_mo_indices(
                        list(_cube_req.mo_labels),
                        result,
                    )
                except Exception as _exc:
                    f.write(
                        f"  (warning: MO label resolution failed: "
                        f"{type(_exc).__name__}: {_exc})\n"
                    )
                    _mo_targets = []
                for _idx, _name in _mo_targets:
                    try:
                        with (
                            plog.stage(f"write_cube_mo_{_name}"),
                            PerfScope(f"write_cube_mo_{_name}"),
                        ):
                            _mo_path = write_cube_mo_for_run_job(
                                output_stem,
                                result,
                                basis_obj,
                                molecule,
                                _idx,
                                _name,
                                spacing=cube_spacing,
                                padding=cube_padding,
                            )
                        f.write(f"  MO cube written to {_mo_path.name}\n")
                        f.flush()
                    except Exception as _cube_exc:
                        f.write(
                            f"  (warning: MO {_name} cube write failed: "
                            f"{type(_cube_exc).__name__}: {_cube_exc})\n"
                        )
                        f.flush()

        # Citations (Phase O5b). Assemble the reference list for this
        # job (software + libint + libxc-if-DFT + basis-set +
        # functional + DIIS + dispersion-if-used + ECP-if-used) and
        # emit {stem}.bibtex + .references siblings. The matching
        # "## References" block is appended to the .out a few lines
        # below so the text log carries the same references. Failures
        # are non-fatal: a routing miss or a writer crash leaves a
        # warning in the .out and the SCF result is unaffected.
        cite_block_text: str | None = None
        _refs = None
        if citations:
            try:
                _cite_db = load_default_database()
                _dispersion_key = None
                if d3_params is not None:
                    # The framework only ships D3(BJ) today; the
                    # database routes "d3bj" → Grimme 2010 + 2011.
                    _dispersion_key = "d3bj"
                # Detect direct SCF usage for citation routing.
                _uses_direct = _detect_direct_scf(
                    resolved_method,
                    rhf_options,
                    uhf_options,
                    rks_options,
                    uks_options,
                    basis,
                    molecule,
                )
                # For composite-3c jobs (hf-3c / pbeh-3c / …) route
                # citations by the *composite keyword*, not the
                # resolved mean-field method — the composite carries
                # its own defining paper (+ gCP / D3) in
                # routes.methods. ``composite_recipe`` is None for
                # non-composite jobs, in which case the resolved
                # method name is the right routing key.
                _cite_method = (
                    composite_recipe.name
                    if composite_recipe is not None
                    else resolved_method
                )
                _refs = _cite_db.assemble(
                    method=_cite_method,
                    basis=basis,
                    functional=functional,
                    dispersion=_dispersion_key,
                    uses_ase=optimize,  # BFGS path uses ASE
                    direct_scf=_uses_direct,
                )
                write_bibtex(output_stem, _refs)
                write_references(output_stem, _refs)
                cite_block_text = format_references_block(_refs)
                _bib_name = output_stem.with_suffix(".bibtex").name
                _ref_name = output_stem.with_suffix(".references").name
                f.write(f"  Citations written to {_bib_name} + {_ref_name}\n")
                f.flush()
            except Exception as _cite_exc:
                f.write(
                    f"  (warning: citation emission failed: "
                    f"{type(_cite_exc).__name__}: {_cite_exc})\n"
                )
                f.flush()

        # Final geometry as a plain XYZ sibling (Phase O3). ``molecule``
        # at this point is the geometry the SCF actually ran on — the
        # optimised one when ``optimize=True``, otherwise the input.
        # Energy in Hartree is appended to the comment line so ASE /
        # Open Babel can recover the SCF result from the .xyz alone.
        if write_xyz_file:
            xyz_path = output_stem.with_suffix(".xyz")
            try:
                energy_ha = float(getattr(result, "energy", float("nan")))
            except (TypeError, ValueError):
                energy_ha = float("nan")
            try:
                with (
                    plog.stage("write_xyz", detail=str(xyz_path.name)),
                    PerfScope("write_xyz"),
                ):
                    write_xyz(
                        output_stem,
                        molecule,
                        energy_ha=(None if energy_ha != energy_ha else energy_ha),
                    )
                f.write(f"  Final geometry written to {xyz_path.name}\n")
                f.flush()
            except Exception as _xyz_exc:
                # Geometry writer is best-effort — never let a `.xyz`
                # failure tank a finished SCF. The user can always
                # re-run with ``write_xyz_file=False``.
                f.write(
                    f"  (warning: .xyz writer failed: "
                    f"{type(_xyz_exc).__name__}: {_xyz_exc})\n"
                )
                f.flush()

        # Structured log: properties event. Best-effort — the helpers
        # raise on basis-set shapes vibe-qc's Python side can't
        # parse; a failure here must never tank the run, so we wrap
        # each section. The event only fires when at least one
        # property was successfully computed.
        if _slog.enabled and has_mos:
            _props_payload: dict[str, Any] = {}
            try:
                from .properties import (
                    dipole_moment as _dipole,
                )
                from .properties import (
                    loewdin_charges as _low,
                )
                from .properties import (
                    mulliken_charges as _mul,
                )

                _props_payload["mulliken"] = [
                    float(x) for x in _mul(result, basis_obj, molecule)
                ]
                _props_payload["loewdin"] = [
                    float(x) for x in _low(result, basis_obj, molecule)
                ]
                _dip = _dipole(result, basis_obj, molecule)
                _props_payload["dipole"] = {
                    "x": float(_dip.x),
                    "y": float(_dip.y),
                    "z": float(_dip.z),
                    "total": float(_dip.total),
                    "total_debye": float(_dip.total_debye),
                }
            except Exception:
                # Properties are best-effort; the trace already
                # carries a degraded properties block, the structured
                # log just skips the event.
                pass
            if _props_payload:
                _slog.emit("properties", **_props_payload)

        if optimize:
            if _opt_backend == "ase" and traj_path.is_file():
                f.write(f"  Optimization trajectory written to {traj_path.name}\n")
            f.flush()

        t_total = time.perf_counter() - t_job_start
        n_iter = getattr(result, "n_iter", 0)
        iter_avg = (t_scf / n_iter) if n_iter > 0 else float("nan")
        f.write("\n  Timings (wall clock, seconds)\n  " + "-" * 52 + "\n")
        if optimize:
            f.write(f"  {'Geometry optimization':<32s} {t_opt:12.3f}\n")
        f.write(f"  {'SCF total':<32s} {t_scf:12.3f}\n")
        f.write(
            f"  {'SCF avg. per iteration':<32s} {iter_avg:12.3f}  ({n_iter} iters)\n"
        )
        f.write(f"  {'Job total':<32s} {t_total:12.3f}\n")
        f.write(
            f"  Used {threads_in_use} OpenMP thread"
            f"{'s' if threads_in_use != 1 else ''}.\n"
        )
        f.flush()
        plog.info(f"Job total {t_total:.2f}s — output written to {out_path}")
        _slog.emit(
            "job_end",
            total_wall_s=float(t_total),
            scf_wall_s=float(t_scf),
            opt_wall_s=float(t_opt),
            n_iter=int(getattr(result, "n_iter", 0)),
            converged=bool(getattr(result, "converged", False)),
            energy=float(getattr(result, "energy", 0.0)),
            out_path=str(out_path),
        )

        # "## References" block (Phase O5b) — embed the assembled
        # citation list in the .out so the text log is self-contained.
        if cite_block_text:
            f.write("\n" + cite_block_text + "\n")
            f.flush()


        # --- Harmonic vibrational analysis (finite-difference Hessian) ---
        if hessian:
            _scf_converged = bool(getattr(result, "converged", False))
            if not _scf_converged:
                f.write(
                    "\n  ## Vibrational Frequencies\n"
                    "  " + "-" * 52 + "\n"
                    "  SKIPPED -- SCF did not converge.\n"
                )
                f.flush()
            else:
                with (
                    plog.stage("hessian_fd", detail="finite-difference Hessian"),
                    PerfScope("hessian_fd"),
                ):
                    try:
                        from .hessian import (
                            HessianFDOptions,
                            compute_hessian_fd,
                            ir_intensities,
                        )
                        _hess_scf_opts = {
                            "rhf": rhf_options,
                            "uhf": uhf_options,
                            "rks": rks_options,
                            "uks": uks_options,
                        }.get(resolved_method)
                        _hess_opts = HessianFDOptions(
                            include_dipole_derivatives=True,
                        )
                        hessian_result = compute_hessian_fd(
                            molecule,
                            basis,
                            method=resolved_method.upper(),
                            scf_options=_hess_scf_opts,
                            hessian_options=_hess_opts,
                        )
                        f.write(
                            f"\n  ## Vibrational Frequencies\n"
                            f"  " + "-" * 52 + "\n"
                            f"  Finite-difference Hessian"
                            f"  (step = {_hess_opts.step_bohr:.3f} bohr,"
                            f"  {hessian_result.n_displacements} displacements)\n"
                            f"  Imaginary modes: {hessian_result.imaginary_count}\n"
                            f"  Linear molecule: {hessian_result.is_linear}\n\n"
                        )
                        _n_skip = 5 if hessian_result.is_linear else 6
                        _freqs = hessian_result.frequencies_cm1
                        _ir = None
                        try:
                            _ir = ir_intensities(hessian_result)
                        except Exception as _ir_exc:
                            f.write(
                                f"  (warning: IR intensities not available: "
                                f"{type(_ir_exc).__name__}: {_ir_exc})\n"
                            )
                        _header = "  {:<6s}  {:>10s}" + (
                            "  {:>12s}" if _ir is not None else ""
                        )
                        if _ir is not None:
                            f.write(
                                _header.format("Mode", "Freq/cm\u207b\u00b9", "IR/(km/mol)")
                                + "\n"
                            )
                            f.write("  " + "-" * 52 + "\n")
                            for k in range(_n_skip, len(_freqs)):
                                _label = f"{k - _n_skip + 1}"
                                _freq = _freqs[k]
                                _iri = _ir[k]
                                if _freq < 0:
                                    _freq_str = f"{abs(_freq):.1f}i"
                                else:
                                    _freq_str = f"{_freq:.1f}"
                                f.write(
                                    f"  {_label:<6s}  {_freq_str:>10s}  {_iri:>12.2f}\n"
                                )
                        else:
                            f.write(
                                _header.format("Mode", "Freq/cm\u207b\u00b9") + "\n"
                            )
                            f.write("  " + "-" * 52 + "\n")
                            for k in range(_n_skip, len(_freqs)):
                                _label = f"{k - _n_skip + 1}"
                                _freq = _freqs[k]
                                if _freq < 0:
                                    _freq_str = f"{abs(_freq):.1f}i"
                                else:
                                    _freq_str = f"{_freq:.1f}"
                                f.write(f"  {_label:<6s}  {_freq_str:>10s}\n")
                        f.write("\n")
                        f.flush()
                    except Exception as _hess_exc:
                        f.write(
                            f"\n  ## Vibrational Frequencies\n"
                            f"  " + "-" * 52 + "\n"
                            f"  FAILED: {type(_hess_exc).__name__}: {_hess_exc}\n"
                        )
                        f.flush()
                        hessian_result = None

    # --- QVF visualisation archive (v1) ----------------------------------
    if output_qvf:
        try:
            from vibeqc.output.formats.qvf import write_qvf as _write_qvf

            _bibtex = None
            if _refs is not None:
                try:
                    from vibeqc.output.citations.bibtex import format_bibtex

                    _bibtex = format_bibtex(_refs)
                except Exception:
                    pass
            _pop = None
            if write_population_file and has_mos:
                try:
                    from vibeqc.output.formats.population import (
                        compute_population_summary,
                    )

                    _pop = compute_population_summary(
                        result,
                        basis_obj,
                        molecule,
                    )
                except Exception:
                    pass

            _qvf_vol_data = None
            if has_mos and _cube_req.density:
                try:
                    from vibeqc.cube import _density_on_grid, make_uniform_grid

                    grid = make_uniform_grid(
                        molecule,
                        spacing=cube_spacing,
                        padding=cube_padding,
                    )
                    D = np.asarray(
                        result.density_a + result.density_b
                        if hasattr(result, "density_b")
                        else result.density,
                        dtype=float,
                    )
                    rho = _density_on_grid(D, basis_obj, grid)
                    # Reshape flat array to 3D using grid.shape
                    nx, ny, nz = grid.shape
                    data_3d = rho.reshape((nx, ny, nz))
                    # Convert to Å
                    bohr_to_a = 0.529177210903
                    origin_a = np.asarray(grid.origin, dtype=float) * bohr_to_a
                    spacing_a = np.asarray(grid.spacing, dtype=float) * bohr_to_a
                    voxel_vecs_a = np.diag(spacing_a)
                    _qvf_vol_data = {
                        "density": (
                            np.asarray(data_3d, dtype=np.float32),
                            origin_a,
                            voxel_vecs_a,
                        ),
                    }
                except Exception:
                    pass

            _qvf_mo_list = None
            if has_mos and _cube_req.mo_labels:
                try:
                    from vibeqc.output.formats.qvf import qvf_mo_data

                    _indices = requested_mo_indices(
                        _cube_req.mo_labels, result, molecule
                    )
                    _qvf_mo_list = qvf_mo_data(
                        result,
                        basis_obj,
                        molecule,
                        _indices,
                        spacing=cube_spacing,
                        padding=cube_padding,
                    )
                except Exception:
                    pass

            _qvf_path = _write_qvf(
                output_stem,
                _output_plan,
                molecule=molecule,
                result=result,
                method=resolved_method,
                basis=basis,
                functional=functional,
                population_summary=_pop,
                bibtex_content=_bibtex,
                wall_seconds=t_total,
                trajectory_frames=_traj_frames if _traj_frames else None,
                trajectory_energies=_traj_energies if _traj_energies else None,
                volume_data=_qvf_vol_data,
                mo_data=_qvf_mo_list,
                hessian_result=hessian_result,
            )
            try:
                _output_writer.record(_qvf_path, wall_time_s=t_total)
            except Exception:
                pass
        except Exception as _qvf_exc:
            import warnings

            warnings.warn(f"QVF write failed: {type(_qvf_exc).__name__}: {_qvf_exc}")

    # Finalise the {output}.system manifest (pre-v1.0 dispatch-
    # overhaul — replaces the bare end-of-job write_system_manifest
    # call). The OutputWriter, constructed before the SCF, already
    # wrote the [vibeqc] / [host] / [cpu] / ... / [plan] sections +
    # [outputs].status = "running". Here we (1) record every declared
    # artefact that actually landed on disk into [[outputs.files]]
    # — with bytes + sha256 + wall-time — so vq's fetch / status can
    # see exactly what to copy back, and (2) flip the status to
    # "complete" with the final wall-time. A non-converged SCF still
    # counts as "complete": the job ran to completion; the .out + the
    # result object carry the non-convergence.
    for _pf in _output_plan.files:
        if _pf.path.is_file():
            try:
                _output_writer.record(_pf.path)
            except Exception:
                # record() stats + hashes the file; a brittle write
                # at wrap-up must never tank a finished job.
                pass
    _output_writer.finish(wall_seconds=t_total)

    return result


__all__ = ["run_job"]
