Source code for vibeqc.runner

"""High-level "run a job" convenience — classic QC-program workflow.

A single call writes the output text file, the molden orbital file, and
(for geometry optimization) a trajectory animation — the kind of shape
users expect from Gaussian / ORCA / NWChem:

    from vibeqc import Atom, Molecule
    from vibeqc.runner import run_job

    mol = Molecule.from_xyz("h2o.xyz")
    run_job(mol, basis="6-31g*", method="rhf", output="h2o")
    # -> h2o.out, h2o.molden

    run_job(mol, basis="6-31g*", method="rks", functional="PBE",
            optimize=True, output="h2o_opt")
    # -> h2o_opt.out, h2o_opt.molden, h2o_opt.traj

All text output goes to ``{output}.out``. The molden file is written iff
the method produces molecular orbitals (RHF / UHF / RKS / UKS) and
``write_molden=True``. Geometry optimization uses ASE's BFGS and writes
one frame per step to ``{output}.traj``.
"""

from __future__ import annotations

import os
import time
from pathlib import Path
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union

from ._vibeqc_core import (
    BasisSet,
    D3BJParams,
    Molecule,
    RHFOptions,
    RKSOptions,
    UHFOptions,
    UKSOptions,
    get_num_threads,
    run_rhf,
    run_rks,
    run_uhf,
    run_uks,
    set_num_threads,
)
from .banner import VIBEQC_VERSION, banner, library_versions
from .composites import (
    Availability,
    CompositeRecipe,
    CompositeUnavailable,
    resolve_composite,
)
from .crash_dump import crash_dump_context, dump_on_failure
from .dispersion import compute_d3bj, d3bj_params_for
from .dispersion_d4 import compute_d4, dftd4_available
from .dispersion_d4_parameters import normalize_d4_key
from .gcp import GCPDataMissing, compute_gcp
from .io.molden import write_molden
from .memory import check_memory, estimate_memory, format_memory_report
from .output import (
    OutputPlan,
    OutputWriter,
    dry_run_manifest,
    is_dry_run_requested,
    parse_write_cube_kwarg,
    requested_mo_indices,
    write_cube_density_for_run_job,
    write_cube_mo_for_run_job,
    write_population,
    write_xyz,
)
from .output._errors import (
    OutputFailureKind,
    warn_output_failure,
    warn_writer_failure,
)
from .output.citations import (
    format_references_block,
    load_default_database,
    write_bibtex,
    write_references,
)
from .perf import PerfScope
from .perf import perf_log as _perf_log_ctx
from .progress import ProgressLogger, resolve_progress
from .scf_log import format_scf_trace
from .solvers import (
    CASCIOptions,
    CASPT2Options,
    CASSCFOptions,
    DMRGOptions,
    Hamiltonian,
    SelectedCIOptions,
    SolverResult,
    TranscorrelatedOptions,
    V2RDMOptions,
    build_hamiltonian_mo,
    build_transcorrelated_hamiltonian,
    get_hf_orbital_provider,
    solve_dmrg,
    solve_selected_ci,
    solve_v2rdm,
)
from .structured_log import (
    StructuredLog,
    run_fingerprint,
)
from .structured_log import (
    structured_log as _structured_log_ctx,
)

DispersionSpec = Optional[object]  # str functional name | D3BJParams | None


def _resolve_dispersion(
    dispersion: DispersionSpec,
    functional: Optional[str],
) -> Optional[D3BJParams]:
    """Normalize the ``dispersion=`` argument into a D3BJParams object,
    or None if no dispersion correction is requested.

    * ``None`` or empty string → no dispersion.
    * ``True`` / ``"d3bj"`` → use D3-BJ params for the current functional.
    * ``"pbe" / "b3lyp" / ...`` → use D3-BJ params for that functional
      explicitly (useful for HF + D3-BJ).
    * :class:`D3BJParams` instance → used as-is.
    """
    if dispersion is None or dispersion is False:
        return None
    if isinstance(dispersion, D3BJParams):
        return dispersion
    if isinstance(dispersion, str):
        key = dispersion.strip().lower()
        if key in ("", "none", "false"):
            return None
        if key in ("d3bj", "true", "yes", "on"):
            if not functional:
                raise ValueError(
                    "dispersion='d3bj' requires a DFT functional so we can "
                    "look up damping parameters; pass functional='pbe', "
                    "etc., or give a functional name directly as the "
                    "dispersion argument."
                )
            lookup = functional
        else:
            lookup = dispersion
        p = d3bj_params_for(lookup)
        if p is None:
            raise ValueError(
                f"dispersion={dispersion!r}: no D3-BJ parameters found "
                f"for functional {lookup!r} (neither builtin nor dftd3 "
                f"backend knows this name). Install the dftd3 backend "
                f"for the full Grimme parameter set: "
                f"`pip install dftd3` into your vibe-qc venv, or "
                f"`pip install -e '.[dispersion]'` from the repo checkout."
            )
        return p
    if dispersion is True:
        if not functional:
            raise ValueError(
                "dispersion=True requires a DFT functional; pass functional='pbe', etc."
            )
        return d3bj_params_for(functional)
    raise TypeError(
        f"dispersion must be None, bool, str, or D3BJParams; got "
        f"{type(dispersion).__name__}"
    )


class _DispersionAugmented:
    """Transparent wrapper around an SCF result that exposes the
    dispersion contribution as extra attributes without overriding the
    raw SCF ``.energy``. Users opt in to the total energy via
    :attr:`energy_total`; everything else (mo_energies, density, fock,
    converged, n_iter, scf_trace, …) forwards to the underlying
    pybind11 SCF-result struct.

    We wrap rather than mutate because the C++ result classes have
    ``def_readonly`` fields — they can't be extended in place from
    Python.
    """

    __slots__ = ("_scf", "e_dispersion", "dispersion_params")

    def __init__(self, scf_result, e_dispersion: float, params: D3BJParams):
        object.__setattr__(self, "_scf", scf_result)
        object.__setattr__(self, "e_dispersion", e_dispersion)
        object.__setattr__(self, "dispersion_params", params)

    def __getattr__(self, name):
        return getattr(self._scf, name)

    @property
    def energy_total(self) -> float:
        """SCF + dispersion energy (Hartree)."""
        return float(self._scf.energy) + float(self.e_dispersion)

    def __repr__(self) -> str:
        return (
            f"{type(self._scf).__name__}(energy={self._scf.energy:.10f}, "
            f"e_dispersion={self.e_dispersion:+.6e}, "
            f"energy_total={self.energy_total:.10f})"
        )


class _MP2Augmented:
    """Transparent wrapper exposing a post-SCF MP2 result without
    overriding the raw SCF ``.energy``. :attr:`mp2` is the C++
    ``MP2Result`` (``e_hf`` / ``e_os`` / ``e_ss`` / ``e_correlation`` /
    ``e_total``); :attr:`energy_total` is the MP2 total energy. Everything
    else (mo_energies, density, fock, converged, n_iter, scf_trace, …)
    forwards to the underlying pybind11 SCF-result struct.

    Mirrors :class:`_DispersionAugmented`: we wrap rather than mutate
    because the C++ result classes have ``def_readonly`` fields and no
    ``__dict__`` (so ``result.mp2 = …`` would raise).
    """

    __slots__ = ("_scf", "mp2")

    def __init__(self, scf_result, mp2_result):
        object.__setattr__(self, "_scf", scf_result)
        object.__setattr__(self, "mp2", mp2_result)

    def __getattr__(self, name):
        return getattr(self._scf, name)

    @property
    def energy_total(self) -> float:
        """SCF + MP2 correlation energy (Hartree)."""
        return float(self.mp2.e_total)

    def __repr__(self) -> str:
        return (
            f"{type(self._scf).__name__}+MP2("
            f"e_scf={float(self._scf.energy):.10f}, "
            f"e_corr={float(self.mp2.e_correlation):+.6e}, "
            f"e_mp2_total={float(self.mp2.e_total):.10f})"
        )


class _DLPNOMP2Augmented:
    """SCF result + DLPNO-MP2 result, attribute-forwarding wrapper.

    Mirrors :class:`_MP2Augmented` — wraps rather than mutates because the
    C++ SCF result has ``def_readonly`` fields and no ``__dict__``.
    """

    __slots__ = ("_scf", "dlpno_mp2")

    def __init__(self, scf_result, dlpno_result):
        object.__setattr__(self, "_scf", scf_result)
        object.__setattr__(self, "dlpno_mp2", dlpno_result)

    def __getattr__(self, name):
        return getattr(self._scf, name)

    @property
    def energy_total(self) -> float:
        """SCF + DLPNO-MP2 correlation energy (Hartree)."""
        return float(self.dlpno_mp2.e_total)

    def __repr__(self) -> str:
        return (
            f"{type(self._scf).__name__}+DLPNO-MP2("
            f"e_scf={float(self._scf.energy):.10f}, "
            f"e_corr={float(self.dlpno_mp2.e_corr):+.6e}, "
            f"e_total={float(self.dlpno_mp2.e_total):.10f})"
        )


class _DLPNOCCSDAugmented:
    """SCF result + DLPNO-CCSD pilot result, attribute-forwarding wrapper."""

    __slots__ = ("_scf", "dlpno_ccsd")

    def __init__(self, scf_result, cc_result):
        object.__setattr__(self, "_scf", scf_result)
        object.__setattr__(self, "dlpno_ccsd", cc_result)

    def __getattr__(self, name):
        return getattr(self._scf, name)

    @property
    def energy_total(self) -> float:
        """SCF + CCSD correlation (+ (T) if computed), Hartree."""
        return float(self.dlpno_ccsd.e_total)

    def __repr__(self) -> str:
        return (
            f"{type(self._scf).__name__}+DLPNO-CCSD("
            f"e_scf={float(self._scf.energy):.10f}, "
            f"e_corr={float(self.dlpno_ccsd.e_corr):+.6e}, "
            f"e_t={float(self.dlpno_ccsd.e_t):+.3e}, "
            f"e_total={float(self.dlpno_ccsd.e_total):.10f})"
        )


class _CCSDAugmented:
    """SCF result + CCSD/CCSD(T) result, attribute-forwarding wrapper.

    Mirrors :class:`_MP2Augmented`: wraps rather than mutates because the
    C++ SCF result has ``def_readonly`` fields and no ``__dict__``.
    :attr:`ccsd` is the C++ ``CCSDResult`` (``e_ccsd_correlation`` /
    ``e_ccsd`` / ``e_t`` / ``e_ccsd_t`` / ``cc_trace`` / ...).
    """

    __slots__ = ("_scf", "ccsd")

    def __init__(self, scf_result, cc_result):
        object.__setattr__(self, "_scf", scf_result)
        object.__setattr__(self, "ccsd", cc_result)

    def __getattr__(self, name):
        return getattr(self._scf, name)

    @property
    def energy_total(self) -> float:
        """SCF + CCSD (+ (T) when computed) total energy (Hartree)."""
        return float(self.ccsd.e_total)

    def __repr__(self) -> str:
        return (
            f"{type(self._scf).__name__}+CCSD("
            f"e_scf={float(self._scf.energy):.10f}, "
            f"e_corr={float(self.ccsd.e_ccsd_correlation):+.6e}, "
            f"e_t={float(self.ccsd.e_t):+.6e}, "
            f"e_total={float(self.ccsd.e_total):.10f})"
        )


class _OVGFAugmented:
    """Transparent wrapper exposing OVGF / GF2 quasiparticle results.

    :attr:`ovgf` is the list of :class:`vibeqc.propagator.QuasiparticleResult`
    (one per corrected orbital, carrying ε_scf / ε_qp / pole strength).  All SCF
    attributes forward to the underlying result (mirrors :class:`_MP2Augmented`;
    the C++ result has no ``__dict__``).
    """

    __slots__ = ("_scf", "ovgf")

    def __init__(self, scf_result, ovgf_results):
        object.__setattr__(self, "_scf", scf_result)
        object.__setattr__(self, "ovgf", ovgf_results)

    def __getattr__(self, name):
        return getattr(self._scf, name)

    def __repr__(self) -> str:
        n = len(self.ovgf)
        return f"{type(self._scf).__name__}+OVGF({n} quasiparticle orbitals)"


if TYPE_CHECKING:
    from .mlip import MLIPOptions
    from .semiempirical.methods.msindo_ccm import CCMOptions
    from .thermo import ThermoOptions


Method = Literal[
    "rhf",
    "rks",
    "uhf",
    "uks",
    "auto",
    "selected_ci",
    "dmrg",
    "v2rdm",
    "transcorrelated_ci",
    "casci",
    "mrci",
    "casscf",
    "fci",
    "ccsd",
    "ccsd(t)",
    # Post-SCF Møller–Plesset (RHF reference + native C++ MP2; SCS/SOS via
    # the c_os/c_ss spin-component scaling — see vibeqc.correlation).
    "mp2",
    "scs-mp2",
    "sos-mp2",
    # DLPNO-MP2 (RHF reference + vibeqc.dlpno.mp2 — Foster-Boys LMOs, PAO
    # domains, semicanonical PNOs, coupled LMP2 residuals; Pinski 2015).
    "dlpno-mp2",
    # DLPNO-CCSD / CCSD(T) correctness pilot (vibeqc.dlpno.ccsd —
    # subspace-projected through the FCI-anchored spin-orbital engine;
    # O(N^6), capped at max_nbf; Riplinger 2013).
    "dlpno-ccsd",
    "dlpno-ccsd(t)",
    # Green's-function quasiparticle IPs/EAs (RHF reference + the diagonal
    # second-order self-energy — see vibeqc.propagator).
    "ovgf",
    # Composite 3c keywords (v0.9.0 — see vibeqc.composites). The
    # dispatcher resolves these via _apply_composite before any of the
    # other Method literal branches see them.
    "hf-3c",
    "pbeh-3c",
    "b97-3c",
    "b3lyp-3c",
    "r2scan-3c",
    "wb97x-3c",
    "hse-3c",
    # Semiempirical methods.
    "dftb0",
    "scc_dftb",
    "pm6",
    "gfn2_xtb",
    "om1",
    "om2",
    "om3",
    "msindo",
    # Periodic MSINDO via the Cyclic Cluster Model (needs ccm_options with the
    # supercell lattice vectors).  Routed through vibeqc.semiempirical.methods.
    # msindo_ccm.run_ccm.
    "ccm",
    # Machine-learning interatomic potential — external pre-trained
    # model (ACEsuit MACE). Routed through vibeqc.mlip.mace.
    "mace",
]

# Machine-learning interatomic potentials (external pre-trained models,
# routed through vibeqc.mlip). Basis-set-free like the semiempirical
# methods; unlike them, vibe-qc drives a third-party pre-trained forward
# pass (maintainer-approved CLAUDE.md §10 extension). Kept as a module
# constant so the basis-skip sites in run_job stay in sync.
_MLIP_METHODS = frozenset({"mace"})

# Post-SCF correlation methods: an RHF/UHF SCF runs first (resolved_method),
# but citations + the output label key off the *original* method so the
# correlation papers (routes.methods.{ccsd,ccsd(t),mp2,scs-mp2,sos-mp2}) fire.
_POSTSCF_CITE_METHODS = frozenset(
    {"ccsd", "ccsd(t)", "mp2", "scs-mp2", "sos-mp2", "dlpno-mp2",
     "dlpno-ccsd", "dlpno-ccsd(t)", "ovgf"}
)


def _select_method(
    method: Method, molecule: Molecule, functional: Optional[str]
) -> str:
    """Resolve ``method='auto'`` against molecule.multiplicity + functional.

    Heuristic wavefunction routing by electron count:
    - ≤4 electrons → FCI (exact for tiny systems)
    - ≤8 electrons → Selected-CI (systematically improvable)
    - >8 electrons → HF (via SCF)

    ``ccsd`` and ``ccsd(t)`` run RHF SCF first, then post-SCF coupled-cluster."""
    if method == "auto":
        if functional:
            return "uks" if molecule.multiplicity > 1 else "rks"
        n_elec = molecule.n_electrons()
        if n_elec <= 4:
            return "fci"
        if n_elec <= 8:
            return "selected_ci"
        return "uhf" if molecule.multiplicity > 1 else "rhf"
    if method in ("ccsd", "ccsd(t)"):
        return "rhf"
    # Post-SCF MP2 (and its spin-component-scaled variants) runs the mean-field
    # SCF first — RHF for closed shell, UHF for open shell (native run_ump2);
    # the post-SCF step keys off the original ``method`` downstream.
    if method in ("mp2", "scs-mp2", "sos-mp2"):
        return "uhf" if molecule.multiplicity > 1 else "rhf"
    # DLPNO-MP2 is closed-shell only for now (open-shell DLPNO is a later
    # milestone); the dispatch branch raises a clear error on multiplicity.
    if method in ("dlpno-mp2", "dlpno-ccsd", "dlpno-ccsd(t)"):
        return "rhf"
    # OVGF (diagonal-self-energy) runs the mean-field SCF first — RHF for closed
    # shell, UHF for open shell (the spin-resolved spin-orbital self-energy).
    if method == "ovgf":
        return "uhf" if molecule.multiplicity > 1 else "rhf"
    return method


def _apply_composite(
    method: str,
    *,
    basis: Optional[str],
    functional: Optional[str],
    dispersion,
    molecule: Molecule,
) -> tuple[str, str, Optional[str], object, Optional[CompositeRecipe]]:
    """Resolve a composite-3c ``method=`` keyword (``"hf-3c"``,
    ``"pbeh-3c"``, …) into the (method, basis, functional, dispersion,
    recipe) tuple the rest of :func:`run_job` consumes.

    Non-composite ``method`` values pass through unchanged with
    ``recipe = None``. For a composite:

    * basis / functional / dispersion are taken from the recipe when
      the caller left them at defaults; an explicit caller basis wins
      with a warning (deliberate-experiment escape hatch).
    * D3-BJ recipes carry their own re-fit damping (``d3bj_damping``);
      it is passed straight through as a :class:`D3BJParams` so the
      dispersion module doesn't fall back to a wrong per-functional
      lookup.
    * Recipes flagged ``PENDING_*`` raise :class:`CompositeUnavailable`
      with a pointer at the gating roadmap item.
    """
    recipe = resolve_composite(method)
    if recipe is None:
        return method, basis or "", functional, dispersion, None

    if recipe.availability in (
        Availability.PENDING_F1,
        Availability.PENDING_F3,
        Availability.PENDING_ECP,
    ):
        raise CompositeUnavailable(
            f"method={method!r}: {recipe.availability.value}. "
            f"This composite depends on infrastructure not yet on "
            f"main. See docs/user_guide/composites.md § Availability.\n"
            f"  Notes: {recipe.notes}"
        )

    resolved_basis = basis if basis else recipe.basis
    if basis and basis.lower() != recipe.basis.lower():
        import warnings

        warnings.warn(
            f"method={method!r} specifies basis={recipe.basis!r}, but "
            f"caller passed basis={basis!r}. Honouring the explicit "
            f"basis — the published 3c parameters were fit at the "
            f"composite's native basis and may not transfer.",
            stacklevel=2,
        )
    resolved_functional = functional if functional else recipe.functional

    if dispersion is None:
        if recipe.dispersion == "d3bj" and recipe.d3bj_damping is not None:
            resolved_dispersion: object = D3BJParams(
                s6=recipe.d3bj_damping.s6,
                s8=recipe.d3bj_damping.s8,
                a1=recipe.d3bj_damping.a1,
                a2=recipe.d3bj_damping.a2,
            )
        else:
            resolved_dispersion = recipe.dispersion
    else:
        resolved_dispersion = dispersion

    # Pure-HF composites (recipe.functional is None — only HF-3c today)
    # must route straight to the HF SCF driver. Handing "auto" to
    # _select_method with no functional would drop into the
    # wavefunction auto-ladder (≤4e → FCI, ≤8e → selected-CI), so a
    # turnkey hf-3c on H2 silently ran fci(ndet=4) instead of HF
    # (audit F1.1). DFT composites keep "auto" → _select_method picks
    # rks/uks from multiplicity.
    if resolved_functional is None:
        resolved_method = "uhf" if molecule.multiplicity > 1 else "rhf"
    else:
        resolved_method = "auto"
    return (
        resolved_method,
        resolved_basis,
        resolved_functional,
        resolved_dispersion,
        recipe,
    )


def _detect_scf_accelerator(
    resolved_method: str,
    rhf_options: Optional[RHFOptions],
    uhf_options: Optional[UHFOptions],
    rks_options: Optional[RKSOptions],
    uks_options: Optional[UKSOptions],
) -> Optional[str]:
    """Return the lower-cased SCFAccelerator name (``"diis"`` / ``"ediis"``
    / ``"ediis_diis"`` / ``"kdiis"`` / ``"adiis"``) actually used by the
    resolved SCF, or ``None`` when no options struct is in play (post-SCF
    / non-mean-field methods).

    The citation router uses this to fire the right SCF-accelerator
    references — Hu-Yang 2010 for ADIIS, Garza-Scuseria 2012 for KDIIS,
    Kudin-Scuseria-Cancès 2002 for EDIIS — instead of always crediting
    plain DIIS (Pulay 1980 / 1982).
    """
    opts = {
        "rhf": rhf_options,
        "uhf": uhf_options,
        "rks": rks_options,
        "uks": uks_options,
    }.get(resolved_method)
    if opts is None:
        return None
    accel = getattr(opts, "scf_accelerator", None)
    if accel is None:
        return None
    name = getattr(accel, "name", None) or str(accel)
    return name.strip().lower()


def _detect_uses_ecp(
    rhf_options: Optional[RHFOptions],
    uhf_options: Optional[UHFOptions],
    rks_options: Optional[RKSOptions],
    uks_options: Optional[UKSOptions],
) -> bool:
    """Return True when any options struct carries a non-empty
    ``ecp_centers`` list — the runtime signal that libecpint will build
    the V_ECP operator.

    Conservative: returns False when no options struct is present at
    all (post-SCF / FCI / selected-CI paths build their own HF prep
    without going through these options).
    """
    for opts in (rhf_options, uhf_options, rks_options, uks_options):
        if opts is None:
            continue
        centers = getattr(opts, "ecp_centers", None)
        if centers:  # non-empty list
            return True
    return False


def _detect_direct_scf(
    resolved_method: str,
    rhf_options: Optional[RHFOptions],
    uhf_options: Optional[UHFOptions],
    rks_options: Optional[RKSOptions],
    uks_options: Optional[UKSOptions],
    basis_name: str,
    molecule: Molecule,
) -> bool:
    """Return True when the SCF will use the direct (integral-driven)
    Fock build path — either because ``scf_mode=DIRECT`` was explicitly
    set, or because AUTO resolved to DIRECT (basis larger than the
    auto threshold, and density_fit/cosx are off).

    Conservative: returns False when we can't determine the resolved
    mode (no options struct, or density_fit/cosx supersedes).
    """
    opts = {
        "rhf": rhf_options,
        "uhf": uhf_options,
        "rks": rks_options,
        "uks": uks_options,
    }.get(resolved_method)
    if opts is None:
        return False

    # density_fit + cosx supersede the four-index path — direct SCF
    # screening is not used, so the direct-SCF references don't apply.
    if getattr(opts, "density_fit", False):
        return False

    scf_mode = getattr(opts, "scf_mode", None)
    if scf_mode is None:
        return False

    from ._vibeqc_core import SCFMode

    if scf_mode == SCFMode.DIRECT:
        return True
    if scf_mode == SCFMode.AUTO:
        threshold = getattr(opts, "scf_mode_auto_threshold", 200)
        n_bf = BasisSet(molecule, basis_name).nbasis
        return n_bf > threshold
    # CONVENTIONAL — not direct.
    return False


def _detect_acceleration(
    resolved_method: str,
    rhf_options: Optional[RHFOptions],
    uhf_options: Optional[UHFOptions],
    rks_options: Optional[RKSOptions],
    uks_options: Optional[UKSOptions],
) -> list[str]:
    """Return the integral/exchange-acceleration technique keys engaged by
    the resolved SCF, for ``assemble(acceleration=...)``.

    * ``density_fit=True`` (RI-J Coulomb fitting) ⇒ ``["rij"]`` — Whitten
      1973 / Dunlap 1979 / Eichkorn 1995 + 1997.
    * ``density_fit=True`` *and* ``cosx=True`` (RIJCOSX: RI-J Coulomb +
      chain-of-spheres exchange) ⇒ ``["rijcosx"]`` — Neese 2009. COSX
      requires density fitting (the C++ JK dispatch rejects ``cosx`` without
      ``density_fit``), so a lone ``cosx`` flag still maps to RIJCOSX.

    Empty list for post-SCF / non-mean-field methods (no options struct in
    the rhf/uhf/rks/uks map) and for the conventional / direct four-index
    path (neither density_fit nor cosx set).
    """
    opts = {
        "rhf": rhf_options,
        "uhf": uhf_options,
        "rks": rks_options,
        "uks": uks_options,
    }.get(resolved_method)
    if opts is None:
        return []
    density_fit = bool(getattr(opts, "density_fit", False))
    cosx = bool(getattr(opts, "cosx", False))
    if cosx:  # COSX always pairs the RI-J Coulomb build → RIJCOSX
        return ["rijcosx"]
    if density_fit:
        return ["rij"]
    return []


def _detect_soscf_trah(
    resolved_method: str,
    rhf_options: Optional[RHFOptions],
    uhf_options: Optional[UHFOptions],
    rks_options: Optional[RKSOptions],
    uks_options: Optional[UKSOptions],
) -> tuple[bool, bool]:
    """Return ``(uses_soscf, uses_trah)`` for the resolved SCF.

    Both second-order convergers are armed by a *positive* threshold on the
    options struct (``soscf_threshold`` / ``trah_threshold``): the driver
    switches from Roothaan diagonalisation to the second-order step once the
    orbital-gradient norm drops below it (cpp/src/soscf.hpp, trah.hpp). A
    threshold of 0.0 (the default) means "never engage" — no citation. Both
    False for methods with no options struct.
    """
    opts = {
        "rhf": rhf_options,
        "uhf": uhf_options,
        "rks": rks_options,
        "uks": uks_options,
    }.get(resolved_method)
    if opts is None:
        return (False, False)

    def _armed(attr: str) -> bool:
        try:
            return float(getattr(opts, attr, 0.0) or 0.0) > 0.0
        except (TypeError, ValueError):
            return False

    return (_armed("soscf_threshold"), _armed("trah_threshold"))


# SCF initial-guess enum names → citation-route keys. Only guesses with a
# defining-paper route appear; the traditional Hcore guess and AUTO (resolved
# at runtime) carry none, so they map to nothing.
_SCF_GUESS_ROUTE_KEYS = {
    "sad": "sad",
    "sap": "sap",
    "hueckel": "huckel",
    "huckel": "huckel",
}


def _detect_scf_guess(
    resolved_method: str,
    rhf_options: Optional[RHFOptions],
    uhf_options: Optional[UHFOptions],
    rks_options: Optional[RKSOptions],
    uks_options: Optional[UKSOptions],
) -> Optional[str]:
    """Return the citation route key for the SCF initial guess explicitly
    requested on the options struct (``"sad"`` / ``"sap"`` / ``"huckel"``),
    or ``None`` for AUTO / HCORE / unrouted guesses and for methods with no
    options struct.

    Only an *explicit* non-AUTO selection cites a guess paper — the AUTO
    resolution is a runtime decision not surfaced on the options struct, and
    Hcore (Hückel-free bare-core guess) has no single defining publication.
    """
    opts = {
        "rhf": rhf_options,
        "uhf": uhf_options,
        "rks": rks_options,
        "uks": uks_options,
    }.get(resolved_method)
    if opts is None:
        return None
    guess = getattr(opts, "initial_guess", None)
    if guess is None:
        return None
    name = (getattr(guess, "name", None) or str(guess)).strip().lower()
    return _SCF_GUESS_ROUTE_KEYS.get(name)


def _run_single_point(
    method: str,
    molecule: Molecule,
    basis: BasisSet,
    *,
    functional: Optional[str],
    rhf_options: Optional[RHFOptions] = None,
    uhf_options: Optional[UHFOptions] = None,
    rks_options: Optional[RKSOptions] = None,
    uks_options: Optional[UKSOptions] = None,
    selected_ci_options: Optional[SelectedCIOptions] = None,
    dmrg_options: Optional[DMRGOptions] = None,
    v2rdm_options: Optional[V2RDMOptions] = None,
    transcorrelated_options: Optional[TranscorrelatedOptions] = None,
    casci_options: Optional[CASCIOptions] = None,
    caspt2_options: Optional[CASPT2Options] = None,
    casscf_options: Optional[CASSCFOptions] = None,
    active_space: Optional[tuple[int, int]] = None,
    cas_reference: Optional[str] = None,
    mlip_options: Optional[MLIPOptions] = None,
    ccm_options: Optional["CCMOptions"] = None,
    solvent: object = None,
    dft_plus_u: Optional[List["HubbardSite"]] = None,
    nddo: bool = False,
):
    """Single-point dispatcher. ``solvent`` (v0.9.0) reroutes through
    :func:`vibeqc.solvation.run_cpcm_scf` and returns the underlying
    SCF result with an ``e_solv`` / ``solvent_result`` attribute
    attached for downstream output writers."""
    method_opts = {
        "rhf": rhf_options or RHFOptions(),
        "uhf": uhf_options or UHFOptions(),
    }
    if method in ("rks", "uks"):
        opts = (rks_options if method == "rks" else uks_options) or (
            RKSOptions() if method == "rks" else UKSOptions()
        )
        opts.functional = functional or opts.functional or "lda"
        method_opts[method] = opts

    if solvent is not None and method in ("rhf", "uhf", "rks", "uks"):
        if dft_plus_u:
            raise NotImplementedError(
                "DFT+U combined with implicit solvation (CPCM) is not yet "
                "supported. The +U Fock term and the solvation macro-"
                "iteration would need to be combined inside "
                "vibeqc.solvation.run_cpcm_scf; defer until Increment "
                "3+ or run +U without solvent."
            )
        # CPCM macro-iteration wrapper. The C++ result types
        # (RHFResult / UHFResult / RKSResult / UKSResult) are
        # pybind11 ``def_readonly`` — Python attribute set on them
        # silently fails inside try/except. Wrap the inner result in
        # an attribute-forwarding proxy so callers can still write
        # ``result.energy`` / ``result.density`` / etc., and *also*
        # read the new solvation diagnostics (``result.solvent_result``,
        # ``result.e_solv``, ``result.energy_in_solvent``).
        from .solvation import run_cpcm_scf
        from .solvation.driver import _solvent_aware_scf_result

        sol = run_cpcm_scf(
            molecule,
            basis,
            method=method,
            solvent=solvent,
            options=method_opts[method],
        )
        return _solvent_aware_scf_result(sol)

    if dft_plus_u and method in ("rhf", "uhf", "rks", "uks"):
        from .dft_plus_u import _apply_dft_plus_u_to_options

        _apply_dft_plus_u_to_options(method_opts[method], basis, dft_plus_u)

    if method == "rhf":
        return run_rhf(molecule, basis, method_opts["rhf"])
    if method == "uhf":
        return run_uhf(molecule, basis, method_opts["uhf"])
    if method == "rks":
        return run_rks(molecule, basis, method_opts["rks"])
    if method == "uks":
        return run_uks(molecule, basis, method_opts["uks"])

    # ── Non-mean-field wavefunction methods ──
    if method in (
        "selected_ci",
        "dmrg",
        "v2rdm",
        "transcorrelated_ci",
        "casci",
        "mrci",
        "casscf",
        "nevpt2",
        "caspt2",
    ):
        import numpy as np

        # Starting orbitals for the determinant-solver family.  The
        # open-shell default is UHF natural orbitals (UNO-CAS — Pulay &
        # Hamilton, J. Chem. Phys. 88, 4926 (1988)): one spin-restricted
        # orbital set ordered by descending occupation, so the
        # doubly-occupied core and the active window are well-defined.
        # (Plain "uhf" keeps only the α orbitals; the β space — and hence
        # the core — is then only approximately represented.)  Override
        # with cas_reference="rhf"|"uhf"|"uno".
        if cas_reference is not None and cas_reference not in (
            "rhf",
            "uhf",
            "uno",
        ):
            raise ValueError(
                f"cas_reference must be 'rhf', 'uhf' or 'uno', got "
                f"{cas_reference!r}"
            )
        hf_method = cas_reference or (
            "uno" if molecule.multiplicity > 1 else "rhf"
        )
        C = get_hf_orbital_provider(molecule, basis, method=hf_method)
        H = build_hamiltonian_mo(molecule, basis, C)

        # Optional active-space truncation.  The CAS methods (casci / nevpt2 /
        # caspt2) are handled separately below — they keep the full MO
        # integrals and dress an explicit frozen core (standard CAS
        # convention), rather than dropping the core + virtual orbitals the
        # way these CI/DMRG/v2RDM backends do.
        if active_space is not None and method not in (
            "casci",
            "mrci",
            "casscf",
            "nevpt2",
            "caspt2",
        ):
            n_active, n_elec = active_space
            n_frozen = max(0, H.norb - n_active)
            C_act = np.asarray(C)[:, n_frozen : n_frozen + n_active]
            H = build_hamiltonian_mo(molecule, basis, C_act)
            H.nelec = n_elec
            H.ms2 = 0

        if method == "transcorrelated_ci":
            if transcorrelated_options is None:
                transcorrelated_options = TranscorrelatedOptions()
            H = build_transcorrelated_hamiltonian(H, transcorrelated_options)
            return solve_selected_ci(H, selected_ci_options or SelectedCIOptions())

        if method == "selected_ci":
            return solve_selected_ci(H, selected_ci_options or SelectedCIOptions())

        if method == "dmrg":
            return solve_dmrg(H, dmrg_options or DMRGOptions())

        if method == "v2rdm":
            return solve_v2rdm(H, v2rdm_options or V2RDMOptions())

        if method in ("casci", "mrci", "casscf", "nevpt2", "caspt2"):
            from .solvers._casci import casci as _run_casci

            # Standard CAS partition: full MO integrals with an explicit frozen
            # core.  active_space=(n_active_orb, n_active_elec) selects the
            # lowest n_core orbitals as doubly-occupied core, n_active_orb
            # active orbitals, the rest virtual (matching PySCF mcscf.CASCI).
            # H here is the full, untruncated Hamiltonian.
            n_elec_total = molecule.n_electrons()
            if active_space is not None:
                n_active_orb, n_active_elec = active_space
                if (n_elec_total - n_active_elec) % 2 != 0:
                    raise ValueError(
                        "CAS active_space=(n_orb, n_elec) needs an even "
                        f"frozen-core electron count; got n_elec_total="
                        f"{n_elec_total}, n_active_elec={n_active_elec}."
                    )
                n_core = (n_elec_total - n_active_elec) // 2
            else:
                n_active_orb, n_active_elec, n_core = H.norb, n_elec_total, 0
            n_virt = H.norb - n_core - n_active_orb

            if method == "casscf":
                from .solvers._casscf import casscf as _run_casscf

                c_opts = casscf_options or CASSCFOptions()
                if (
                    getattr(c_opts, "pt2", None) is not None
                    and c_opts.ci_solver != "selected_ci"
                ):
                    raise ValueError(
                        "CASSCFOptions.pt2 is the Epstein-Nesbet PT2 "
                        "stage on a SELECTED wavefunction; it requires "
                        "ci_solver='selected_ci' (the exact-CI backend "
                        "has no external perturbers)"
                    )
                sc = _run_casscf(
                    H.h1e,
                    H.h2e,
                    n_active_elec=n_active_elec,
                    n_active_orb=n_active_orb,
                    n_core=n_core,
                    nuclear_repulsion=H.nuclear_repulsion,
                    ms2=molecule.multiplicity - 1,
                    nroots=c_opts.nroots,
                    weights=c_opts.weights,
                    orbital_step=c_opts.orbital_step,
                    active_orbitals=c_opts.active_orbitals,
                    spin_pure=c_opts.spin_pure,
                    ci_solver=c_opts.ci_solver,
                    selected_ci_options=c_opts.selected_ci_options,
                )
                label = f"casscf({n_active_elec}e,{n_active_orb}o)"
                if c_opts.ci_solver == "selected_ci":
                    label += "_selci"
                if sc.e_totals:
                    label += f"_sa{len(sc.e_totals)}"
                _sel_pt2 = None
                if getattr(c_opts, "pt2", None) is not None:
                    # SHCI perturbative stage on the converged selected
                    # wavefunction, in the converged orbital basis (the
                    # variational headline energy is unchanged; the PT2
                    # estimate surfaces on its own).
                    from .solvers._selected_ci import selected_ci_pt2

                    p = c_opts.pt2
                    _sel_pt2 = selected_ci_pt2(
                        sc.h1e_cas,
                        sc.h2e_cas,
                        n_active_elec,
                        n_active_orb,
                        n_core,
                        result=sc.cas,
                        eps2=p.eps2,
                        n_samples=p.n_samples,
                        sample_size=p.sample_size,
                        eps2_loose=p.eps2_loose,
                        seed=p.seed,
                    )
                return SolverResult(
                    energy=sc.e_total,
                    method=label,
                    converged=sc.converged,
                    n_iter=sc.n_iter,
                    energy_trace=sc.energy_trace or [sc.e_total],
                    root_energies=list(sc.e_totals) or None,
                    root_s2=_root_s2_values(
                        sc.cas, molecule.multiplicity - 1
                    ),
                    ci_coeffs=sc.cas.ci_coeffs,
                    ci_labels=sc.cas.determinants,
                    rdm1=_active_rdm1(sc.cas),
                    selected_pt2=_sel_pt2,
                )

            # CASSCF-referenced MR-PT2: supplying casscf_options switches the
            # nevpt2 / caspt2 reference from CASCI-on-HF-orbitals to a CASSCF
            # whose converged integrals + lowest-root CI vector seed the PT2 —
            # the standard CASSCF→CASPT2/NEVPT2 composition (mirrors the MRCI
            # pattern below).  Without casscf_options the historical
            # CASCI-on-HF reference is kept (matches the recorded OpenMolcas
            # `RASSCF CIonly` parity constants).
            h1_ref, h2_ref = H.h1e, H.h2e
            ref_suffix = ""
            ref_root_energies: list[float] = []
            if method in ("nevpt2", "caspt2") and casscf_options is not None:
                from .solvers._casscf import casscf as _run_casscf

                c_opts = casscf_options
                if c_opts.active_orbitals is not None:
                    raise ValueError(
                        "CASSCF-referenced MR-PT2 does not support "
                        "active_orbitals window selection yet; use the "
                        "default lowest-core active window."
                    )
                if c_opts.ci_solver != "casci":
                    raise ValueError(
                        "CASSCF-referenced MR-PT2 requires the exact CI "
                        "backend (the PT2 engines re-solve the full CAS "
                        "determinant space on the converged orbitals); "
                        "ci_solver='selected_ci' is method='casscf' only"
                    )
                # spin_pure=None resolves inside casscf(): True for SA
                # references (nroots > 1) since 2026-06-11, covering the
                # multi-state CASPT2 composition (whose ⟨S²⟩-filtered
                # model space needs same-spin SA orbitals), and False for
                # single-state references.
                sc = _run_casscf(
                    H.h1e,
                    H.h2e,
                    n_active_elec=n_active_elec,
                    n_active_orb=n_active_orb,
                    n_core=n_core,
                    nuclear_repulsion=H.nuclear_repulsion,
                    ms2=molecule.multiplicity - 1,
                    nroots=c_opts.nroots,
                    weights=c_opts.weights,
                    orbital_step=c_opts.orbital_step,
                    spin_pure=c_opts.spin_pure,
                )
                if not sc.converged:
                    raise RuntimeError(
                        f"CASSCF reference for method={method!r} did not "
                        f"converge (|g| = {sc.grad_norm:.2e} after "
                        f"{sc.n_iter} macro-iterations)"
                    )
                cas = sc.cas
                h1_ref, h2_ref = sc.h1e_cas, sc.h2e_cas
                ref_suffix = "_casscf"
                ref_root_energies = list(sc.e_totals)
            else:
                # casci_options is method="casci"-only; the CASCI built here
                # as the *reference* for nevpt2 / caspt2 / mrci keeps the
                # solver defaults (single root — the PT2 / MRCI engines seed
                # from root 0; the MS-CASPT2 model space re-solves its own
                # multi-root CASCI inside ms_caspt2()).
                ci_opts = (
                    (casci_options or CASCIOptions())
                    if method == "casci"
                    else CASCIOptions()
                )
                cas = _run_casci(
                    H.h1e,
                    H.h2e,
                    n_active_elec=n_active_elec,
                    n_active_orb=n_active_orb,
                    n_core=n_core,
                    nuclear_repulsion=H.nuclear_repulsion,
                    ms2=molecule.multiplicity - 1,
                    nroots=ci_opts.nroots,
                    max_det=ci_opts.max_det,
                )

            if method == "casci":
                energy, label = cas.e_total, f"casci({n_active_elec}e,{n_active_orb}o)"
                if cas.nroots > 1:
                    # Multi-root CASCI: per-root energies + ⟨S²⟩ reach
                    # SolverResult.root_energies / .root_s2 via the shared
                    # return below (headline ``energy`` stays root 0).
                    ref_root_energies = list(cas.e_totals)
            elif method == "mrci":
                from .solvers._mrci import mrci as _run_mrci

                # If casscf_options are provided, run CASSCF first for
                # an orbital-optimized reference.
                if casscf_options is not None:
                    from .solvers._casscf import casscf as _run_casscf

                    c_opts = casscf_options or CASSCFOptions()
                    if c_opts.ci_solver != "casci":
                        raise ValueError(
                            "CASSCF-referenced MRCI requires the exact CI "
                            "backend; ci_solver='selected_ci' is "
                            "method='casscf' only"
                        )
                    sc = _run_casscf(
                        H.h1e,
                        H.h2e,
                        n_active_elec=n_active_elec,
                        n_active_orb=n_active_orb,
                        n_core=n_core,
                        nuclear_repulsion=H.nuclear_repulsion,
                        ms2=molecule.multiplicity - 1,
                        nroots=c_opts.nroots,
                        weights=c_opts.weights,
                        orbital_step=c_opts.orbital_step,
                        active_orbitals=c_opts.active_orbitals,
                        spin_pure=c_opts.spin_pure,
                    )
                    h1, h2 = sc.h1e_cas, sc.h2e_cas
                else:
                    h1, h2 = H.h1e, H.h2e

                mc = _run_mrci(
                    h1,
                    h2,
                    n_active_elec=n_active_elec,
                    n_active_orb=n_active_orb,
                    n_core=n_core,
                    nuclear_repulsion=H.nuclear_repulsion,
                    ms2=molecule.multiplicity - 1,
                )
                label = f"mrci({n_active_elec}e,{n_active_orb}o)"
                if casscf_options is not None:
                    label += "_casscf"
                energy = mc.e_total
            elif method == "nevpt2":
                from .solvers._mrpt import nevpt2 as _run_nevpt2

                pt = _run_nevpt2(cas, h1_ref, h2_ref, n_core=n_core, n_virt=n_virt)
                energy = pt.e_total
                label = f"nevpt2({n_active_elec}e,{n_active_orb}o){ref_suffix}"
            else:  # caspt2 — internally-contracted (default), un-gated
                from .solvers._mrpt import caspt2 as _run_caspt2

                cp = caspt2_options or CASPT2Options()
                if cp.multistate is not None:
                    # MS/XMS-CASPT2 (Finley 1998 / Granovsky 2011 +
                    # Shiozaki 2011): multi-state effective Hamiltonian over
                    # the lowest nroots spin-pure CASCI roots of the
                    # reference basis (HF orbitals, or the converged
                    # SA-CASSCF basis when casscf_options are supplied).
                    from .solvers._ms_caspt2 import ms_caspt2 as _run_ms

                    if cp.multistate not in ("ms", "xms"):
                        raise ValueError(
                            "caspt2_options.multistate must be None, 'ms' "
                            f"or 'xms', got {cp.multistate!r}"
                        )
                    if cp.variant != "ic":
                        raise ValueError(
                            "multi-state CASPT2 requires variant='ic' "
                            f"(got {cp.variant!r})"
                        )
                    if cp.ipea != 0.0:
                        raise ValueError(
                            "multi-state CASPT2 supports ipea=0 only (the "
                            "IPEA shift is gated and explicit-engine-only)"
                        )
                    ms_nroots = cp.nroots or (
                        casscf_options.nroots if casscf_options else 0
                    )
                    if ms_nroots < 2:
                        raise ValueError(
                            "multi-state CASPT2 needs >= 2 model states: "
                            "set caspt2_options.nroots (CASCI reference) "
                            "or casscf_options.nroots (SA-CASSCF reference)"
                        )
                    msr = _run_ms(
                        h1_ref,
                        h2_ref,
                        n_core,
                        n_virt,
                        nroots=ms_nroots,
                        nuclear_repulsion=H.nuclear_repulsion,
                        mode=cp.multistate,
                        n_frozen=cp.n_frozen,
                        imaginary=cp.imaginary,
                        ms2=molecule.multiplicity - 1,
                        n_act_elec=n_active_elec,
                    )
                    label = (
                        f"caspt2({n_active_elec}e,{n_active_orb}o)"
                        f"_{cp.multistate}{ms_nroots}{ref_suffix}"
                    )
                    return SolverResult(
                        energy=msr.e_total,
                        method=label,
                        converged=True,
                        n_iter=1,
                        energy_trace=[msr.e_total],
                        root_energies=list(msr.energies),
                        ci_coeffs=cas.ci_coeffs,
                        ci_labels=cas.determinants,
                        rdm1=_active_rdm1(cas),
                        multistate=dict(
                            mode=msr.mode,
                            nroots=msr.nroots,
                            heff=msr.heff,
                            heff_asym=msr.heff_asym,
                            mixing=msr.mixing,
                            ss_energies=list(msr.ss_energies),
                            ref_energies=list(msr.ref_energies),
                            e2_corr=list(msr.e2_corr),
                            xms_rotation=msr.xms_rotation,
                            s2_values=list(msr.s2_values),
                        ),
                    )
                pt = _run_caspt2(
                    cas,
                    h1_ref,
                    h2_ref,
                    n_core=n_core,
                    n_virt=n_virt,
                    variant=cp.variant,
                    ipea=cp.ipea,
                    imaginary=cp.imaginary,
                    n_frozen=cp.n_frozen,
                )
                energy = pt.e_total
                label = f"caspt2({n_active_elec}e,{n_active_orb}o){ref_suffix}"

            return SolverResult(
                energy=energy,
                method=label,
                # Direct CI + non-iterative PT2: a returned result means the
                # eigensolve succeeded.
                converged=True,
                n_iter=1,
                energy_trace=[energy],
                root_energies=ref_root_energies or None,
                root_s2=(
                    _root_s2_values(cas, molecule.multiplicity - 1)
                    if ref_root_energies
                    else None
                ),
                # Reference-wavefunction diagnostics (the CASCI / CASSCF
                # reference for PT2 methods): leading configurations +
                # active natural occupations in the .out solver block.
                ci_coeffs=cas.ci_coeffs,
                ci_labels=cas.determinants,
                rdm1=_active_rdm1(cas),
            )

    if method == "fci":
        import numpy as np
        from scipy.linalg import eigh

        from .solvers import (
            build_hamiltonian_matrix_unrestricted,
            diagonal_matrix_element_unrestricted,
            generate_determinants,
        )

        hf_method = "uhf" if molecule.multiplicity > 1 else "rhf"
        C = get_hf_orbital_provider(molecule, basis, method=hf_method)
        ham = build_hamiltonian_mo(molecule, basis, C)

        if active_space is not None:
            n_active, n_elec = active_space
            n_frozen = max(0, ham.norb - n_active)
            C_act = np.asarray(C)[:, n_frozen : n_frozen + n_active]
            ham = build_hamiltonian_mo(molecule, basis, C_act)
            ham.nelec = n_elec

        norb = ham.norb
        nelec = ham.nelec
        nalpha = (nelec + ham.ms2) // 2
        nbeta = (nelec - ham.ms2) // 2

        all_dets = generate_determinants(norb, nalpha, nbeta)
        H = build_hamiltonian_matrix_unrestricted(all_dets, ham.h1e, ham.h2e)
        evals, evecs = eigh(H)
        e_fci = evals[0] + ham.nuclear_repulsion

        return SolverResult(
            energy=e_fci,
            method=f"fci(ndet={len(all_dets)})",
            converged=True,
            n_iter=1,
            energy_trace=[e_fci],
            ci_coeffs=evecs[:, 0],
            ci_labels=all_dets,
        )

    # ── Semiempirical methods ──
    se_methods = {
        "dftb0",
        "scc_dftb",
        "pm6",
        "gfn2_xtb",
        "om1",
        "om2",
        "om3",
        "msindo",
    }
    if method in se_methods:
        if solvent is not None and method == "msindo":
            # MSINDO implicit solvation (COSMO) — its own multipole solute↔cavity
            # coupling + GEPOL cavity, not run_job's HF/DFT ESP-on-grid path
            # above. Resolve the solvent to a dielectric and run the dedicated
            # driver; the in-solvent total becomes ``result.energy``.
            from vibeqc.semiempirical.methods.msindo import ANGSTROM_TO_BOHR as _A2B
            from vibeqc.semiempirical.methods.msindo_cosmo import msindo_cosmo
            from vibeqc.solvation.driver import resolve_solvent

            sm = resolve_solvent(solvent)
            if sm is None or sm.is_gas_phase:
                return _run_semiempirical(method, molecule, nddo=nddo)
            Z = [at.Z for at in molecule.atoms]
            coords_ang = [[c / _A2B for c in at.xyz] for at in molecule.atoms]
            rc = msindo_cosmo(
                Z,
                coords_ang,
                epsilon=sm.epsilon,
                charge=int(molecule.charge),
                multiplicity=int(molecule.multiplicity),
            )
            return _SEMPR(
                float(rc.total_energy),
                None,
                method,
                converged=bool(rc.converged),
                n_iter=int(rc.n_iter),
                binding_energy=float(rc.binding_energy),
                e_solv=float(rc.e_solv),
                e_gas=float(rc.e_gas),
            )
        return _run_semiempirical(method, molecule, nddo=nddo)

    # ── Periodic MSINDO (Cyclic Cluster Model) ──
    if method == "ccm":
        return _run_ccm(molecule, ccm_options)

    # ── Machine-learning interatomic potentials (MACE, …) ──
    if method in _MLIP_METHODS:
        return _run_mlip(method, molecule, mlip_options)

    raise ValueError(
        f"Unknown method {method!r}; use 'rhf', 'uhf', 'rks', 'uks', "
        f"'selected_ci', 'dmrg', 'v2rdm', 'transcorrelated_ci', 'casci', "
        f"'mrci', 'casscf', 'nevpt2', 'caspt2', 'fci', "
        f"'ccsd', 'ccsd(t)', 'dftb0', 'scc_dftb', 'pm6', 'gfn2_xtb', "
        f"'om1', 'om2', 'om3', 'mace', or 'auto'"
    )


def _run_semiempirical(method: str, molecule: Molecule, *, nddo: bool = False):
    """Run a semiempirical calculation and return a result-like object.

    ``nddo`` (method="msindo" only) selects MSINDO's NDDO mode — the reference
    program's default; ignored by the other semiempirical methods."""
    import numpy as np

    # ── DFTB methods ──
    if method in ("dftb0", "scc_dftb"):
        from vibeqc.semiempirical import DFTB0Model, SCCDFTBModel
        from vibeqc.semiempirical.parameters import default_parameters

        params = default_parameters()
        if method == "dftb0":
            model = DFTB0Model(molecule, params=params)
        else:
            model = SCCDFTBModel(molecule, params=params)
        g = model.gradient() if hasattr(model, "gradient") else None
        return _SEMPR(model.energy(), g, method, converged=True, n_iter=1)

    # ── GFN2-xTB (experimental — model.energy() emits GFN2ExperimentalWarning) ──
    if method == "gfn2_xtb":
        from vibeqc.semiempirical.methods.gfn2 import GFN2Model
        from vibeqc.semiempirical.methods.gfn2_params import load_gfn2_params

        params = load_gfn2_params()
        model = GFN2Model(molecule, params)
        energy = model.energy()
        return _SEMPR(
            energy,
            None,
            method,
            converged=model.converged,
            n_iter=model.n_iter,
        )

    # ── NDDO methods (PM6, OM1, OM2, OM3) ──
    # OM1 only: the published OM1 evaluates its core-valence ECP analytically
    # (Kolb & Thiel 1993) and publishes no semiempirical ECP parameters
    # (Dral 2016 Table 1).  vibe-qc does not implement that analytic ECP yet,
    # so OM1 lacks part of its core-valence Pauli repulsion: X-H bonds come
    # out ~0.3 A short and the uncapped F2 orthogonalization term can
    # variationally collapse at close non-bonded contacts (eclipsed ethane
    # drops ~0.4 Ha; with an ECP restored the barrier is +1.9 kcal/mol vs
    # published 1.8).  PM6/OM2/OM3 carry their full published Hamiltonians
    # and are not gated.
    if method == "om1":
        import warnings as _warnings

        from vibeqc.semiempirical import NDDOExperimentalWarning

        _warnings.warn(
            "method='om1': OM1's analytically-evaluated core-valence ECP "
            "(Kolb & Thiel 1993) is not implemented; bonds to heavy atoms "
            "come out ~0.3 A short and close non-bonded contacts can "
            "variationally collapse. Use om2/om3 for production. "
            "See HANDOVER_SEMIEMPIRICAL_PES.md.",
            category=NDDOExperimentalWarning,
            stacklevel=2,
        )
    from vibeqc._vibeqc_core.semiempirical.nddo import (
        run_omx_v2,
        run_pm6,
    )

    if method == "pm6":
        from vibeqc.semiempirical.methods.pm6_params import load_pm6_params_auto

        params = load_pm6_params_auto([at.Z for at in molecule.atoms])

        if molecule.multiplicity > 1:
            from vibeqc._vibeqc_core.semiempirical.nddo import (
                compute_upm6_gradient_fd,
                run_upm6,
            )

            result_obj = run_upm6(molecule, params, max_iter=200)
            try:
                g = compute_upm6_gradient_fd(molecule, params)
            except Exception:
                g = None
            result = _SEMPR(
                float(result_obj.energy),
                g,
                method,
                converged=bool(result_obj.converged),
                n_iter=int(result_obj.n_iter),
            )
        else:
            result_obj = run_pm6(molecule, params, max_iter=200)
            from vibeqc._vibeqc_core.semiempirical.nddo import compute_pm6_gradient_fd

            try:
                g = compute_pm6_gradient_fd(molecule, params)
            except Exception:
                g = None
            result = _SEMPR(
                float(result_obj.energy),
                g,
                method,
                converged=bool(result_obj.converged),
                n_iter=int(result_obj.n_iter),
            )
        return result

    if method == "msindo":
        # MSINDO (Bredow/Geudtner/Jug INDO) — closed-shell RHF (s/p/d) +
        # open-shell UHF (s/p).
        from pathlib import Path

        from vibeqc.semiempirical.methods.msindo import (
            ANGSTROM_TO_BOHR as _A2B,
        )
        from vibeqc.semiempirical.methods.msindo import (
            eff_core_charge as _cz,
        )

        Z = [at.Z for at in molecule.atoms]
        # Atom coords are stored in bohr; both engines take Angstrom and
        # re-convert with the same MSINDO constant, so the round-trip is exact.
        coords_ang = [[c / _A2B for c in at.xyz] for at in molecule.atoms]
        charge = int(molecule.charge)
        mult = int(molecule.multiplicity)
        nelec = sum(_cz(z) for z in Z) - charge
        open_shell = mult != 1 or nelec % 2 != 0
        has_d = any(z >= 13 and z not in (1, 2, 6, 7, 8, 9) for z in Z)

        # Determine whether the C++ backend can handle this job.
        # C++ covers: neutral or not, closed-shell s/p/d (full Z=1-54) and
        # UHF s/p (H–F).  Fallback to Python for: charged systems with d-shell,
        # open-shell d-shell, and anything not yet landed.
        use_cpp = False
        if not open_shell:
            # Closed-shell: C++ handles full s/p/d with params loaded from JSON.
            use_cpp = True
        elif not has_d:
            # Open-shell s/p: C++ UHF handles H–F elements.
            use_cpp = True

        if use_cpp:
            from vibeqc._vibeqc_core.semiempirical.indo import (
                MsindoParameterSet,
                load_params_from_json,
                merge_nddo_overrides,
                run_msindo_full,
                run_msindo_uhf,
            )

            # Load the full parameter set once (lazy init via module-level cache).
            _param_path = (
                Path(__file__).resolve().parent
                / "semiempirical"
                / "methods"
                / "msindo_params.json"
            )
            params = load_params_from_json(_param_path.read_text())
            if nddo:
                _nddo_path = (
                    Path(__file__).resolve().parent
                    / "semiempirical"
                    / "methods"
                    / "msindo_params_nddo.json"
                )
                merge_nddo_overrides(params, _nddo_path.read_text())

            if open_shell:
                result_obj = run_msindo_uhf(
                    Z,
                    coords_ang,
                    params,
                    max_iter=200,
                    multiplicity=mult,
                    nddo=nddo,
                )
            else:
                result_obj = run_msindo_full(
                    Z,
                    coords_ang,
                    params,
                    max_iter=200,
                    nddo=nddo,
                )
        else:
            from vibeqc.semiempirical.methods.msindo import run_msindo as _run_py

            result_obj = _run_py(
                Z,
                coords_ang,
                charge=charge,
                multiplicity=mult,
                max_iter=200,
                nddo=nddo,
            )
        return _SEMPR(
            float(result_obj.total_energy),
            None,
            method,
            converged=bool(result_obj.converged),
            n_iter=int(result_obj.n_iter),
            binding_energy=float(getattr(result_obj, "binding_energy", 0.0)),
        )

    if method in ("om1", "om2", "om3"):
        from vibeqc.semiempirical.methods.omx_params import (
            load_om1_params,
            load_om2_params,
            load_om3_params,
        )

        loader = {
            "om1": load_om1_params,
            "om2": load_om2_params,
            "om3": load_om3_params,
        }[method]
        params = loader()

        if molecule.multiplicity > 1:
            from vibeqc._vibeqc_core.semiempirical.nddo import (
                compute_uomx_v2_gradient_fd,
                run_uomx_v2,
            )

            result_obj = run_uomx_v2(molecule, params, max_iter=200)
            try:
                g = compute_uomx_v2_gradient_fd(molecule, params)
            except Exception:
                g = None
            result = _SEMPR(
                float(result_obj.energy),
                g,
                method,
                converged=bool(result_obj.converged),
                n_iter=int(result_obj.n_iter),
            )
        else:
            result_obj = run_omx_v2(molecule, params, max_iter=200)
            from vibeqc._vibeqc_core.semiempirical.nddo import (
                compute_omx_v2_gradient_fd,
            )

            try:
                g = compute_omx_v2_gradient_fd(molecule, params)
            except Exception:
                g = None
            result = _SEMPR(
                float(result_obj.energy),
                g,
                method,
                converged=bool(result_obj.converged),
                n_iter=int(result_obj.n_iter),
            )
        return result

    raise ValueError(f"Unhandled semiempirical method: {method!r}")


def _run_ccm(molecule: Molecule, ccm_options=None):
    """Periodic MSINDO via the Cyclic Cluster Model (method="ccm").

    Routes to the C++ CCM engine when available, with Python fallback.
    """
    from vibeqc.molecule import ANGSTROM_TO_BOHR as _A2B

    if ccm_options is None or not getattr(ccm_options, "translations", None):
        raise ValueError(
            "method='ccm' needs ccm_options carrying the supercell lattice: "
            "run_job(mol, method='ccm', ccm_options=CCMOptions("
            "translations=[[a,0,0], ...], madelung=True)). 1 vector -> CCM1D "
            "(polymer), 2 -> CCM2D (surface), 3 -> CCM3D (bulk)."
        )
    if int(molecule.multiplicity) != 1:
        raise NotImplementedError(
            "periodic CCM is closed-shell (RHF) only; open-shell / ROHF CCM is "
            "not implemented. Use a closed-shell cell (multiplicity=1)."
        )
    Z = [at.Z for at in molecule.atoms]
    coords_ang = [[c / _A2B for c in at.xyz] for at in molecule.atoms]

    # Try C++ CCM engine first.
    try:
        from pathlib import Path as _Path

        from vibeqc._vibeqc_core.semiempirical.indo import (
            load_params_from_json,
        )
        from vibeqc._vibeqc_core.semiempirical.indo import (
            run_ccm as _run_ccm_cpp,
        )

        _PARAM_PATH = (
            _Path(__file__).resolve().parent
            / "semiempirical"
            / "methods"
            / "msindo_params.json"
        )
        params = load_params_from_json(_PARAM_PATH.read_text())
        result_obj = _run_ccm_cpp(
            Z,
            coords_ang,
            ccm_options.translations,
            params,
            madelung=ccm_options.madelung,
        )
        if not getattr(result_obj, "converged", False):
            # C++ engine declined (e.g. element out of scope) — fall back.
            raise RuntimeError("C++ CCM did not converge")
    except Exception:
        from vibeqc.semiempirical.methods.msindo_ccm import run_ccm as _run_ccm_py

        result_obj = _run_ccm_py(
            Z,
            coords_ang,
            ccm_options.translations,
            charge=int(molecule.charge),
            madelung=ccm_options.madelung,
        )

    return _SEMPR(
        float(result_obj.total_energy),
        None,
        "ccm",
        converged=bool(result_obj.converged),
        n_iter=int(result_obj.n_iter),
    )


def _citation_manifest_rows(refs: Any) -> list[dict[str, Any]]:
    """Flatten an :class:`AssembledCitations` into the scalar-dict rows
    the ``.system`` manifest's ``[citations]`` section expects.

    Uses the *full* ``.citations`` view — every entry regardless of its
    ``print`` flag (CLAUDE.md §8.3/§8.5: the ``.system`` manifest is the
    internal-provenance destination, so link-time-only entries like
    pybind11 belong here even though they are filtered out of the
    user-facing references block). Missing optional fields become ``""``
    sentinels so the array-of-tables keeps a fixed shape across rows.
    """
    rows: list[dict[str, Any]] = []
    for c in getattr(refs, "citations", ()) or ():
        year = getattr(c, "year", None)
        rows.append(
            {
                "key": str(getattr(c, "key", "")),
                "kind": str(getattr(c, "kind", "")),
                "authors": "; ".join(getattr(c, "authors", ()) or ()),
                "title": str(getattr(c, "title", "")),
                "journal": str(getattr(c, "journal", None) or ""),
                "year": year if isinstance(year, int) else str(year or ""),
                "doi": str(getattr(c, "doi", None) or ""),
                "license": str(getattr(c, "license", None) or ""),
                "print": bool(getattr(c, "print", True)),
            }
        )
    return rows


def _run_mlip(method: str, molecule: Molecule, mlip_options=None):
    """Run a machine-learning interatomic potential and return a
    result-like object (duck-typing the SCF-result interface).

    vibe-qc drives the external pre-trained model's forward pass; the
    returned ``.energy`` is the model's reference-shifted DFT-surface
    energy (Hartree), NOT a total electronic energy (``CLAUDE.md`` §10 /
    energy-scale caveat — do not mix with HF/DFT totals).

    ``mlip_options`` (:class:`vibeqc.mlip.MLIPOptions`) selects the model,
    device, and dtype, and carries the ASL acknowledgment. Selecting an
    ASL (academic, non-commercial) model without acknowledgment raises
    :class:`PermissionError` (see :mod:`vibeqc.mlip.mace`).
    """
    if method == "mace":
        from vibeqc.mlip.mace import MACEModel

        model = MACEModel(molecule, mlip_options)
        return _MLIPResult(
            model.energy(),
            model.gradient(),
            method,
            converged=True,
            n_iter=1,
            model_info=model.info,
        )
    raise ValueError(f"Unhandled MLIP method: {method!r}")


class _SEMPR:
    """Semiempirical result wrapper — duck-typing the interface
    format_scf_trace and run_job expect."""

    def __init__(
        self,
        energy,
        gradient=None,
        method_name="",
        converged=True,
        n_iter=1,
        binding_energy=None,
        e_solv=None,
        e_gas=None,
    ):
        self.energy = energy
        self._gradient = gradient
        self.method = method_name
        self.converged = converged
        self.n_iter = n_iter
        # MSINDO reports an atomization/binding energy (E − Σ atomic reference)
        # directly; None for engines that don't (PM6 / OMx / DFTB / GFN2).
        self.binding_energy = binding_energy
        # Implicit-solvation diagnostics (MSINDO COSMO via run_job(solvent=...)):
        # ``energy`` is then the in-solvent total, ``e_gas`` the gas reference,
        # ``e_solv = energy − e_gas``.  None for gas-phase runs.
        self.e_solv = e_solv
        self.e_gas = e_gas
        self.scf_trace = []

    def gradient(self):
        return self._gradient


class _MLIPResult:
    """Machine-learning-interatomic-potential result wrapper — duck-types
    the same interface as :class:`_SEMPR` (energy, gradient(), method,
    converged, n_iter, scf_trace). ``energy`` is the model's
    reference-shifted DFT-surface energy (Hartree), not a total electronic
    energy."""

    def __init__(
        self,
        energy,
        gradient=None,
        method_name="",
        converged=True,
        n_iter=1,
        model_info=None,
    ):
        self.energy = energy
        self._gradient = gradient
        self.method = method_name
        self.converged = converged
        self.n_iter = n_iter
        self.scf_trace = []
        # MaceModelInfo (or None) — provenance for the MLIP model actually
        # run; used by run_job for the per-model citation + .out/.system.
        self.model_info = model_info

    def gradient(self):
        return self._gradient


def _format_mlip_provenance(info) -> str:
    """A self-documenting MLIP provenance block for the .out — which
    external pre-trained model produced these numbers, its license, and
    how to read the (model-specific, reference-shifted) energy."""
    lic = info.license.upper()
    if lic == "MIT":
        lic_note = "MIT  [free for commercial + academic use]"
    elif lic == "ASL":
        lic_note = "ASL  [ACADEMIC, NON-COMMERCIAL use only]"
    else:
        lic_note = f"{info.license}  [unverified — treated as academic-only]"
    elts = f", {info.elements} elements" if info.elements else ""
    lines = [
        "  " + "-" * 60,
        "  MACE machine-learning interatomic potential",
        "  " + "-" * 60,
        f"  Model:     {info.key}  ({info.domain}{elts})",
        f"  License:   {lic_note}",
        f"  Training:  {info.training_data}   |   Theory: {info.theory}",
        "  Energy:    model-specific reference-shifted scale — NOT a",
        "             vibe-qc total energy, not comparable across models.",
    ]
    if info.doi:
        lines.append(f"  Reference: doi:{info.doi}  (cited in the .bibtex sibling)")
    lines.append(
        "  vibe-qc drives ACEsuit MACE's pre-trained forward pass (CLAUDE.md §10)."
    )
    lines.append("  " + "-" * 60)
    return "\n".join(lines)


def _active_rdm1(cas):
    """Active-space 1-RDM of a CASCI result's lowest root.

    Feeds the natural-occupation diagnostic in the .out solver block;
    routed through solvers._rdm.make_rdm12 so large full-CAS spaces use
    the C++ kernel.  Diagnostics must never kill a converged run, so any
    failure degrades to None (the block is simply omitted).
    """
    try:
        from .solvers._rdm import make_rdm12

        return make_rdm12(cas.ci_coeffs, cas.determinants, cas.n_active_orb)[0]
    except Exception:
        return None


#: Skip the per-root ⟨S²⟩ diagnostic above this determinant count: the
#: spin-orbital-dict evaluation is Python-side and would add tens of
#: seconds on multi-million-determinant direct-CI vectors.
_ROOT_S2_MAX_DET = 200_000


def _root_s2_values(cas, ms2):
    """Per-root ⟨S²⟩ of a multi-root CASCI result, for the .out root table.

    Shows which spin sector each state-averaged root lives in, the
    visibility surface for the 2026-06-11 spin-pure SA default (spin-pure
    runs print S(S+1) rows; ``spin_pure=False`` ensembles expose their
    mixed-spin composition).  Same degrade-to-omission contract as
    :func:`_active_rdm1`: any failure (or an oversized determinant list)
    returns None and the column is omitted.
    """
    try:
        if cas.ci_coeffs_all is None or cas.n_det > _ROOT_S2_MAX_DET:
            return None
        from .solvers._ms_caspt2 import _s2_expectation

        return [
            float(
                _s2_expectation(
                    cas.ci_coeffs_all[:, k],
                    cas.determinants,
                    cas.n_active_orb,
                    ms2,
                )
            )
            for k in range(cas.ci_coeffs_all.shape[1])
        ]
    except Exception:
        return None


def _format_solver_trace(result: SolverResult) -> str:
    """Format a non-mean-field solver result for the .out file."""
    import numpy as np

    lines = []
    lines.append(f"  Method:            {result.method}")
    lines.append(f"  Total energy:      {result.energy:16.10f} Ha")
    lines.append(f"  Converged:         {result.converged}")
    lines.append(f"  Iterations/sweeps: {result.n_iter}")
    if result.pt2_correction is not None and abs(result.pt2_correction) > 1e-12:
        e_var = result.energy - result.pt2_correction
        lines.append(f"  E(variational):    {e_var:16.10f} Ha")
        lines.append(f"  E(PT2 correction): {result.pt2_correction:16.10f} Ha")
    if result.bond_dim is not None:
        lines.append(f"  Bond dimension:    {result.bond_dim}")
    if result.truncation_error is not None:
        lines.append(f"  Truncation error:  {result.truncation_error:.2e}")
    if result.constraint_residual is not None:
        lines.append(f"  Constraint resid.: {result.constraint_residual:.2e}")
    if result.ci_labels is not None and result.ci_coeffs is not None:
        lines.append(f"  Determinants:      {len(result.ci_labels)}")
        c_abs = np.abs(result.ci_coeffs)
        n_show = min(8, len(c_abs))
        idx = np.argsort(-c_abs)
        lines.append("  Leading configurations:")
        norb = 0
        for rank in range(n_show):
            i = idx[rank]
            label = result.ci_labels[i]
            # Format SpinDet as α|...⟩ β|...⟩, Det as |...⟩
            if (
                isinstance(label, tuple)
                and len(label) == 2
                and isinstance(label[0], tuple)
            ):
                a_str = "".join(
                    "1" if j in label[0] else "0"
                    for j in range(max(label[0]) + 1 if label[0] else 0)
                )
                b_str = "".join(
                    "1" if j in label[1] else "0"
                    for j in range(max(label[1]) + 1 if label[1] else 0)
                )
                lines.append(f"    |c_{rank}| = {c_abs[i]:.6f}  α|{a_str}⟩ β|{b_str}⟩")
            else:
                norb = max(norb, max(label) + 1 if label else 0)
                occ_str = "".join("1" if j in label else "0" for j in range(norb))
                lines.append(f"    |c_{rank}| = {c_abs[i]:.6f}  |{occ_str}⟩")
    if result.root_energies is not None and len(result.root_energies) > 1:
        lines.append("  Root energies:")
        s2s = result.root_s2
        if s2s is not None and len(s2s) != len(result.root_energies):
            s2s = None
        for k, e_root in enumerate(result.root_energies):
            s2_note = f"   <S^2> = {s2s[k]:7.4f}" if s2s is not None else ""
            lines.append(f"    root {k}:  E = {e_root:16.10f} Ha{s2_note}")
    if result.selected_pt2:
        lines.append(
            "  Epstein-Nesbet PT2 on the selected wavefunction"
            " (Sharma et al 2017; variational energy unchanged):"
        )
        for k, p in enumerate(result.selected_pt2):
            sig = (
                f" +/- {p['stderr']:.8f}"
                if p.get("stderr")
                else ""
            )
            npert = (
                f"   ({p['n_perturbers']} perturbers)"
                if p.get("n_perturbers") is not None
                else "   (semistochastic)"
            )
            lines.append(
                f"    root {k}:  E_PT2 = {p['e_pt2']:16.10f} Ha{sig}"
            )
            lines.append(
                f"             E_var+PT2 = {p['e_total']:16.10f} Ha{npert}"
            )
    if result.multistate is not None:
        ms = result.multistate
        nst = ms["nroots"]
        mode_label = "XMS-CASPT2" if ms["mode"] == "xms" else "MS-CASPT2"
        lines.append(f"  {mode_label} ({nst} states):")
        lines.append(
            "    model-space reference energies (Ha): "
            + " ".join(f"{e:16.10f}" for e in ms["ref_energies"])
        )
        if ms.get("s2_values"):
            lines.append(
                "    model-space <S^2> (spin-pure roots):  "
                + " ".join(f"{s:16.4f}" for s in ms["s2_values"])
            )
        lines.append(
            "    single-state CASPT2 diagonal (Ha):   "
            + " ".join(f"{e:16.10f}" for e in ms["ss_energies"])
        )
        if ms["mode"] == "xms":
            lines.append("    XMS model-space rotation U (column = rotated state):")
            for row in np.asarray(ms["xms_rotation"]):
                lines.append(
                    "      " + " ".join(f"{u:10.6f}" for u in row)
                )
        lines.append("    effective Hamiltonian (symmetrized, Ha):")
        for row in np.asarray(ms["heff"]):
            lines.append("      " + " ".join(f"{h:16.10f}" for h in row))
        lines.append("    mixing (column k = multi-state root k):")
        for row in np.asarray(ms["mixing"]):
            lines.append("      " + " ".join(f"{c:10.6f}" for c in row))
    if result.rdm1 is not None:
        occ = np.linalg.eigvalsh(np.asarray(result.rdm1))[::-1]
        lines.append(
            "  Natural occupations (active): "
            + " ".join(f"{o:7.4f}" for o in occ)
        )
    if result.energy_trace:
        lines.append("  Energy trace:")
        for i, e in enumerate(result.energy_trace):
            lines.append(f"    iter {i + 1:4d}:  E = {e:16.10f} Ha")
    return "\n".join(lines)


def _geom_summary(molecule: Molecule) -> str:
    """Three-column per-atom geometry listing (bohr)."""
    lines = ["  Atoms (bohr)", "  " + "-" * 54]
    for i, atom in enumerate(molecule.atoms, start=1):
        lines.append(
            f"  {i:4d}  Z={atom.Z:3d}   "
            f"{atom.xyz[0]:14.8f}  {atom.xyz[1]:14.8f}  {atom.xyz[2]:14.8f}"
        )
    lines.append(
        f"  charge={molecule.charge}  multiplicity={molecule.multiplicity}  "
        f"n_electrons={molecule.n_electrons()}"
    )
    return "\n".join(lines)


def _job_header(
    method: str, basis_name: str, functional: Optional[str], scf_reference: str = "RHF"
) -> str:
    label = f"{method.upper()}"
    if method in ("rks", "uks") and functional:
        label = f"{label} / {functional}"
    # Post-SCF methods name their mean-field reference (RHF for closed shell,
    # UHF for an open-shell MP2/UMP2 run).
    if method in ("ccsd", "ccsd(t)", "mp2", "scs-mp2", "sos-mp2", "dlpno-mp2",
                  "dlpno-ccsd", "dlpno-ccsd(t)", "ovgf"):
        label = f"{scf_reference.upper()} + {method.upper()}"
    return f"  Job: {label}  basis={basis_name}"


def _make_semiempirical_ase_calculator(
    molecule: Molecule,
    method: str,
):
    """Return an ASE Calculator that computes energy via the
    semiempirical dispatcher and forces via central finite differences."""
    try:
        from ase.calculators.calculator import Calculator
        from ase.units import Bohr, Hartree
    except ImportError:
        raise ImportError("Geometry optimization requires ASE: pip install ase")

    import numpy as np

    from ._vibeqc_core import Atom as _Atom
    from ._vibeqc_core import Molecule as _Molecule

    h = 0.001  # bohr

    class _SemiempiricalCalculator(Calculator):
        implemented_properties = ["energy", "forces"]

        def calculate(self, atoms, properties, system_changes):
            # Record self.atoms per the ASE Calculator contract. Without
            # it check_state() reports every query as changed, so cached
            # results are discarded (each get_* triggers a fresh solve)
            # and TrajectoryWriter finds no stored energy — semiempirical
            # optimization trajectories lost their per-frame energies.
            Calculator.calculate(self, atoms, properties, system_changes)
            positions_bohr = atoms.positions / Bohr
            mol = _Molecule(
                [
                    _Atom(int(z), list(xyz))
                    for z, xyz in zip(atoms.numbers, positions_bohr)
                ],
                charge=molecule.charge,
                multiplicity=molecule.multiplicity,
            )

            result = _run_semiempirical(method, mol)
            energy_ha = float(getattr(result, "energy", 0.0))
            self.results["energy"] = energy_ha * Hartree

            if "forces" in properties:
                forces_ha_bohr = np.zeros((len(atoms), 3))
                for i in range(len(atoms)):
                    for c in range(3):
                        pos_plus = positions_bohr.copy()
                        pos_plus[i, c] += h
                        mol_p = _Molecule(
                            [
                                _Atom(int(z), list(xyz))
                                for z, xyz in zip(atoms.numbers, pos_plus)
                            ],
                            charge=molecule.charge,
                            multiplicity=molecule.multiplicity,
                        )
                        e_p = float(
                            getattr(_run_semiempirical(method, mol_p), "energy", 0.0)
                        )

                        pos_minus = positions_bohr.copy()
                        pos_minus[i, c] -= h
                        mol_m = _Molecule(
                            [
                                _Atom(int(z), list(xyz))
                                for z, xyz in zip(atoms.numbers, pos_minus)
                            ],
                            charge=molecule.charge,
                            multiplicity=molecule.multiplicity,
                        )
                        e_m = float(
                            getattr(_run_semiempirical(method, mol_m), "energy", 0.0)
                        )

                        forces_ha_bohr[i, c] = (e_m - e_p) / (2.0 * h)
                self.results["forces"] = forces_ha_bohr * (Hartree / Bohr)

    return _SemiempiricalCalculator()


def _make_wavefunction_ase_calculator(
    molecule: Molecule,
    basis_name: str,
    method: str,
    *,
    functional=None,
    selected_ci_options=None,
    dmrg_options=None,
    v2rdm_options=None,
    transcorrelated_options=None,
    casci_options=None,
    caspt2_options=None,
    casscf_options=None,
    active_space=None,
    cas_reference=None,
    dft_plus_u: Optional[List["HubbardSite"]] = None,
):
    """Return an ASE Calculator that computes energy via the wavefunction
    solver and forces via central finite differences (h = 0.001 bohr).

    When ``dft_plus_u`` is set, the per-geometry SCF (energy + each FD
    gradient probe) runs with +U via :func:`_run_single_point`'s
    ``dft_plus_u`` kwarg. The analytic +U gradient (Increment 3) is
    *not* used by this calculator — FD is good enough for BFGS step
    direction and avoids re-plumbing the calculator's force-call
    contract. For a tight optimization with analytic +U gradient,
    call :func:`vibeqc.compute_gradient` (or sibling) directly with
    ``dft_plus_u=`` per-step in your own loop.
    """
    try:
        from ase.calculators.calculator import Calculator
        from ase.units import Bohr, Hartree
    except ImportError:
        raise ImportError("Geometry optimization requires ASE: pip install ase")

    import numpy as np

    from ._vibeqc_core import Atom, BasisSet
    from ._vibeqc_core import Molecule as _Molecule

    h = 0.001  # bohr — central-finite-difference step

    class _WavefunctionCalculator(Calculator):
        implemented_properties = ["energy", "forces"]

        def calculate(self, atoms, properties, system_changes):
            # Record self.atoms per the ASE Calculator contract. Without
            # it check_state() reports every query as changed, so cached
            # results are discarded (each get_* triggers a fresh solve)
            # and TrajectoryWriter finds no stored energy — wavefunction
            # optimization trajectories lost their per-frame energies.
            Calculator.calculate(self, atoms, properties, system_changes)
            # Convert ASE Atoms → vibeqc Molecule
            positions_bohr = atoms.positions / Bohr
            mol = _Molecule(
                [
                    Atom(int(z), list(xyz))
                    for z, xyz in zip(atoms.numbers, positions_bohr)
                ],
                charge=molecule.charge,
                multiplicity=molecule.multiplicity,
            )
            basis = BasisSet(mol, basis_name)

            # Compute energy at current geometry.  This call and the two
            # FD-displaced calls below must pass the *same* solver options:
            # any mismatch makes the BFGS energy and its forces sample
            # different surfaces (pre-2026-06-12 the energy call dropped
            # casscf_options while the FD calls carried it).
            result = _run_single_point(
                method,
                mol,
                basis,
                functional=functional,
                selected_ci_options=selected_ci_options,
                dmrg_options=dmrg_options,
                v2rdm_options=v2rdm_options,
                transcorrelated_options=transcorrelated_options,
                casci_options=casci_options,
                caspt2_options=caspt2_options,
                casscf_options=casscf_options,
                active_space=active_space,
                cas_reference=cas_reference,
                dft_plus_u=dft_plus_u,
            )
            energy_ha = float(getattr(result, "energy", 0.0))
            self.results["energy"] = energy_ha * Hartree

            # Numerical forces via central differences
            if "forces" in properties:
                forces_ha_bohr = np.zeros((len(atoms), 3))
                for i in range(len(atoms)):
                    for c in range(3):
                        # +h displacement
                        pos_plus = positions_bohr.copy()
                        pos_plus[i, c] += h
                        mol_plus = _Molecule(
                            [
                                Atom(int(z), list(xyz))
                                for z, xyz in zip(atoms.numbers, pos_plus)
                            ],
                            charge=molecule.charge,
                            multiplicity=molecule.multiplicity,
                        )
                        basis_plus = BasisSet(mol_plus, basis_name)
                        result_plus = _run_single_point(
                            method,
                            mol_plus,
                            basis_plus,
                            functional=functional,
                            selected_ci_options=selected_ci_options,
                            dmrg_options=dmrg_options,
                            v2rdm_options=v2rdm_options,
                            transcorrelated_options=transcorrelated_options,
                            casci_options=casci_options,
                            caspt2_options=caspt2_options,
                            casscf_options=casscf_options,
                            active_space=active_space,
                            cas_reference=cas_reference,
                            dft_plus_u=dft_plus_u,
                        )
                        e_plus = float(getattr(result_plus, "energy", 0.0))

                        # -h displacement
                        pos_minus = positions_bohr.copy()
                        pos_minus[i, c] -= h
                        mol_minus = _Molecule(
                            [
                                Atom(int(z), list(xyz))
                                for z, xyz in zip(atoms.numbers, pos_minus)
                            ],
                            charge=molecule.charge,
                            multiplicity=molecule.multiplicity,
                        )
                        basis_minus = BasisSet(mol_minus, basis_name)
                        result_minus = _run_single_point(
                            method,
                            mol_minus,
                            basis_minus,
                            functional=functional,
                            selected_ci_options=selected_ci_options,
                            dmrg_options=dmrg_options,
                            v2rdm_options=v2rdm_options,
                            transcorrelated_options=transcorrelated_options,
                            casci_options=casci_options,
                            caspt2_options=caspt2_options,
                            casscf_options=casscf_options,
                            active_space=active_space,
                            cas_reference=cas_reference,
                            dft_plus_u=dft_plus_u,
                        )
                        e_minus = float(getattr(result_minus, "energy", 0.0))

                        # Central difference: F = -dE/dR
                        forces_ha_bohr[i, c] = -(e_plus - e_minus) / (2 * h)

                self.results["forces"] = forces_ha_bohr * (Hartree / Bohr)

    return _WavefunctionCalculator()


def _optimize_geometry(
    molecule: Molecule,
    basis_name: str,
    *,
    functional: Optional[str],
    trajectory_path: Path,
    fmax: float,
    max_steps: int,
    dispersion_params: Optional[D3BJParams] = None,
    method: str = "rhf",
    selected_ci_options=None,
    dmrg_options=None,
    v2rdm_options=None,
    transcorrelated_options=None,
    casci_options=None,
    caspt2_options=None,
    casscf_options=None,
    active_space=None,
    cas_reference=None,
    rhf_options: Optional[RHFOptions] = None,
    uhf_options: Optional[UHFOptions] = None,
    rks_options: Optional[RKSOptions] = None,
    uks_options: Optional[UKSOptions] = None,
    mlip_options: Optional[MLIPOptions] = None,
) -> Molecule:
    """Run an ASE/BFGS geometry optimization, writing frames to a .traj file.

    Returns a new Molecule at the optimized geometry. The SCF options
    apply to every intermediate geometry, not just the final point — use
    them to bump ``max_iter`` / add damping for systems where BFGS
    sometimes lands on a hard-to-converge geometry (H-bonded clusters,
    near-degenerate states).

    For wavefunction methods (``selected_ci``, ``dmrg``, ``v2rdm``,
    ``transcorrelated_ci``, ``casci``, ``casscf``), forces are computed
    via central finite differences because the solvers do not yet provide
    analytic gradients. The per-step calculator receives the same solver
    options (``casscf_options``, ``casci_options``, ``cas_reference``, …)
    as the final single point, so the optimizer walks the same surface
    the reported final energy is evaluated on.
    """
    try:
        from ase import Atoms
        from ase.io.trajectory import Trajectory
        from ase.optimize import BFGSLineSearch
        from ase.units import Bohr
    except ImportError as exc:
        raise ImportError(
            "Geometry optimization requires ASE. Install it with "
            "`pip install ase` into your vibe-qc venv."
        ) from exc

    from .ase import VibeQC

    positions_ang = [[coord * Bohr for coord in atom.xyz] for atom in molecule.atoms]
    atoms = Atoms(
        numbers=[atom.Z for atom in molecule.atoms],
        positions=positions_ang,
    )

    _se_methods = {
        "dftb0",
        "scc_dftb",
        "pm6",
        "gfn2_xtb",
        "om1",
        "om2",
        "om3",
        "msindo",
    }
    # casci / casscf joined 2026-06-12: they previously fell through to the
    # mean-field VibeQC calculator below, so the optimizer silently walked
    # the RHF/RKS surface while the final single point ran the CAS solver.
    # nevpt2 / caspt2 / mrci / fci still fall through (routing them onto an
    # FD-on-PT2 surface is a cost/policy call for the maintainer).
    wavefunction_methods = {
        "selected_ci",
        "dmrg",
        "v2rdm",
        "transcorrelated_ci",
        "casci",
        "casscf",
    }
    if method in _se_methods:
        atoms.calc = _make_semiempirical_ase_calculator(
            molecule,
            method,
        )
    elif method in _MLIP_METHODS:
        # MACE geometry optimization: ASE BFGS drives the MACE ASE
        # calculator directly (it works in eV/Angstrom natively, the units
        # ASE optimizers expect). run_job's early ASL gate already
        # validated the model; MACEModel re-checks (defense in depth).
        from vibeqc.mlip.mace import MACEModel

        atoms.calc = MACEModel(molecule, mlip_options).calculator
    elif method in wavefunction_methods:
        atoms.calc = _make_wavefunction_ase_calculator(
            molecule,
            basis_name,
            method,
            functional=functional,
            selected_ci_options=selected_ci_options,
            dmrg_options=dmrg_options,
            v2rdm_options=v2rdm_options,
            transcorrelated_options=transcorrelated_options,
            casci_options=casci_options,
            caspt2_options=caspt2_options,
            casscf_options=casscf_options,
            active_space=active_space,
            cas_reference=cas_reference,
        )
    else:
        atoms.calc = VibeQC(
            basis=basis_name,
            charge=molecule.charge,
            multiplicity=molecule.multiplicity,
            functional=functional,
            dispersion=dispersion_params,
            rhf_options=rhf_options,
            uhf_options=uhf_options,
            rks_options=rks_options,
            uks_options=uks_options,
        )

    with Trajectory(str(trajectory_path), "w", atoms) as traj:
        # BFGSLineSearch refuses uphill steps by construction — essential
        # on flat / weakly-bound PESs (H-bonded clusters, dispersion-dominated
        # complexes) where plain BFGS's Hessian extrapolation routinely
        # pushes atoms apart. Works on covalent minima too, so we use it
        # uniformly. Default ASE line-search parameters are well-tuned;
        # don't clamp maxstep here or the line search can't converge.
        opt = BFGSLineSearch(atoms, logfile=None)
        opt.attach(traj)
        opt.run(fmax=fmax, steps=max_steps)

    # Re-build a vibe-qc Molecule at the optimized geometry.
    from ._vibeqc_core import Atom as _Atom

    new_positions_bohr = atoms.positions / Bohr
    optimized = Molecule(
        [_Atom(int(z), list(xyz)) for z, xyz in zip(atoms.numbers, new_positions_bohr)],
        molecule.charge,
        molecule.multiplicity,
    )
    return optimized


def _resolve_optimizer_backend(requested: str) -> str:
    """Resolve ``optimizer_backend`` to ``"ase"`` or ``"native"``."""
    if requested == "ase":
        try:
            import ase  # noqa: F401
        except ImportError:
            raise ImportError(
                "optimizer_backend='ase' requires ASE. Install with "
                "`pip install ase` or use optimizer_backend='native'."
            ) from None
        return "ase"
    if requested == "native":
        return "native"
    if requested != "auto":
        raise ValueError(
            f"optimizer_backend={requested!r} — expected 'auto', 'ase', or 'native'."
        )
    # "auto": prefer ASE if installed, otherwise native.
    try:
        import ase  # noqa: F401
    except ImportError:
        return "native"
    return "ase"


[docs] def run_job( molecule: Molecule, *, basis: Optional[str] = None, method: Method = "auto", functional: Optional[str] = None, output: str | os.PathLike = "output", optimize: bool = False, write_molden_file: bool = True, write_xyz_file: bool = True, write_population_file: bool = True, write_cube: Union[bool, str, int, list, tuple, None] = False, cube_spacing: float = 0.2, cube_padding: float = 4.0, citations: bool = True, dry_run: bool = False, fmax: float = 0.05, max_opt_steps: int = 200, optimizer_backend: str = "auto", memory_override: bool = False, num_threads: Optional[int] = None, dispersion: DispersionSpec = None, solvent: object = None, record_hostname: bool = True, rhf_options: Optional[RHFOptions] = None, uhf_options: Optional[UHFOptions] = None, rks_options: Optional[RKSOptions] = None, uks_options: Optional[UKSOptions] = None, selected_ci_options: Optional[SelectedCIOptions] = None, dmrg_options: Optional[DMRGOptions] = None, v2rdm_options: Optional[V2RDMOptions] = None, transcorrelated_options: Optional[TranscorrelatedOptions] = None, casci_options: Optional[CASCIOptions] = None, caspt2_options: Optional[CASPT2Options] = None, casscf_options: Optional[CASSCFOptions] = None, dlpno_options: Optional["DLPNOMP2Options"] = None, dlpno_ccsd_options: Optional["DLPNOCCSDPilotOptions"] = None, active_space: Optional[tuple[int, int]] = None, cas_reference: Optional[str] = None, mlip_options: Optional[MLIPOptions] = None, ccm_options: Optional["CCMOptions"] = None, progress: Union[bool, ProgressLogger, None] = None, verbose: Optional[int] = None, use_logging: bool = False, perf_log: Optional[Union[str, os.PathLike, bool]] = None, structured_log: Union[bool, str, os.PathLike, None] = False, crash_dump: Union[bool, str, os.PathLike, None] = True, # QVF visualisation archive (v1). output_qvf: bool = False, hessian: bool = False, # Partial Hessian: atom indices to hold fixed so only the unfrozen # atoms are displaced (6M instead of 6N SCFs); the frequencies are # then vibrational-only (no gas-phase trans/rot). See HessianFDOptions. hessian_frozen_indices: Optional[List[int]] = None, # Thermochemistry (RRHO ideal-gas). When a Hessian is computed # (``hessian=True``), the harmonic frequencies feed an end-of-run # thermochemistry block (ZPE + thermal U/H/S/G at T, p). Pass a # ThermoOptions to set temperature / pressure / rotational symmetry # number; None uses the defaults (298.15 K, 1 atm, σ=1). thermo_options: Optional["ThermoOptions"] = None, # Atomization energy (Σ E_atom − E_mol) for mean-field methods. When True, # free-atom ground-state references are computed at the same level (cached # per element) and an atomization block is written. Main-group H–Kr; see # vibeqc.atomization. atomization: bool = False, # DFT+U (Dudarev rotationally-invariant) — Increment 2c. # List of HubbardSite objects; empty / None ≡ no +U. dft_plus_u: Optional[List["HubbardSite"]] = None, # MSINDO NDDO mode (method="msindo"): the reference program's default mode # (separate parametrization + two-centre multipole 2e Fock). Ignored for # other methods. nddo: bool = False, ) -> object: """Run a vibe-qc SCF job and write the standard output files. Parameters ---------- molecule The :class:`Molecule` describing the system (bohr coordinates). basis libint-recognized basis-set name. method ``"rhf"``, ``"uhf"``, ``"rks"``, ``"uks"``, ``"auto"``, ``"ccsd"``, or ``"ccsd(t)"`` (picks restricted vs unrestricted from ``molecule.multiplicity`` and switches between HF/KS based on whether ``functional`` is set). functional XC functional for RKS / UKS (e.g. ``"PBE"``, ``"B3LYP"``). Ignored for HF. output Path stem for the generated files. ``{output}.out`` always; also ``{output}.molden`` unless disabled; and ``{output}.traj`` when ``optimize=True``. optimize Run a BFGS geometry optimization first (via ASE), then the final SCF on the optimized geometry. The trajectory is written for animation (openable with ASE-aware viewers). write_molden_file Emit ``{output}.molden`` at the converged geometry. Default True. fmax, max_opt_steps Optimizer tolerance (eV/Å) and iteration limit. Ignored unless ``optimize=True``. memory_override If ``False`` (default), the driver estimates peak memory and aborts with :class:`InsufficientMemoryError` when the estimate exceeds the machine's available RAM. Set to ``True`` to proceed anyway — at the risk of swap-thrashing or a system freeze. num_threads If set, pin the OpenMP thread count for the duration of the calculation. ``None`` (default) leaves the current setting in place — which is usually "all cores" unless the environment variable ``OMP_NUM_THREADS`` is set or :func:`vibeqc.set_num_threads` was called earlier. The actual thread count used is recorded in the output log for reproducibility. dispersion Post-SCF D3(BJ) dispersion correction. Accepts: * ``None`` (default) — no dispersion. * ``True`` or ``"d3bj"`` — use D3-BJ params for the current DFT functional. * A functional name (``"pbe"``, ``"b3lyp"``, …) — use its D3-BJ params (useful for ``method="rhf"`` + ``"hf"`` dispersion, or for overriding the SCF functional in the damping lookup). * A :class:`D3BJParams` instance — used directly. The energy correction is written to the ``.out`` file, added to the returned object as ``e_dispersion`` / ``energy_total`` (the raw SCF ``.energy`` is preserved untouched), and, when ``optimize=True``, added to the forces the optimizer sees. Routes through :func:`vibeqc.compute_d3bj` with ``backend="auto"`` — the reference ``dftd3`` backend is used when installed, otherwise the D1a framework stub. See :mod:`vibeqc.dispersion` for details. solvent v0.9.0 CPCM / COSMO implicit solvation. Accepts: * ``None`` (default) — gas-phase SCF. * A preset name (``"water"``, ``"dmso"``, ``"acetonitrile"``, ``"chloroform"``, ``"benzene"``, …) — looks up the static dielectric ε from :data:`vibeqc.SOLVENT_PRESETS`. * A numeric ε (e.g. ``78.39``) — custom dielectric. * A dict (``{"epsilon": 25.0, "variant": "cosmo", ...}``) or a :class:`vibeqc.SolventModel` instance for full control over cavity construction (Bondi radii, Lebedev order, switching width, max macro-iterations). Routes through :func:`vibeqc.run_cpcm_scf`; macro-iterates the apparent surface charge against the SCF density until ΔE_solv < 1e-6 Ha (typically 3–5 outer cycles). The total energy is the in-solvent value; the gas-phase reference is retained on ``result.solvent_result.e_gas``. See :doc:`/user_guide/solvation` for the full theory and the cavity / preset table. record_hostname If ``False``, the per-job ``{output}.system`` manifest writes ``hostname = "<redacted>"`` instead of the live hostname. The ``VIBEQC_NO_HOSTNAME=1`` environment variable does the same thing globally — engineering's bundled docs runs use the env var so example outputs in ``docs/_static/examples/`` don't leak machine names. Other manifest fields (CPU model, OS, memory, library versions) are not redacted; the redaction is scoped to the hostname only. rhf_options / uhf_options / rks_options / uks_options Optional override for the respective SCF options struct. casci_options CASCI knobs for ``method="casci"`` (the active space itself comes from ``active_space=``). ``CASCIOptions(nroots=N)`` requests N CI roots: ``result.energy`` stays the ground root, per-root energies land in ``result.root_energies`` (per-root ⟨S²⟩ in ``result.root_s2``) and the .out solver block prints the root table. Ignored by every other method — the SA-CASSCF root count is ``casscf_options.nroots``, the MS-CASPT2 model-space size ``caspt2_options.nroots``. progress Live progress logger for long-running jobs. Default behavior is **ON** — the job emits a banner, per-stage milestones, and a final summary to stdout (line-flushed) so the canonical ``nohup python LiH.py > LiH.log 2>&1 &`` + ``tail -f LiH.log`` workflow shows progress in real time. The ``.out`` file is also line-buffered so ``tail -f output-LiH.out`` works without any extra setup. Pass ``progress=False`` to silence stdout (the ``.out`` file is still written normally — only the live mirror is suppressed). Pass a :class:`vibeqc.ProgressLogger` instance for fully custom routing (tee to a persistent file, mute, thread one logger through nested calls). Set ``VIBEQC_LIVE_LOGGING=0`` in the environment to disable live progress globally — useful for batch scripts that don't want to edit every input file. The env var only takes effect when ``progress`` is left at its default (``None``); explicit ``progress=True`` / ``progress=False`` / a ``ProgressLogger`` instance always wins, so a debugging session can re-enable progress for one shell. Per-iteration progress for periodic SCFs (which run in Python) is streamed live through this same logger via the lower-level ``run_*_periodic_*`` entry points; molecular SCFs run in C++ and only emit a pre-SCF banner + post-SCF summary live, with the per-iteration trace landing in the ``.out`` when the SCF returns. verbose Integer verbosity level (PySCF convention, 0..9, default 4). Each level is a strict superset of the one below, so bumping ``verbose`` only adds output: * ``0`` — silent (nothing live; ``.out`` is still written) * ``1`` — banner + warnings + final SCF summary only * ``2`` — add per-stage milestones + ``info()`` lines * ``3`` — add per-stage timing on stage exit * ``4`` — add per-iteration SCF rows (DEFAULT) * ``5`` — add inline RSS-memory snapshots * ``6+`` — phase-level wall-clock breakdown live (overlaps the post-mortem ``.perf`` log on purpose) Pass ``verbose=None`` (the default) to read the ``VIBEQC_VERBOSE`` env var; if unset, falls back to 4. Ignored when ``progress`` is a :class:`ProgressLogger` instance — that logger's own level wins. use_logging If ``True``, route progress through ``logging.getLogger("vibeqc.run_job")`` instead of bare ``stdout`` writes. Banner / stage milestones land at ``INFO``; per-iteration SCF rows at ``DEBUG``; warnings at ``WARNING``. Composes naturally with stdlib handlers (``RotatingFileHandler``, syslog, ``dictConfig``):: import logging logging.basicConfig(level=logging.INFO) vq.run_job(..., use_logging=True) ``progress=False`` still wins as a hard kill switch — the verbose-level gate runs *before* the logging call, so a silent run stays silent regardless of the active logging config. Ignored when ``progress`` is a pre-built :class:`ProgressLogger` instance. perf_log Optional path (or ``True`` to use ``{output}.perf``) to write a post-mortem performance / debug breakdown — phase-level wall + CPU times, memory snapshots, parallelism flags. The live ``progress=`` log shows progress *during* the run; the perf log shows where the time went *afterwards*. Off by default. Pass an explicit path, ``True`` to emit alongside ``{output}.out``, or set the ``VIBEQC_PERFLOG=path`` env var (which wins when ``perf_log`` is left at ``None``). See :mod:`vibeqc.perf` for the full instrumentation surface. structured_log Optional NDJSON (one-JSON-record-per-line) log capturing every SCF transition — banner, job_start, memory_estimate, per-iter rows, scf_converged, properties, job_end. Off by default. Pass ``True`` to emit ``{output}.scf.jsonl``, a path-like to write there explicitly, or set the ``VIBEQC_STRUCTURED_LOG=path`` env var (which wins when ``structured_log`` is left at ``False``). The format is stable: events are append-only, fields are never renamed or removed. See :mod:`vibeqc.structured_log` for the full event catalog. crash_dump Write a snapshot to ``{output}.dump`` (TOML) plus binary attachments (``.dump.density.npy``, ``.dump.fock.npy``, ``.dump.mo.npy``) when the SCF fails ungracefully — raised exception (NaN, linear dependence, memory error). Default ``True``: post-mortem reproducibility costs nothing on success and saves a re-run on failure. Pass ``False`` (or set ``VIBEQC_NO_CRASH_DUMP=1`` in the environment) to disable. The dump is written alongside ``{output}.out`` in the standard ``output.*`` family — re-attach the ``.dump`` + ``.dump.density.npy`` to a bug report and the maintainer can reconstruct the failing state via :func:`vibeqc.load_dump`. See :mod:`vibeqc.crash_dump` for the dump format. The exception is always re-raised after the dump is written; ``crash_dump=True`` does *not* swallow failures. hessian When True, compute harmonic vibrational frequencies via finite-difference Hessian. Default False. Cost: ~6N SCF evals. Results printed to .out and embedded in QVF for vibe-view. Returns ------- The SCF result object (RHFResult / UHFResult / RKSResult / UKSResult). """ output_stem = Path(os.fspath(output)) out_path = output_stem.with_suffix(".out") molden_path = output_stem.with_suffix(".molden") traj_path = output_stem.with_suffix(".traj") # Resolve perf_log target. ``True`` → emit to {output}.perf; # str/Path → use that path verbatim; None → defer to # VIBEQC_PERFLOG env var inside perf_log() (or no-op if unset). if perf_log is True: _perf_target: Optional[Path] = output_stem.with_suffix(".perf") elif perf_log is False or perf_log is None: _perf_target = None else: _perf_target = Path(os.fspath(perf_log)) # Resolve structured_log target. Default OFF (False) — only emit # when the caller opts in. Resolution mirrors perf_log: # True → {output}.scf.jsonl # str/PathLike → use verbatim # False / None → defer to VIBEQC_STRUCTURED_LOG env var (or # leave disabled if the env var is unset) if structured_log is True: _structured_target: Optional[Path] = output_stem.with_suffix(".scf.jsonl") elif structured_log is False or structured_log is None: _structured_target = None else: _structured_target = Path(os.fspath(structured_log)) # Resolve crash_dump target. Default ON: dump on failure unless the # caller explicitly opts out (or VIBEQC_NO_CRASH_DUMP=1). The # post-mortem snapshot is cheap on success (zero bytes written), # and saves a re-run on failure. Resolution: # True → {output}.dump (default) # str/PathLike → use verbatim as the stem # False → disabled # None → also default-on (treated as True) _crash_off_env = os.environ.get( "VIBEQC_NO_CRASH_DUMP", "", ).strip().lower() in ("1", "true", "yes", "on") if crash_dump is False or _crash_off_env: _crash_stem: Optional[Path] = None elif crash_dump is True or crash_dump is None: _crash_stem = output_stem else: _crash_stem = Path(os.fspath(crash_dump)) # Default-ON progress: when the caller didn't pass a value, # check the env-var opt-out (VIBEQC_LIVE_LOGGING=0) and otherwise # turn it on. This lets every backgrounded ``python input.py > # log 2>&1 &`` show progress without the user having to learn # a new kwarg — answering the "is my job stuck or actually # running?" question by default. Explicit progress= (True/False/ # logger) bypasses the env var entirely. if progress is None: env = os.environ.get("VIBEQC_LIVE_LOGGING", "").strip().lower() if env in ("0", "false", "no", "off"): progress = False else: progress = True # Verbose level resolution (v0.5.3). When the caller leaves # ``verbose=None`` we read VIBEQC_VERBOSE; otherwise the # explicit argument wins. Falls back to the package default # (level 4 — banner + stages + per-iter SCF) when neither # is set. The env var is a parsing best-effort: a junk value # silently falls back to the default rather than raising, # since a typo in $VIBEQC_VERBOSE shouldn't kill someone's # overnight run. if verbose is None: env_verbose = os.environ.get("VIBEQC_VERBOSE", "").strip() if env_verbose: try: verbose_level = int(env_verbose) except ValueError: verbose_level = None # type: ignore[assignment] else: verbose_level = max(0, verbose_level) else: verbose_level = None # type: ignore[assignment] else: verbose_level = max(0, int(verbose)) plog = resolve_progress( progress, verbose=verbose_level, use_logging=use_logging, ) # Composite 3c keyword resolution (e.g. ``method="hf-3c"``). For # non-composite ``method`` values this is a pass-through. Composite # values get their (basis, functional, dispersion) tuple inferred # from the :mod:`vibeqc.composites` registry; raises # :class:`CompositeUnavailable` for recipes whose prerequisites # have not yet landed. method, basis, functional, dispersion, composite_recipe = _apply_composite( method, basis=basis, functional=functional, dispersion=dispersion, molecule=molecule, ) # Semiempirical methods don't use a Gaussian basis set — accept # any basis value (including None / empty). _se_methods = { "dftb0", "scc_dftb", "pm6", "gfn2_xtb", "om1", "om2", "om3", "msindo", "ccm", } if method in _se_methods or method in _MLIP_METHODS: if not basis: basis = "" # placeholder; semiempirical / MLIP code ignores it elif not basis: raise ValueError( "run_job: basis is required. Pass basis='def2-svp' (or " "another supported basis), or use a composite keyword " "(method='hf-3c', …) which carries its own basis. " "Composite catalogue: " f"{[r.name for r in __import__('vibeqc').list_composites()]}" ) resolved_method = _select_method(method, molecule, functional) # Early CCM gate (fail fast, clean "unavailable" — not a silent gap): the # periodic Cyclic Cluster Model is single-point only through run_job. CCM # gradients (finite-difference) + geometry optimisation live in # msindo_ccm.ccm_gradient_fd / ccm_optimize; analytic CCM gradients and the # run_job optimiser hook are not wired up. if resolved_method == "ccm" and optimize: raise NotImplementedError( "method='ccm' geometry optimisation is not available through " "run_job. Use vibeqc.semiempirical.methods.msindo_ccm.ccm_optimize " "(finite-difference, fixed Wigner-Seitz) for relaxed CCM geometries." ) # Early MLIP gate (fail fast, before any output / SCF machinery): an # academic-only (ASL) MACE model needs explicit acknowledgment, and the # model must cover every element in the system — else a config error # would otherwise surface as a crashed SCF. Both checks run again inside # MACEModel (defense in depth). if resolved_method in _MLIP_METHODS: from vibeqc.mlip import MLIPOptions, resolve_model from vibeqc.mlip.mace import element_coverage_error, enforce_academic_license _mlopts = mlip_options or MLIPOptions() _minfo = resolve_model(_mlopts.model) enforce_academic_license(_minfo, _mlopts) _ecov = element_coverage_error(_minfo, molecule) if _ecov is not None: raise ValueError(_ecov) # Composite recipes may request D4 dispersion; the D4 path is # wired through compute_d4 in the post-SCF block below. A D4 # request short-circuits the D3-BJ resolution. use_d4 = isinstance(dispersion, str) and dispersion.strip().lower() == "d4" # Build the declarative OutputPlan once. It drives BOTH the # dry-run pre-flight (below) and the real run's manifest # lifecycle (the OutputWriter constructed before the SCF). One # source of truth for "what files will this job produce". _cube_req_plan = parse_write_cube_kwarg(write_cube) _output_plan = OutputPlan.from_run_job_kwargs( output=output_stem, method=resolved_method, basis=basis, functional=functional, optimize=optimize, write_molden_file=write_molden_file, write_xyz=write_xyz_file, citations=citations, write_population=write_population_file, write_cube_density=_cube_req_plan.density, cube_mo_labels=tuple(str(lbl) for lbl in _cube_req_plan.mo_labels), perf_log=perf_log, structured_log=structured_log, crash_dump=crash_dump, output_qvf=output_qvf, ) # Dry-run short-circuit (Phase O3). When ``dry_run=True`` or the # ``VIBEQC_DRY_RUN`` env var is set, write a one-shot # ``{output}.system`` with ``[outputs].status = "dry_run"``, # print the declared-artefacts summary to stdout, and return # ``None`` without running the SCF. This is the pre-flight path # that ``vq submit`` uses to learn which files a job will # produce. Run before any heavyweight setup (basis-set # construction, memory estimate, perf tracker) so the # short-circuit is genuinely cheap. if dry_run or is_dry_run_requested(): # Make sure the parent directory exists so we don't fail on a # freshly-typed `-o /tmp/new-subdir/foo` path. output_stem.parent.mkdir(parents=True, exist_ok=True) dry_run_manifest( _output_plan, record_hostname=record_hostname, ) return None # Resolve dispersion up front so a bad spec fails before we touch # the filesystem. d3_params is None iff dispersion is disabled. # A D4 request (composite recipes r²SCAN-3c / ωB97X-3c) short- # circuits the D3-BJ path — D4 is applied in the post-SCF block. if use_d4: d3_params = None else: d3_params = _resolve_dispersion(dispersion, functional) # Pin the thread count if the caller asked for one; otherwise leave # the current state alone. We always query at the end so the output # reports what was actually used (which may differ from what the # user asked for — e.g. on a single-core build or OpenMP-disabled # environment). if num_threads is not None: set_num_threads(int(num_threads)) threads_in_use = get_num_threads() # Ensure the parent directory exists so users can give names like # "runs/water" without pre-creating "runs/". out_path.parent.mkdir(parents=True, exist_ok=True) # Construct the OutputWriter — the per-job coordinator that owns # ``{output}.system`` for the rest of the run. This writes the # manifest immediately with the ``[plan]`` section + ``[outputs] # .status = "running"`` *before* the SCF starts, so a ``vq`` # daemon polling the workspace sees the declared output set and a # liveness signal from the first moment. ``finish()`` / # ``crash()`` at the exit paths flip the status; the end-of-job # record-sweep fills ``[[outputs.files]]`` with the artefacts # that actually landed. Replaces the bare end-of-job # ``write_system_manifest`` call (pre-v1.0 dispatch-overhaul). _output_writer = OutputWriter( _output_plan, record_hostname=record_hostname, ) t_job_start = time.perf_counter() # ``buffering=1`` requests line-buffered I/O so a `tail -f` against # the .out file shows new lines as the SCF / property pipeline emits # them. We also call ``f.flush()`` explicitly after major blocks for # the same reason — line buffering only kicks in on newlines. # # The perf-log context manager wraps the whole body so PerfScope # entries from anywhere downstream — periodic SCF stages, gradient # calls, anything that calls ``with PerfScope("..."): ...`` — # accumulate into the same tracker. When ``_perf_target`` is None # (and ``$VIBEQC_PERFLOG`` isn't set), the context manager is a # cheap no-op: PerfScope.__enter__/__exit__ become a single # ContextVar lookup with no I/O. hessian_result = None with ( _perf_log_ctx(_perf_target), _structured_log_ctx(_structured_target) as _slog, crash_dump_context(_crash_stem) as _crash_target, open(out_path, "w", encoding="utf-8", buffering=1) as f, PerfScope("run_job.total"), ): # Structured log: banner + job_start records. Always safe to # emit — _slog.emit is a no-op when the log is disabled. _libs = library_versions() _fp = run_fingerprint( method=resolved_method, basis=basis, functional=functional, molecule=molecule, ) _slog.emit( "banner", vibeqc_version=VIBEQC_VERSION, libint=_libs.get("libint", "unknown"), libxc=_libs.get("libxc", "unknown"), spglib=_libs.get("spglib", "unknown"), run_fingerprint=_fp, ) _slog.emit( "job_start", method=resolved_method, basis=basis, functional=functional, optimize=bool(optimize), threads=int(threads_in_use), n_atoms=int(len(list(molecule.atoms))), charge=int(molecule.charge), multiplicity=int(molecule.multiplicity), n_electrons=int(molecule.n_electrons()), output_stem=str(output_stem), ) f.write(banner() + "\n\n") # Post-SCF methods resolve their SCF to rhf/uhf; the *header* should # still name the correlation method (RHF + MP2/CCSD), so display the # original keyword for those (but keep "auto" → resolved everywhere # else). _header_method = method if method in _POSTSCF_CITE_METHODS else resolved_method # resolved_method is the mean-field SCF a post-SCF method ran on # (rhf/uhf) — name it as the reference in the header. _scf_ref = resolved_method if resolved_method in ("rhf", "uhf") else "rhf" f.write( _job_header(_header_method, basis, functional, scf_reference=_scf_ref) + "\n" ) f.write(_geom_summary(molecule) + "\n") f.write(f" Threads: {threads_in_use} (OpenMP shared-memory parallelism)\n\n") f.flush() plog.banner( f"run_job {_job_header(_header_method, basis, functional, scf_reference=_scf_ref).strip()}" ) plog.info(f"Threads: {threads_in_use} (OpenMP shared-memory parallelism)") plog.info(f"Output file: {out_path}") t_opt = 0.0 _traj_frames = [] _traj_energies = [] if optimize: # Increment 3 — analytic +U gradient via the # variational F = F_HF + S V_AO S Fock contribution. # The ASE _WavefunctionCalculator path is plumbed below. # For dft_plus_u, pre-populate the relevant method-options # struct's dft_plus_u_sites / dft_plus_u_ao_groups fields # once at the initial geometry — the AO-group indices are # geometry-invariant (depend only on shell layout per atom # Z + basis name), so the same translation is valid at # every optimization step. if dft_plus_u and resolved_method in ( "rhf", "uhf", "rks", "uks", ): from .dft_plus_u import _apply_dft_plus_u_to_options # Materialise a defaulted options struct if the user # didn't pass one, so the C++ binding sees a real # object with the dft_plus_u_sites field populated. if resolved_method == "rhf": if rhf_options is None: rhf_options = RHFOptions() _opts_for_dft_plus_u = rhf_options elif resolved_method == "uhf": if uhf_options is None: uhf_options = UHFOptions() _opts_for_dft_plus_u = uhf_options elif resolved_method == "rks": if rks_options is None: rks_options = RKSOptions() _opts_for_dft_plus_u = rks_options else: # uks if uks_options is None: uks_options = UKSOptions() _opts_for_dft_plus_u = uks_options _basis_for_dft_plus_u = BasisSet(molecule, basis) _apply_dft_plus_u_to_options( _opts_for_dft_plus_u, _basis_for_dft_plus_u, dft_plus_u, ) _opt_backend = _resolve_optimizer_backend(optimizer_backend) if _opt_backend == "ase": f.write(f" Geometry optimization (ASE/BFGS) -> {traj_path.name}\n") f.write(f" fmax = {fmax} eV/A, max_steps = {max_opt_steps}\n\n") f.flush() with ( plog.stage( "geometry_optimization", detail=f"BFGS, fmax={fmax} eV/A, max_steps={max_opt_steps}", ), PerfScope("geometry_optimization"), ): t0 = time.perf_counter() molecule = _optimize_geometry( molecule, basis, functional=functional, trajectory_path=traj_path, fmax=fmax, max_steps=max_opt_steps, dispersion_params=d3_params, method=resolved_method, selected_ci_options=selected_ci_options, dmrg_options=dmrg_options, v2rdm_options=v2rdm_options, transcorrelated_options=transcorrelated_options, casci_options=casci_options, caspt2_options=caspt2_options, casscf_options=casscf_options, active_space=active_space, cas_reference=cas_reference, rhf_options=rhf_options, uhf_options=uhf_options, rks_options=rks_options, uks_options=uks_options, mlip_options=mlip_options, ) t_opt = time.perf_counter() - t0 f.write(" Optimized geometry\n") f.write(_geom_summary(molecule) + "\n\n") f.flush() # Read trajectory frames for QVF archive. _traj_frames = [] _traj_energies = [] try: from ase.io.trajectory import Trajectory from ase.units import Bohr, Hartree from ._vibeqc_core import Atom traj = Trajectory(str(traj_path)) for atoms in traj: pos_bohr = atoms.positions / Bohr frame_mol = Molecule( [ Atom(int(z), list(xyz)) for z, xyz in zip(atoms.numbers, pos_bohr) ], molecule.charge, molecule.multiplicity, ) _traj_frames.append(frame_mol) try: e_eV = atoms.get_potential_energy() if e_eV is not None and e_eV == e_eV: _traj_energies.append(float(e_eV) / Hartree) except Exception as _traj_e_exc: # ASE calculator may not expose energies # per frame; compatibility shim — never # crash on this. warn_output_failure( _traj_e_exc, traj_path, role="trajectory_frame_energy", category=OutputFailureKind.compatibility_fallback, ) except Exception as _traj_read_exc: # The traj file was written by ASE just above but # we can't read it back — log it (the QVF archive # will lose its trajectory section but the SCF # itself is fine). warn_output_failure( _traj_read_exc, traj_path, role="trajectory_read_for_qvf", category=OutputFailureKind.optional_artifact, ) _traj_frames = [] _traj_energies = [] else: # Native scipy L-BFGS-B backend. from .molecular_optimize import optimize_molecule as _native_opt # Convert fmax from eV/Å to Ha/bohr. 1 eV/Å ≈ 0.045 Ha/bohr. _conv_grad = fmax * 0.045 f.write( f" Geometry optimization (native L-BFGS-B)\n" f" gtol = {_conv_grad:.2e} Ha/bohr, max_steps = {max_opt_steps}\n\n" ) f.flush() with ( plog.stage( "geometry_optimization", detail=f"L-BFGS-B, gtol={_conv_grad:.1e} Ha/bohr", ), PerfScope("geometry_optimization"), ): t0 = time.perf_counter() _opt_result = _native_opt( molecule, basis, method=resolved_method, functional=functional, rhf_options=rhf_options, uhf_options=uhf_options, rks_options=rks_options, uks_options=uks_options, selected_ci_options=selected_ci_options, dmrg_options=dmrg_options, v2rdm_options=v2rdm_options, transcorrelated_options=transcorrelated_options, casci_options=casci_options, caspt2_options=caspt2_options, casscf_options=casscf_options, active_space=active_space, cas_reference=cas_reference, max_iter=max_opt_steps, conv_tol_grad=_conv_grad, dispersion_params=d3_params, solvent=solvent, record_trajectory=bool(output_qvf), progress=False, ) t_opt = time.perf_counter() - t0 molecule = _opt_result.system _traj_frames = _opt_result.trajectory_frames or None _traj_energies = _opt_result.trajectory_energies or None f.write(" Optimized geometry\n") f.write(_geom_summary(molecule) + "\n\n") f.flush() # Semiempirical methods don't use a Gaussian basis; skip # BasisSet construction and memory estimation. if resolved_method in _se_methods or resolved_method in _MLIP_METHODS: basis_obj = None estimate = None else: with PerfScope("basis_set_construction"): basis_obj = BasisSet(molecule, basis) # Memory pre-flight if resolved_method in ("rhf", "uhf", "rks", "uks"): est_opts = { "rhf": rhf_options, "uhf": uhf_options, "rks": rks_options, "uks": uks_options, }.get(resolved_method) else: est_opts = None estimate = estimate_memory( molecule, basis_obj, method=resolved_method, options=est_opts, ) if estimate is not None: f.write( " " + format_memory_report( estimate, override_requested=memory_override, ).replace("\n", "\n ") + "\n\n" ) f.flush() _slog.emit( "memory_estimate", total_gb=float(estimate.total_gb), raw_total_bytes=int(estimate.raw_total_bytes), headroom_factor=float(estimate.headroom_factor), by_category={k: int(v) for k, v in estimate.by_category.items()}, ) check_memory(estimate, allow_exceed=memory_override) if resolved_method in _MLIP_METHODS: plog.info( f"Evaluating {resolved_method.upper()} pre-trained MLIP " "(single forward pass; no SCF)." ) else: plog.info( f"Starting molecular SCF ({resolved_method.upper()}" + (f" / {functional}" if functional else "") + "). Per-iteration progress runs in compiled C++ and is " "not streamed live; the trace will be written when the SCF " "returns." ) # Memory snapshot before SCF: lets the perf log show RSS # growth caused by the integral / Fock build, separate from # the basis-set / pre-flight overhead. from .perf import active_tracker as _at _tracker = _at() if _tracker is not None: _tracker.snapshot_memory("start_of_scf") t0 = time.perf_counter() # Crash-dump scope: if the C++ SCF raises (NaN, linear # dependence, memory error, …), we want a snapshot of the # last known state on disk before the exception bubbles up # and the user has nothing to debug from. The state we can # capture from outside the C++ call is limited (no live # density / Fock) — we record the geometry, options, and # phase. Any deeper introspection needs a future C++ hook. try: with PerfScope(f"scf.{resolved_method}"): result = _run_single_point( resolved_method, molecule, basis_obj, functional=functional, rhf_options=rhf_options, uhf_options=uhf_options, rks_options=rks_options, uks_options=uks_options, selected_ci_options=selected_ci_options, dmrg_options=dmrg_options, v2rdm_options=v2rdm_options, transcorrelated_options=transcorrelated_options, casci_options=casci_options, caspt2_options=caspt2_options, casscf_options=casscf_options, active_space=active_space, cas_reference=cas_reference, mlip_options=mlip_options, ccm_options=ccm_options, solvent=solvent, dft_plus_u=dft_plus_u, nddo=nddo, ) except BaseException as _scf_exc: t_scf = time.perf_counter() - t0 f.write(f"\n SCF FAILED: {type(_scf_exc).__name__}: {_scf_exc}\n") f.flush() _slog.emit( "scf_failed", exception_type=type(_scf_exc).__name__, exception=str(_scf_exc), wall_s=float(t_scf), ) if _crash_target is not None: _opts_for_dump = ( { "rhf": rhf_options, "uhf": uhf_options, "rks": rks_options, "uks": uks_options, }.get(resolved_method) if resolved_method in ("rhf", "uhf", "rks", "uks") else None ) dump_on_failure( _crash_target, _scf_exc, {"phase": f"scf.{resolved_method}", "n_iters_completed": 0}, options=_opts_for_dump, molecule=molecule, ) f.write( f" Crash dump written to " f"{_crash_target.with_suffix('.dump').name}\n" ) f.flush() # Flip the manifest status to "crashed" before the # exception propagates — a ``vq`` daemon polling # ``{output}.system`` then sees the job aborted rather # than mistaking a stale "running" for a live job. try: _output_writer.crash(wall_seconds=t_scf) except Exception as _crash_exc: # Manifest bookkeeping must never mask the real SCF # exception we're about to re-raise. warn_output_failure( _crash_exc, _output_writer.manifest_path, role="manifest_crash_status", category=OutputFailureKind.manifest_recording, ) # Re-raise: crash_dump=True does NOT swallow the failure; # it just makes the failure debuggable. raise t_scf = time.perf_counter() - t0 if _tracker is not None: _tracker.snapshot_memory("end_of_scf") # Also record the per-iteration trace so the perf log # carries the same info as the .out's SCF trace, in # tabular form. The C++ result.scf_trace is the canonical # source for these numbers. for step in getattr(result, "scf_trace", []) or []: _tracker.add_scf_iter( iter=int(step.iter), energy=float(step.energy), dE=float(step.delta_e) if step.iter > 1 else None, grad=float(step.grad_norm), diis=int(step.diis_subspace), ) # Structured log: per-iter rows sourced from the C++ trace, # which is the canonical record of what the SCF did. Periodic # SCFs (Python-driven) emit ``scf_iter`` live via # ProgressLogger.iteration() — see vibeqc.progress; the # molecular C++ path runs as a black box so we replay its # trace here. The matching ``scf_converged`` record is emitted # below via plog.converged() so periodic and molecular paths # share the funnel. for step in getattr(result, "scf_trace", []) or []: _slog.emit( "scf_iter", iter=int(step.iter), energy=float(step.energy), dE=(float(step.delta_e) if step.iter > 1 else None), grad_norm=float(step.grad_norm), diis_subspace=int(step.diis_subspace), ) # Non-convergence path: write a crash dump too, since "ran # to max_iter" is a real failure mode. We don't raise — the # SCF returned a result and the user can inspect it — but we # leave the .dump alongside the .out so the diagnostic recipe # is the same as the exception path. if not bool(getattr(result, "converged", False)) and _crash_target is not None: dump_state = { "phase": f"scf.{resolved_method} (max_iter)", "scf_trace": list(getattr(result, "scf_trace", []) or []), "n_iters_completed": int(getattr(result, "n_iter", 0)), } for k_state, k_attr in ( ("density", "density"), ("density_alpha", "density_alpha"), ("density_beta", "density_beta"), ("fock", "fock"), ("fock_alpha", "fock_alpha"), ("fock_beta", "fock_beta"), ("mo_coeffs", "mo_coeffs"), ("mo_coeffs_alpha", "mo_coeffs_alpha"), ("mo_coeffs_beta", "mo_coeffs_beta"), ): v = getattr(result, k_attr, None) if v is not None: dump_state[k_state] = v _opts_for_dump = ( { "rhf": rhf_options, "uhf": uhf_options, "rks": rks_options, "uks": uks_options, }.get(resolved_method) if resolved_method in ("rhf", "uhf", "rks", "uks") else None ) dump_on_failure( _crash_target, None, dump_state, phase=f"scf.{resolved_method} (max_iter)", options=_opts_for_dump, molecule=molecule, ) f.write( f"\n SCF did not converge — crash dump written to " f"{_crash_target.with_suffix('.dump').name}\n" ) f.flush() # Per-analysis success flags from the .out properties block, so the # citation assembler below can gate best-effort analyses (Hirshfeld) # on whether they actually computed + surfaced for this job. Stays # empty on the solver-trace / MLIP branches (no properties block), # which correctly suppresses the Hirshfeld citation there. _prop_status: dict[str, bool] = {} # Check if we got a SolverResult (non-mean-field method) if isinstance(result, SolverResult): f.write("\n " + "-" * 60 + "\n") f.write(" Non-mean-field solver result\n") f.write(" " + "-" * 60 + "\n") f.write(_format_solver_trace(result) + "\n") elif getattr(result, "model_info", None) is not None: # MLIP (method="mace"): a single pre-trained forward pass — there # is NO SCF here, no Fock matrix, no DIIS. Emitting the empty SCF # iteration table (||[F,DS]|| / DIIS columns, "converged in 1 # iterations") would misrepresent what MACE does, so print the # single-shot energy instead, then the model provenance block # (model + license + energy-scale caveat) so the .out is self- # documenting about the external model behind these numbers. f.write("\n " + "-" * 60 + "\n") f.write(" Single-point energy — pre-trained MLIP forward pass (no SCF)\n") f.write(f" E = {float(result.energy):.10f} Ha\n") # _format_mlip_provenance opens with its own separator rule. f.write(_format_mlip_provenance(result.model_info) + "\n") else: f.write( format_scf_trace( result, molecule=molecule, basis=basis_obj, include_banner=False, property_status=_prop_status, ) + "\n" ) # MSINDO COSMO (run_job(method="msindo", solvent=...)) — surface the # solvation energy + gas reference alongside the in-solvent total. _esolv = getattr(result, "e_solv", None) if _esolv is not None: _Hk = 627.509474063 _eg = getattr(result, "e_gas", None) f.write( "\n ## Implicit solvation (COSMO)\n " + "-" * 52 + "\n" + ( f" Gas-phase total = {_eg:16.10f} Ha\n" if _eg is not None else "" ) + f" Solvation energy = {_esolv:16.10f} Ha" f" ({_esolv * _Hk:9.2f} kcal/mol)\n\n" ) _slog.emit( "e_solv", value=float(_esolv), value_kcal=float(_esolv * _Hk) ) # MSINDO reports an atomization/binding energy (E − Σ atomic # reference, ATENG) directly — surface it, the semiempirical # analogue of the mean-field atomization block. _be = getattr(result, "binding_energy", None) if _be is not None: _Hk = 627.509474063 f.write( "\n ## Atomization / binding energy\n " + "-" * 52 + "\n" f" Binding energy (E − Σ ATENG) = {_be:16.10f} Ha" f" ({_be * _Hk:9.2f} kcal/mol)\n\n" ) _slog.emit( "binding_energy", value=float(_be), value_kcal=float(_be * _Hk) ) f.flush() if resolved_method in _MLIP_METHODS: plog.info( "MLIP energy evaluated (single forward pass, no SCF); " f"E = {float(getattr(result, 'energy', 0.0)):.10f} Ha" ) else: plog.converged( n_iter=int(getattr(result, "n_iter", 0)), energy=float(getattr(result, "energy", 0.0)), converged=bool(getattr(result, "converged", False)), ) # ---- SCF diagnostics: "possibly conducting state" warning ----- # If the converged HOMO-LUMO gap is below 1e-4 Ha (~3 meV) the # Aufbau-filled ground state is unreliable — the user should # enable Fermi-Dirac smearing. _CONDUCTING_GAP_THRESHOLD_HA = 1.0e-4 has_mo_energies = hasattr(result, "mo_energies") if has_mo_energies and bool(getattr(result, "converged", False)): eps = result.mo_energies # Determine occupation count for this method. n_elec = sum(a.Z for a in molecule.atoms) - getattr(molecule, "charge", 0) if method in ("rhf", "rks"): n_occ = n_elec // 2 else: n_occ = None if n_occ is not None and n_occ > 0 and n_occ < len(eps): gap = float(eps[n_occ] - eps[n_occ - 1]) if gap < _CONDUCTING_GAP_THRESHOLD_HA: _ev_factor = 27.211386245988 msg = ( f" WARNING: POSSIBLY CONDUCTING STATE — " f"HOMO-LUMO gap = {gap:.6f} Ha ({gap * _ev_factor:.4f} eV). " f"Consider Fermi-Dirac smearing (smearing_options=...)." ) f.write("\n" + msg + "\n\n") f.flush() import warnings as _warnings _warnings.warn(msg) # Post-SCF Coupled-Cluster (CCSD / CCSD(T)). Closed-shell DF # kernel anchored against the in-repo spin-orbital SGWB-1991 # reference (tests/test_ccsd_anchor.py); dense O(N^6) pilot, # small molecules only (see docs/handover_ccsd.md). if method in ("ccsd", "ccsd(t)") and bool(getattr(result, "converged", False)): from .cc import CCSDOptions, chemical_core_orbital_count from .cc import run_ccsd as _run_ccsd_py cc_opts = CCSDOptions() cc_opts.compute_triples = method == "ccsd(t)" # Default frozen core: one chemical-core count per atom # (noble-gas cores; H2O -> 1, CO2 -> 3). cc_opts.n_frozen_core = chemical_core_orbital_count(molecule) cc_opts.resolve_aux_basis(basis) with ( plog.stage( "ccsd", detail="DF-CCSD" + ("(T)" if cc_opts.compute_triples else ""), ), PerfScope("ccsd"), ): cc_result = _run_ccsd_py(molecule, basis_obj, result, cc_opts) _slog.emit( "ccsd_converged", converged=bool(cc_result.converged), n_iter=int(cc_result.n_iter), e_ccsd_correlation=float(cc_result.e_ccsd_correlation), e_ccsd=float(cc_result.e_ccsd), e_t=float(cc_result.e_t) if cc_opts.compute_triples else None, e_total=float(cc_result.e_total), t1_norm=float(cc_result.t1_norm), t2_norm=float(cc_result.t2_norm), ) # Write CCSD iteration table triples_label = " CCSD(T)" if cc_opts.compute_triples else " CCSD" f.write("\n Coupled-Cluster" + triples_label + "\n " + "-" * 78 + "\n") if cc_opts.compute_triples: hdr = " {:<5s} {:>16s} {:>12s} {:>12s} {:>12s} {:>6s}".format( "Iter", "E(CCSD)", "dE", "|R1|", "|R2|", "DIIS" ) else: hdr = " {:<5s} {:>16s} {:>12s} {:>12s} {:>12s} {:>6s}".format( "Iter", "E(CCSD)", "dE", "|R1|", "|R2|", "DIIS" ) f.write(hdr + "\n") f.write(" " + "-" * 78 + "\n") for step in cc_result.cc_trace: f.write( " {:<5d} {:>16.10f} {:>12.6e} {:>12.6e} {:>12.6e} {:>6d}\n".format( int(step.iter), float(step.energy), float(step.delta_e) if step.iter > 1 else 0.0, float(step.r1_norm), float(step.r2_norm), int(step.diis_subspace), ) ) f.write(" " + "-" * 78 + "\n") if cc_result.converged: f.write(f" CCSD converged in {cc_result.n_iter} iterations\n") else: f.write(" CCSD DID NOT CONVERGE\n") f.write( f" E(CCSD correlation) = {cc_result.e_ccsd_correlation:16.10f} Ha\n" f" E(CCSD) = {cc_result.e_ccsd:16.10f} Ha\n" ) if cc_opts.compute_triples: f.write( f" E(T) correction = {cc_result.e_t:16.10f} Ha\n" f" E(CCSD(T)) = {cc_result.e_ccsd_t:16.10f} Ha\n" ) f.write( f" T1 norm = {cc_result.t1_norm:16.10f}\n" f" T2 norm = {cc_result.t2_norm:16.10f}\n" ) f.write("\n") f.flush() # Expose the CC result without mutating the pybind11 SCF # struct (def_readonly fields, no __dict__). result = _CCSDAugmented(result, cc_result) # Post-SCF Møller–Plesset MP2 (+ spin-component-scaled SCS / SOS). # RHF-reference only (closed-shell guarded early). The native C++ MP2 # carries the c_os / c_ss spin scaling; the factors are taken from the # single source of truth in vibeqc.correlation so the run_job variants # and the general/MSINDO kernels never drift. The unscaled e_os / e_ss # are always reported so a user can re-derive any variant from one run. if method in ("mp2", "scs-mp2", "sos-mp2") and bool( getattr(result, "converged", False) ): from .correlation import _MP2_SCALES _os_s, _ss_s = _MP2_SCALES[method] # Closed shell → restricted MP2; open shell → native UMP2 (UHF # reference). Both carry the c_os/c_ss spin-component scaling and # report e_hf / e_correlation / e_total; the spin channels differ # (RMP2: e_os/e_ss; UMP2: e_ab opposite-spin, e_aa+e_bb same-spin). _open = molecule.multiplicity > 1 if _open: from ._vibeqc_core import UMP2Options from ._vibeqc_core import run_ump2 as _run_ump2 _mp2_opts = UMP2Options() else: from ._vibeqc_core import MP2Options from ._vibeqc_core import run_mp2 as _run_mp2 _mp2_opts = MP2Options() _mp2_opts.c_os = _os_s _mp2_opts.c_ss = _ss_s _detail = ("U" if _open else "") + method.upper() with plog.stage("mp2", detail=_detail), PerfScope("mp2"): if _open: mp2_result = _run_ump2(molecule, basis_obj, result, _mp2_opts) else: mp2_result = _run_mp2(molecule, basis_obj, result, _mp2_opts) # Uniform opposite-/same-spin view across R/U (Grimme channels: # opposite = αβ, same = αα+ββ). _e_os = float(mp2_result.e_ab if _open else mp2_result.e_os) _e_ss = float( (mp2_result.e_aa + mp2_result.e_bb) if _open else mp2_result.e_ss ) _slog.emit( "mp2_done", variant=method, open_shell=_open, c_os=float(_os_s), c_ss=float(_ss_s), e_os=_e_os, e_ss=_e_ss, e_correlation=float(mp2_result.e_correlation), e_total=float(mp2_result.e_total), ) _ref = "UHF" if _open else "RHF" f.write("\n Møller–Plesset MP2 (" + _detail + ")\n " + "-" * 78 + "\n") f.write( f" E({_ref} reference) = {mp2_result.e_hf:16.10f} Ha\n" f" E(OS, opposite-spin) = {_e_os:16.10f} Ha" f" (c_os = {_os_s:.4f})\n" f" E(SS, same-spin) = {_e_ss:16.10f} Ha" f" (c_ss = {_ss_s:.4f})\n" f" E(MP2 correlation) = {mp2_result.e_correlation:16.10f} Ha\n" ) _total_label = f"E({method.upper()} total)" f.write(f" {_total_label:<22s} = {mp2_result.e_total:16.10f} Ha\n") f.write("\n") f.flush() # Expose the MP2 result for programmatic access. The C++ SCF # result has no __dict__, so we wrap it in a transparent proxy # (mirrors the dispersion path): result.mp2 carries the # MP2Result, result.energy_total is the MP2 total, and every # SCF attribute still forwards through. result = _MP2Augmented(result, mp2_result) # Post-SCF DLPNO-MP2 (vibeqc.dlpno.mp2): Foster-Boys-localised # occupieds, PAO domains with semicanonical virtual bases, PNO # compression with the semicanonical truncation correction, and the # coupled LMP2 residual iteration (Pinski et al., JCP 143, 034108 # (2015)). Closed-shell RHF reference only for now. The RI fitting # basis resolves per the correlation ("ri") aux family of the # orbital basis; pass DLPNOMP2Options via dlpno_options to control # thresholds / frozen core. if method == "dlpno-mp2" and bool(getattr(result, "converged", False)): if molecule.multiplicity > 1: raise ValueError( "method='dlpno-mp2' currently supports closed-shell " "(multiplicity 1) references only; open-shell DLPNO is " "a later milestone (see HANDOVER_DLPNO.md)." ) from .density_fitting import ( DensityFitting as _PyDF, default_aux_basis_for as _aux_for, ) from .dlpno.mp2 import DLPNOMP2Options, run_dlpno_mp2 _dlpno_opts = dlpno_options if dlpno_options is not None else DLPNOMP2Options() _aux_name = _aux_for(basis_obj.name, kind="ri") _aux_obj = BasisSet(molecule, _aux_name) with plog.stage("dlpno-mp2", detail=_aux_name), PerfScope("dlpno_mp2"): _df = _PyDF(basis_obj, _aux_obj, aux_basis_name=_aux_name) dlpno_result = run_dlpno_mp2( molecule, basis_obj, result, _df, _dlpno_opts ) _n_kept = dlpno_result.n_pairs _n_scr = dlpno_result.n_pairs_screened _avg_pno = ( sum(dlpno_result.pno_per_pair.values()) / _n_kept if _n_kept else 0.0 ) _slog.emit( "dlpno_mp2_done", aux_basis=_aux_name, converged=bool(dlpno_result.converged), n_iter=int(dlpno_result.n_iter), n_frozen=int(dlpno_result.n_frozen), n_pairs=int(_n_kept), n_pairs_screened=int(_n_scr), avg_pno_per_pair=float(_avg_pno), e_corr_iterated=float(dlpno_result.e_corr_iterated), e_pno_correction=float(dlpno_result.e_pno_correction), e_distant=float(dlpno_result.e_distant), e_correlation=float(dlpno_result.e_corr), e_total=float(dlpno_result.e_total), ) f.write("\n DLPNO-MP2 (Pinski 2015; RI: " + _aux_name + ")\n " + "-" * 78 + "\n") f.write( f" E(RHF reference) = {dlpno_result.e_hf:16.10f} Ha\n" f" pairs kept / screened = {_n_kept:d} / {_n_scr:d}" f" (frozen core: {dlpno_result.n_frozen:d})\n" f" avg PNOs per pair = {_avg_pno:14.1f}\n" f" E(iterated pairs) = {dlpno_result.e_corr_iterated:16.10f} Ha\n" f" E(PNO truncation corr) = {dlpno_result.e_pno_correction:16.10f} Ha\n" f" E(distant-pair est.) = {dlpno_result.e_distant:16.10f} Ha\n" f" E(DLPNO-MP2 corr) = {dlpno_result.e_corr:16.10f} Ha\n" f" E(DLPNO-MP2 total) = {dlpno_result.e_total:16.10f} Ha\n" ) if not dlpno_result.converged: f.write(" WARNING: coupled LMP2 iteration did not converge\n") f.write("\n") f.flush() result = _DLPNOMP2Augmented(result, dlpno_result) # Post-SCF DLPNO-CCSD / CCSD(T) correctness pilot # (vibeqc.dlpno.ccsd): the Riplinger-2013 PNO ansatz iterated # through the FCI-anchored spin-orbital engine. Exact physics, # O(N^6) pilot cost — hard-capped at max_nbf basis functions # until the reduced-scaling engine (M3c) lands. if method in ("dlpno-ccsd", "dlpno-ccsd(t)") and bool( getattr(result, "converged", False) ): if molecule.multiplicity > 1: raise ValueError( f"method={method!r} currently supports closed-shell " "(multiplicity 1) references only; open-shell DLPNO is " "a later milestone (see HANDOVER_DLPNO.md)." ) from .density_fitting import ( DensityFitting as _PyDF, default_aux_basis_for as _aux_for, ) from .dlpno.ccsd import ( DLPNOCCSDPilotOptions, run_dlpno_ccsd_pilot, ) _cc_opts = ( dlpno_ccsd_options if dlpno_ccsd_options is not None else DLPNOCCSDPilotOptions() ) _cc_opts.compute_triples = method == "dlpno-ccsd(t)" _aux_name = _aux_for(basis_obj.name, kind="ri") _aux_obj = BasisSet(molecule, _aux_name) with plog.stage("dlpno-ccsd", detail=_aux_name), PerfScope("dlpno_ccsd"): _df = _PyDF(basis_obj, _aux_obj, aux_basis_name=_aux_name) cc_result = run_dlpno_ccsd_pilot( molecule, basis_obj, result, _df, _cc_opts ) _avg_pno = ( sum(cc_result.pno_per_pair.values()) / cc_result.n_pairs if cc_result.n_pairs else 0.0 ) _slog.emit( "dlpno_ccsd_done", variant=method, aux_basis=_aux_name, converged=bool(cc_result.converged), n_iter=int(cc_result.n_iter), n_frozen=int(cc_result.n_frozen), n_pairs=int(cc_result.n_pairs), avg_pno_per_pair=float(_avg_pno), t1_norm=float(cc_result.t1_norm), e_ccsd_correlation=float(cc_result.e_corr), e_t=float(cc_result.e_t), e_total=float(cc_result.e_total), ) f.write( "\n DLPNO-" + ("CCSD(T)" if _cc_opts.compute_triples else "CCSD") + " pilot (Riplinger 2013; RI: " + _aux_name + ")\n " + "-" * 78 + "\n" ) f.write( f" E(RHF reference) = {cc_result.e_hf:16.10f} Ha\n" f" pairs / avg PNOs = {cc_result.n_pairs:d} / {_avg_pno:.1f}" f" (frozen core: {cc_result.n_frozen:d})\n" f" E(CCSD correlation) = {cc_result.e_corr:16.10f} Ha\n" ) if _cc_opts.compute_triples: f.write(f" E((T) correction) = {cc_result.e_t:16.10f} Ha\n") f.write( f" E(total) = {cc_result.e_total:16.10f} Ha\n" " note: O(N^6) correctness pilot (max_nbf-capped) — the\n" " reduced-scaling DLPNO engine is in progress (M3c).\n" ) if not cc_result.converged: f.write(" WARNING: CCSD pilot iteration did not converge\n") f.write("\n") f.flush() result = _DLPNOCCSDAugmented(result, cc_result) # Post-SCF OVGF / GF2 Green's-function quasiparticle IPs / EAs. Closed # shell uses the spatial diagonal second-order self-energy; open shell # (UHF reference) uses the spin-resolved spin-orbital self-energy # (vibeqc.propagator) — the electron-propagator analogue of UMP2. _ovgf_ok = method == "ovgf" and bool(getattr(result, "converged", False)) if _ovgf_ok and molecule.multiplicity != 1: import numpy as _np from ._vibeqc_core import compute_eri as _compute_eri from .propagator import ( unrestricted_quasiparticle_energies as _u_qp_energies, ) EV = 27.211386245988 Ca = _np.asarray(result.mo_coeffs_alpha) Cb = _np.asarray(result.mo_coeffs_beta) ea_mo = _np.asarray(result.mo_energies_alpha) eb_mo = _np.asarray(result.mo_energies_beta) nbf = len(ea_mo) # The spin-orbital tensor is (2·n_bf)⁴ — guard tighter than the # closed-shell n_bf⁴ path. if 2 * nbf > 80: raise NotImplementedError( f"open-shell method='ovgf' builds the full spin-orbital MO " f"two-electron tensor (2·n_bf={2 * nbf}); it is limited to " f"2·n_bf <= 80 (n_bf <= 40). Use a smaller basis." ) n_elec = molecule.n_electrons() na = (n_elec + molecule.multiplicity - 1) // 2 nb = n_elec - na # α/β frontier orbitals: spin-HOMO (IP) + spin-LUMO (EA) per channel. orbs = [] for _sp, _no in (("alpha", na), ("beta", nb)): if _no >= 1: orbs.append((_sp, _no - 1)) if _no < nbf: orbs.append((_sp, _no)) with plog.stage("ovgf", detail="open-shell GF2"), PerfScope("ovgf"): eri_ao = _np.asarray(_compute_eri(basis_obj)) res = _u_qp_energies( eri_ao, Ca, Cb, ea_mo, eb_mo, na, nb, orbs, iterate=True ) def _occ(sp, ix): return ix < (na if sp == "alpha" else nb) ips = [-q.eps_qp * EV for sp, q in res if _occ(sp, q.orbital)] eas = [-q.eps_qp * EV for sp, q in res if not _occ(sp, q.orbital)] first_ip = min(ips) if ips else None ea_val = max(eas) if eas else None _slog.emit( "ovgf_done", open_shell=True, orbitals=[[sp, q.orbital] for sp, q in res], eps_scf=[float(q.eps_scf) for sp, q in res], eps_qp=[float(q.eps_qp) for sp, q in res], pole_strength=[float(q.pole_strength) for sp, q in res], ip_first_ev=(None if first_ip is None else float(first_ip)), ea_ev=(None if ea_val is None else float(ea_val)), ) f.write( "\n OVGF / GF2 quasiparticle energies (open-shell, " "spin-resolved second-order self-energy)\n " + "-" * 78 + "\n" ) f.write( " {:>5s} {:>5s} {:>4s} {:>13s} {:>13s} {:>8s}\n".format( "spin", "MO", "occ", "Koopmans(eV)", "OVGF (eV)", "pole Z" ) ) for sp, q in res: f.write( " {:>5s} {:>5d} {:>4s} {:>13.4f} {:>13.4f} {:>8.4f}\n".format( sp, q.orbital, "occ" if _occ(sp, q.orbital) else "vir", q.eps_scf * EV, q.eps_qp * EV, q.pole_strength, ) ) f.write(" " + "-" * 78 + "\n") if first_ip is not None: f.write(f" OVGF first ionization potential = {first_ip:.4f} eV\n") if ea_val is not None: f.write( f" OVGF electron affinity (lowest virtual) = " f"{ea_val:.4f} eV" f" (small-basis EA — qualitative only)\n" ) f.write("\n") f.flush() result = _OVGFAugmented(result, res) elif _ovgf_ok: import numpy as _np from ._vibeqc_core import compute_eri as _compute_eri from .propagator import quasiparticle_energies as _qp_energies C = _np.asarray(result.mo_coeffs) eps_mo = _np.asarray(result.mo_energies) nbf = len(eps_mo) # The diagonal self-energy needs the full MO 2e tensor (n_bf⁴); guard # large systems rather than OOM on the transform. if nbf > 100: raise NotImplementedError( f"method='ovgf' builds the full MO two-electron tensor " f"(n_bf={nbf}); it is currently limited to small/medium " f"systems (n_bf <= 100). Use a smaller basis, or the " f"vibeqc.propagator API with on-demand integral blocks." ) nocc = molecule.n_electrons() // 2 with plog.stage("ovgf", detail="GF2 self-energy"), PerfScope("ovgf"): eri_ao = _np.asarray(_compute_eri(basis_obj)) eri_mo = _np.einsum( "pqrs,pi,qj,rk,sl->ijkl", eri_ao, C, C, C, C, optimize=True ) # Outer valence: up to 3 highest occupied + LUMO. lo = max(0, nocc - 3) orbs = ( list(range(lo, nocc + 1)) if nocc < nbf else list(range(lo, nocc)) ) qp = _qp_energies(eps_mo, nocc, eri_mo, orbs, iterate=True) EV = 27.211386245988 _homo_q = next(q for q in qp if q.orbital == nocc - 1) _lumo_q = next((q for q in qp if q.orbital == nocc), None) _slog.emit( "ovgf_done", orbitals=orbs, eps_scf=[float(q.eps_scf) for q in qp], eps_qp=[float(q.eps_qp) for q in qp], pole_strength=[float(q.pole_strength) for q in qp], ip_homo_ev=float(-_homo_q.eps_qp * EV), ea_lumo_ev=(None if _lumo_q is None else float(-_lumo_q.eps_qp * EV)), ) f.write( "\n OVGF / GF2 quasiparticle energies (diagonal " "second-order self-energy)\n " + "-" * 78 + "\n" ) f.write( " {:>5s} {:>6s} {:>13s} {:>13s} {:>8s}\n".format( "MO", "label", "Koopmans(eV)", "OVGF (eV)", "pole Z" ) ) for q in qp: lbl = ( "HOMO" if q.orbital == nocc - 1 else "LUMO" if q.orbital == nocc else "" ) f.write( " {:>5d} {:>6s} {:>13.4f} {:>13.4f} {:>8.4f}\n".format( q.orbital, lbl, q.eps_scf * EV, q.eps_qp * EV, q.pole_strength ) ) f.write(" " + "-" * 78 + "\n") f.write( f" OVGF ionization potential (HOMO) = {-_homo_q.eps_qp * EV:.4f} eV\n" ) if _lumo_q is not None: # EA = -eps_qp(LUMO). The diagonal GF2 self-energy gives only # qualitative EAs in a small/non-augmented basis (non-variational # virtuals; no diffuse functions for a bound anion) — flagged so # users don't read a positive value as a stable anion. f.write( f" OVGF electron affinity (LUMO) = " f"{-_lumo_q.eps_qp * EV:.4f} eV" f" (small-basis EA — qualitative only)\n" ) if 2 * nbf <= 60: # Renormalized GF2 (full third-order self-energy + geometric # screening) — trims the bare-GF2 overcorrection back toward # experiment. Small systems only (the third-order contractions # scale steeply). See vibeqc.propagator (renormalized second- # order GF; lineage Cederbaum 1975 / von Niessen 1984). from .propagator import renormalized_quasiparticle_energies as _ren_qp _ren = _ren_qp( eri_ao, C, C, eps_mo, eps_mo, nocc, nocc, [("alpha", nocc - 1)], iterate=True, )[0][1] f.write( f" GF2(renorm) ionization potential (HOMO) = " f"{-_ren.eps_qp * EV:.4f} eV" f" (renormalized; pole Z={_ren.pole_strength:.3f})\n" ) f.write("\n") f.flush() result = _OVGFAugmented(result, qp) # Atomization energy (Σ E_atom − E_mol) at the molecule's mean-field # level. Free-atom ground-state references are computed at the same # level (cached per element). Mean-field methods only (a post-SCF # method's atomization would need atomic post-SCF references too). if ( atomization and method in ("rhf", "uhf", "rks", "uks") and bool(getattr(result, "converged", False)) ): try: from .atomization import atomization_energy as _atomization _atm = _atomization( molecule, float(result.energy), method, basis, functional=functional ) _Hk = 627.509474063 _slog.emit( "atomization", method=method, functional=functional, e_molecule=_atm.e_molecule, e_atoms_sum=_atm.e_atoms_sum, atomization=_atm.atomization, atomization_per_atom=_atm.atomization_per_atom, atomic_energies={ str(z): e for z, e in _atm.atomic_energies.items() }, ) _flabel = f"/{functional}" if functional else "" f.write("\n ## Atomization energy\n " + "-" * 52 + "\n") f.write( f" Free-atom references ({method.upper()}{_flabel}/{basis}):\n" ) for _z in sorted(_atm.atomic_energies): f.write( f" Z = {_z:<3d} E = {_atm.atomic_energies[_z]:16.10f} Ha\n" ) f.write( f" Σ E(atoms) = {_atm.e_atoms_sum:16.10f} Ha\n" f" E(molecule) = {_atm.e_molecule:16.10f} Ha\n" f" Atomization energy = {_atm.atomization:16.10f} Ha" f" ({_atm.atomization_kcal:9.2f} kcal/mol)\n" f" per atom (n={_atm.n_atoms}) = " f"{_atm.atomization_per_atom * _Hk:9.2f} kcal/mol\n\n" ) f.flush() except Exception as _atm_exc: f.write( f"\n (warning: atomization not available: " f"{type(_atm_exc).__name__}: {_atm_exc})\n\n" ) f.flush() # Post-SCF dispersion (D3-BJ) as an additive correction. # Captured in a local so the composite-total sum below is # independent of whether ``result`` got wrapped in # _DispersionAugmented — a SolverResult is never wrapped (it has # no .e_dispersion attribute), and reading the term back off the # result then silently dropped D3-BJ from the composite total # (audit F1.2). e_d3bj = 0.0 if d3_params is not None: disp = compute_d3bj(molecule, d3_params) e_d3bj = float(disp.energy) e_scf = float(getattr(result, "energy", 0.0)) e_total = e_scf + e_d3bj f.write( "\n Dispersion correction (D3-BJ)\n" " " + "-" * 52 + "\n" f" {'s6':>10s} {d3_params.s6:14.6f}\n" f" {'s8':>10s} {d3_params.s8:14.6f}\n" f" {'a1':>10s} {d3_params.a1:14.6f}\n" f" {'a2':>10s} {d3_params.a2:14.6f}\n" f" {'E_disp':>10s} {disp.energy:14.8f} Ha" f" ({disp.energy * 627.5094740631:+.4f} kcal/mol)\n" f" {'E_SCF':>10s} {e_scf:14.8f} Ha\n" f" {'E_total':>10s} {e_total:14.8f} Ha\n" ) # Wrap the SCF result so callers can access both components # without breaking existing accesses to mo_coeffs, fock, etc. if not isinstance(result, SolverResult): result = _DispersionAugmented(result, e_d3bj, d3_params) # ---- Composite 3c post-SCF corrections (D4 + gCP + SRB) ------- # Only runs when a composite recipe is active. Each piece is # additive and logged separately; the composite total is the # SCF energy + every correction. e_d4 = 0.0 e_gcp = 0.0 e_srb = 0.0 if use_d4: if not dftd4_available(): raise ImportError( "composite recipe requests D4 dispersion but the " "optional 'dftd4' package is not installed.\n " "Install via `pip install -e '.[dispersion]'` or " "`pip install dftd4`." ) # dftd4 ships per-composite damping keyed on the composite # name (e.g. 'r2scan-3c', 'wb97x-3c') — distinct from the # parent-functional damping. Prefer the composite name. d4_func = ( composite_recipe.name if composite_recipe is not None else (functional or "hf") ) d4_result = compute_d4( molecule, d4_func, charge=float(molecule.charge), ) e_d4 = float(d4_result.energy) f.write( "\n Dispersion correction (D4)\n" " " + "-" * 52 + "\n" f" {'functional':>10s} {d4_result.functional!s:>14s}\n" f" {'E_disp':>10s} {e_d4:14.8f} Ha" f" ({e_d4 * 627.5094740631:+.4f} kcal/mol)\n" ) if composite_recipe is not None and composite_recipe.gcp_basis: # Damped composites (pbeh-3c / hse-3c / r2scan-3c) carry a # GCPDamping; pass it through as (dmp_scal, dmp_exp). Undamped # recipes (hf-3c, b3lyp-3c) leave it None. b97-3c has # gcp_basis=None (SRB-only) so this block is skipped for it. _gcp_damping = ( ( composite_recipe.gcp_damping.dmp_scal, composite_recipe.gcp_damping.dmp_exp, ) if composite_recipe.gcp_damping is not None else None ) try: gcp_result = compute_gcp( molecule, composite_recipe.gcp_basis, damping=_gcp_damping, ) e_gcp = float(gcp_result.energy) f.write( "\n Geometric counterpoise correction (gCP)\n" " " + "-" * 52 + "\n" f" {'basis':>10s} {gcp_result.basis_name!s:>14s}\n" f" {'E_gCP':>10s} {e_gcp:14.8f} Ha" f" ({e_gcp * 627.5094740631:+.4f} kcal/mol)\n" ) except GCPDataMissing as exc: # gCP data table not bundled for this basis — log and # continue with E_gCP = 0 so the composite total is # explicitly marked incomplete rather than silently # wrong. Same posture as the dispersion builtin-stub. f.write( "\n Geometric counterpoise correction (gCP)\n" " " + "-" * 52 + "\n" f" SKIPPED — {exc}\n" ) if composite_recipe is not None and composite_recipe.sr_mod is not None: srb = composite_recipe.sr_mod atomic_numbers = [int(a.Z) for a in molecule.atoms] positions = [list(a.xyz) for a in molecule.atoms] e_srb = float(srb.evaluate(atomic_numbers, positions)) f.write( f"\n {srb.name}\n" " " + "-" * 52 + "\n" f" {'E_SRB':>10s} {e_srb:14.8f} Ha" f" ({e_srb * 627.5094740631:+.4f} kcal/mol)\n" ) if composite_recipe is not None: composite_total = ( float(getattr(result, "energy", 0.0)) + e_d3bj + e_d4 + e_gcp + e_srb ) f.write( f"\n Composite total ({composite_recipe.name})\n" " " + "-" * 52 + "\n" f" {'E_SCF':>10s} " f"{float(getattr(result, 'energy', 0.0)):14.8f} Ha\n" f" {'E_disp':>10s} " f"{e_d3bj + e_d4:14.8f} Ha\n" f" {'E_gCP':>10s} {e_gcp:14.8f} Ha\n" f" {'E_SRB':>10s} {e_srb:14.8f} Ha\n" f" {'E_total':>10s} {composite_total:14.8f} Ha\n" ) has_mos = hasattr(result, "mo_energies") or hasattr(result, "mo_energies_alpha") if write_molden_file and has_mos: with ( plog.stage("write_molden", detail=str(molden_path.name)), PerfScope("write_molden"), ): write_molden( molden_path, molecule, basis_obj, result, title=str(output_stem.name), ) f.write(f"\n Molecular orbitals written to {molden_path.name}\n") f.flush() # Population dump (Phase O6). Always-on for molecular runs by # default — cheap to compute (the SCF density is in memory), # cheap to write, and downstream tooling shouldn't have to # screen-scrape the .out for charges + bond orders + dipole. # The matching block stays in .out for human reading; the # .txt + .json siblings are the machine-readable form. if write_population_file and has_mos: pop_txt = output_stem.parent / (output_stem.name + ".population.txt") try: with ( plog.stage("write_population", detail=str(pop_txt.name)), PerfScope("write_population"), ): write_population( output_stem, result, basis_obj, molecule, ) f.write( f" Population dump written to " f"{pop_txt.name} + " f"{output_stem.name}.population.json\n" ) f.flush() except Exception as _pop_exc: f.write( f" (warning: population dump failed: " f"{type(_pop_exc).__name__}: {_pop_exc})\n" ) f.flush() warn_writer_failure( _pop_exc, pop_txt, role="population_summary", category=OutputFailureKind.optional_artifact, writer=_output_writer, ) # Volumetric cubes (Phase O6). Opt-in via write_cube=: True / # "density" / "homo" / "lumo" / list-of-those-or-int-indices. # Each cube is wrapped in its own try/except so a single # grid-evaluation failure on one MO doesn't block the others. _cube_req = parse_write_cube_kwarg(write_cube) if _cube_req and has_mos: if _cube_req.density: try: with ( plog.stage("write_cube_density"), PerfScope("write_cube_density"), ): cube_path = write_cube_density_for_run_job( output_stem, result, basis_obj, molecule, spacing=cube_spacing, padding=cube_padding, ) f.write(f" Density cube written to {cube_path.name}\n") f.flush() except Exception as _cube_exc: f.write( f" (warning: density cube write failed: " f"{type(_cube_exc).__name__}: {_cube_exc})\n" ) f.flush() warn_writer_failure( _cube_exc, output_stem.with_suffix(".density.cube"), role="density_cube", category=OutputFailureKind.optional_artifact, writer=_output_writer, ) if _cube_req.mo_labels: try: _mo_targets = requested_mo_indices( list(_cube_req.mo_labels), result, molecule, ) except Exception as _exc: f.write( f" (warning: MO label resolution failed: " f"{type(_exc).__name__}: {_exc})\n" ) warn_writer_failure( _exc, output_stem.with_suffix(".cube"), role="mo_label_resolution", category=OutputFailureKind.optional_artifact, writer=_output_writer, ) _mo_targets = [] for _idx, _name in _mo_targets: try: with ( plog.stage(f"write_cube_mo_{_name}"), PerfScope(f"write_cube_mo_{_name}"), ): _mo_path = write_cube_mo_for_run_job( output_stem, result, basis_obj, molecule, _idx, _name, spacing=cube_spacing, padding=cube_padding, ) f.write(f" MO cube written to {_mo_path.name}\n") f.flush() except Exception as _cube_exc: f.write( f" (warning: MO {_name} cube write failed: " f"{type(_cube_exc).__name__}: {_cube_exc})\n" ) f.flush() warn_writer_failure( _cube_exc, output_stem.with_suffix(f".{_name}.cube"), role=f"mo_cube_{_name}", category=OutputFailureKind.optional_artifact, writer=_output_writer, ) # Citations (Phase O5b). Assemble the reference list for this # job (software + libint + libxc-if-DFT + basis-set + # functional + DIIS + dispersion-if-used + ECP-if-used) and # emit {stem}.bibtex + .references siblings. The matching # "## References" block is appended to the .out a few lines # below so the text log carries the same references. Failures # are non-fatal: a routing miss or a writer crash leaves a # warning in the .out and the SCF result is unaffected. cite_block_text: str | None = None _refs = None if citations: try: _cite_db = load_default_database() _dispersion_key = None _dispersion_params_key = None if d3_params is not None: # The database routes "d3bj" → Grimme 2010 + 2011. _dispersion_key = "d3bj" elif use_d4: # "d4" → Caldeweyher 2019 (method paper), plus — # for parametrizations fit outside that paper — # the damping-parameter fit paper via # routes.dispersion_params["d4:<key>"]. The key # mirrors the damping lookup in the post-SCF D4 # block above: composite name when a 3c recipe is # active, else the SCF functional ("hf" for pure # Hartree-Fock + D4). _dispersion_key = "d4" _dispersion_params_key = normalize_d4_key( composite_recipe.name if composite_recipe is not None else (functional or "hf") ) # Detect direct SCF usage for citation routing. _uses_direct = _detect_direct_scf( resolved_method, rhf_options, uhf_options, rks_options, uks_options, basis, molecule, ) # SCF accelerator picked up from opts.scf_accelerator — # fires the matching DIIS / EDIIS / ADIIS / KDIIS # citation instead of always crediting plain DIIS. _scf_accel = _detect_scf_accelerator( resolved_method, rhf_options, uhf_options, rks_options, uks_options, ) # ECP detection — libecpint citation only fires when # at least one options struct carries an ecp_centers # list (manual ECPCenter recipe or auto_ecp_centers). _uses_ecp = _detect_uses_ecp( rhf_options, uhf_options, rks_options, uks_options, ) # CPCM solvation citation fires when run_job was # invoked with a non-None ``solvent``; vibe-qc on main # only ships CPCM-style models, so the variant key # defaults to "cpcm". _uses_cpcm = solvent is not None # For composite-3c jobs (hf-3c / pbeh-3c / …) route # citations by the *composite keyword*, not the # resolved mean-field method — the composite carries # its own defining paper (+ gCP / D3) in # routes.methods. ``composite_recipe`` is None for # non-composite jobs, in which case the resolved # method name is the right routing key. # Post-SCF correlation methods run an RHF/UHF SCF first # (resolved_method = "rhf"/"uhf"), but their citation key is # the *original* method — routes.methods.{ccsd,mp2,scs-mp2,…} # carry the correlation papers (Møller–Plesset, Grimme SCS, # Jung SOS, Purvis–Bartlett …). Routing on resolved_method # would silently drop those references. _cite_method = ( composite_recipe.name if composite_recipe is not None else method if method in _POSTSCF_CITE_METHODS else resolved_method ) # MLIP engines (method="mace") evaluate no Gaussian # integrals and run no SCF — suppress the always-on # libint + DIIS routes so the references reflect what # actually ran. The MACE / e3nn / PyTorch citations come # in via routes.methods[<mlip>] instead. _is_mlip = resolved_method in _MLIP_METHODS # INDO-family engines (MSINDO molecular + periodic CCM) use # analytic Slater (STO) integrals, not libint Gaussians — so the # always-on libint integral citation must not fire (they do use # Pulay DIIS, so uses_scf stays on). _is_indo = resolved_method in ("msindo", "ccm") # Per-model MLIP foundation-model citation: the model # actually run (result.model_info) decides which foundation # paper to cite (MPA-0 -> Batatia 2024; OFF23 -> Kovács # 2023). The MACE *method* paper fires via the static # routes.methods.mace route. _mlip_extra: list[str] = [] if _is_mlip: _mi = getattr(result, "model_info", None) if _mi is not None and getattr(_mi, "citation", ""): _mlip_extra.append(_mi.citation) # UNO-CAS starting reference (Pulay-Hamilton 1988): cite when # an open-shell determinant-solver job resolved to the UHF # natural-orbital reference (the open-shell default; see the # cas_reference resolution in _run_single_point). if resolved_method in ( "selected_ci", "dmrg", "v2rdm", "transcorrelated_ci", "casci", "mrci", "casscf", "nevpt2", "caspt2", ) and ( cas_reference or ("uno" if molecule.multiplicity > 1 else "rhf") ) == "uno": _mlip_extra.append("pulay_hamilton_uno_1988") # Multi-state CASPT2: the effective-Hamiltonian formalism # (Finley 1998) fires for both modes; the extended (XMS) # rotation adds Granovsky 2011 + Shiozaki 2011. if ( resolved_method == "caspt2" and caspt2_options is not None and caspt2_options.multistate is not None ): _mlip_extra.append("finley_ms_caspt2_1998") if caspt2_options.multistate == "xms": _mlip_extra.append("granovsky_xmcqdpt2_2011") _mlip_extra.append("shiozaki_xms_caspt2_2011") # Selected-CI CASSCF backend: the CI inside the macro- # iteration is the CIPSI selection algorithm (the static # routes only fire for method="selected_ci"). if ( resolved_method == "casscf" and casscf_options is not None and casscf_options.ci_solver == "selected_ci" ): _mlip_extra.append("huron_malrieu_cipsi_1973") _mlip_extra.append("holmes_tubman_umrigar_shci_2016") # The Epstein-Nesbet PT2 stage on the selected # wavefunction cites the semistochastic-HCI paper # (the routes.methods key fires only for a literal # selected_ci_pt2 method; this hook is the live # path for the CASSCF composition). if getattr(casscf_options, "pt2", None) is not None: _mlip_extra.append("sharma_semistochastic_hci_2017") # Integral/exchange acceleration (RI-J Coulomb fitting / # RIJCOSX chain-of-spheres exchange) from the density_fit / # cosx flags on the resolved options struct. _accel = _detect_acceleration( resolved_method, rhf_options, uhf_options, rks_options, uks_options, ) # Second-order convergers (SOSCF / TRAH) cite their defining # papers when armed by a positive threshold. _uses_soscf, _uses_trah = _detect_soscf_trah( resolved_method, rhf_options, uhf_options, rks_options, uks_options, ) # Explicit non-AUTO SCF initial guess (SAD / SAP / extended # Hückel) carries a defining-paper route. _scf_guess = _detect_scf_guess( resolved_method, rhf_options, uhf_options, rks_options, uks_options, ) # Analytic-gradient + vibrational-analysis drivers. The # Hessian block (further below) runs a finite-difference # Hessian *of the analytic atomic gradient*, but only when the # SCF converged — so the Hessian and gradient driver # citations gate on convergence. Geometry optimisation also # evaluates the analytic gradient at every step. Neither # applies to MLIP engines, whose forces come from the model # (cited via routes.methods[<mlip>]), not Pulay/Hellmann- # Feynman theory. _uses_hessian = ( bool(hessian) and bool(getattr(result, "converged", False)) and not _is_mlip ) _uses_gradient = (bool(optimize) or _uses_hessian) and not _is_mlip # Population analysis (Mulliken / Löwdin charges + Mayer bond # orders) is computed and surfaced together whenever the # population dump runs (Phase O6, above) — cite each analysis # the user actually sees in the .population.* siblings + .out. # Hirshfeld is best-effort on top (Becke-Lebedev grid + SAD # promolecule, either of which can fail independently): cite it # only when it actually computed and reached the .out Atomic- # charges table for *this* job. `_prop_status["hirshfeld"]` is # the success flag format_scf_trace recorded above — gating on # it (rather than the route merely existing) keeps us from # over-claiming on a grid/promolecule build failure (§ 8). _props: list[str] = [] if write_population_file and has_mos: _props = ["mulliken", "lowdin_charges", "mayer_bond_order"] if _prop_status.get("hirshfeld"): _props.append("hirshfeld") _refs = _cite_db.assemble( method=_cite_method, basis=basis, functional=functional, dispersion=_dispersion_key, dispersion_params=_dispersion_params_key, scf_accelerator=_scf_accel, uses_ase=optimize, # BFGS path uses ASE uses_ecp=_uses_ecp, uses_cpcm=_uses_cpcm, # MSINDO COSMO uses its GEPOL cavity (+ Silla 1991); the # HF/DFT Lebedev path keeps the default "cpcm" bundle. solvent_variant=( "cosmo_gepol" if (_uses_cpcm and resolved_method == "msindo") else None ), direct_scf=_uses_direct, dft_plus_u=bool(dft_plus_u), uses_integrals=not (_is_mlip or _is_indo), uses_scf=not _is_mlip, acceleration=_accel, uses_soscf=_uses_soscf, uses_trah=_uses_trah, scf_guess=_scf_guess, uses_gradient=_uses_gradient, uses_hessian=_uses_hessian, properties=_props, extra_entries=_mlip_extra, ) # Feed the full assembled provenance into the .system # manifest's [citations] section (CLAUDE.md §8.3/§8.5), # before writing the user-facing siblings so a brittle # .bibtex write can't deprive .system of the record. _output_writer.set_citations(_citation_manifest_rows(_refs)) write_bibtex(output_stem, _refs) write_references(output_stem, _refs) cite_block_text = format_references_block(_refs) _bib_name = output_stem.with_suffix(".bibtex").name _ref_name = output_stem.with_suffix(".references").name f.write(f" Citations written to {_bib_name} + {_ref_name}\n") f.flush() except Exception as _cite_exc: f.write( f" (warning: citation emission failed: " f"{type(_cite_exc).__name__}: {_cite_exc})\n" ) f.flush() warn_writer_failure( _cite_exc, output_stem.with_suffix(".bibtex"), role="citations", category=OutputFailureKind.optional_artifact, writer=_output_writer, ) # Final geometry as a plain XYZ sibling (Phase O3). ``molecule`` # at this point is the geometry the SCF actually ran on — the # optimised one when ``optimize=True``, otherwise the input. # Energy in Hartree is appended to the comment line so ASE / # Open Babel can recover the SCF result from the .xyz alone. if write_xyz_file: xyz_path = output_stem.with_suffix(".xyz") try: energy_ha = float(getattr(result, "energy", float("nan"))) except (TypeError, ValueError): energy_ha = float("nan") try: with ( plog.stage("write_xyz", detail=str(xyz_path.name)), PerfScope("write_xyz"), ): write_xyz( output_stem, molecule, energy_ha=(None if energy_ha != energy_ha else energy_ha), ) f.write(f" Final geometry written to {xyz_path.name}\n") f.flush() except Exception as _xyz_exc: # Geometry writer is best-effort — never let a `.xyz` # failure tank a finished SCF. The user can always # re-run with ``write_xyz_file=False``. f.write( f" (warning: .xyz writer failed: " f"{type(_xyz_exc).__name__}: {_xyz_exc})\n" ) f.flush() warn_writer_failure( _xyz_exc, xyz_path, role="final_xyz_geometry", category=OutputFailureKind.optional_artifact, writer=_output_writer, ) # Structured log: properties event. Best-effort — the helpers # raise on basis-set shapes vibe-qc's Python side can't # parse; a failure here must never tank the run, so we wrap # each section. The event only fires when at least one # property was successfully computed. if _slog.enabled and has_mos: _props_payload: dict[str, Any] = {} try: from .properties import ( dipole_moment as _dipole, ) from .properties import ( loewdin_charges as _low, ) from .properties import ( mulliken_charges as _mul, ) _props_payload["mulliken"] = [ float(x) for x in _mul(result, basis_obj, molecule) ] _props_payload["loewdin"] = [ float(x) for x in _low(result, basis_obj, molecule) ] _dip = _dipole(result, basis_obj, molecule) _props_payload["dipole"] = { "x": float(_dip.x), "y": float(_dip.y), "z": float(_dip.z), "total": float(_dip.total), "total_debye": float(_dip.total_debye), } except Exception as _props_exc: # Properties are best-effort; the trace already # carries a degraded properties block, the structured # log just skips the event. Surface a warning so the # user knows the structured-log `properties` event was # dropped. warn_output_failure( _props_exc, output_stem.with_suffix(".out"), role="structured_log_properties", category=OutputFailureKind.compatibility_fallback, ) if _props_payload: _slog.emit("properties", **_props_payload) if optimize: if _opt_backend == "ase" and traj_path.is_file(): f.write(f" Optimization trajectory written to {traj_path.name}\n") f.flush() t_total = time.perf_counter() - t_job_start n_iter = getattr(result, "n_iter", 0) iter_avg = (t_scf / n_iter) if n_iter > 0 else float("nan") f.write("\n Timings (wall clock, seconds)\n " + "-" * 52 + "\n") if optimize: f.write(f" {'Geometry optimization':<32s} {t_opt:12.3f}\n") if resolved_method in _MLIP_METHODS: # MLIP: a single forward pass, not an SCF — honest phase label, # no per-iteration average. f.write(f" {'MLIP energy evaluation':<32s} {t_scf:12.3f}\n") else: f.write(f" {'SCF total':<32s} {t_scf:12.3f}\n") f.write( f" {'SCF avg. per iteration':<32s} {iter_avg:12.3f} ({n_iter} iters)\n" ) f.write(f" {'Job total':<32s} {t_total:12.3f}\n") f.write( f" Used {threads_in_use} OpenMP thread" f"{'s' if threads_in_use != 1 else ''}.\n" ) f.flush() plog.info(f"Job total {t_total:.2f}s — output written to {out_path}") _slog.emit( "job_end", total_wall_s=float(t_total), scf_wall_s=float(t_scf), opt_wall_s=float(t_opt), n_iter=int(getattr(result, "n_iter", 0)), converged=bool(getattr(result, "converged", False)), energy=float(getattr(result, "energy", 0.0)), out_path=str(out_path), ) # "## References" block (Phase O5b) — embed the assembled # citation list in the .out so the text log is self-contained. if cite_block_text: f.write("\n" + cite_block_text + "\n") f.flush() # --- Harmonic vibrational analysis (finite-difference Hessian) --- if hessian: _scf_converged = bool(getattr(result, "converged", False)) if not _scf_converged: f.write( "\n ## Vibrational Frequencies\n" " " + "-" * 52 + "\n" " SKIPPED -- SCF did not converge.\n" ) f.flush() else: with ( plog.stage("hessian_fd", detail="finite-difference Hessian"), PerfScope("hessian_fd"), ): try: from .hessian import ( HessianFDOptions, compute_hessian_fd, ir_intensities, ) _hess_scf_opts = { "rhf": rhf_options, "uhf": uhf_options, "rks": rks_options, "uks": uks_options, }.get(resolved_method) _hess_opts = HessianFDOptions( include_dipole_derivatives=True, frozen_indices=hessian_frozen_indices, ) hessian_result = compute_hessian_fd( molecule, basis, method=resolved_method.upper(), scf_options=_hess_scf_opts, hessian_options=_hess_opts, ) f.write( f"\n ## Vibrational Frequencies\n" f" " + "-" * 52 + "\n" f" Finite-difference Hessian" f" (step = {_hess_opts.step_bohr:.3f} bohr," f" {hessian_result.n_displacements} displacements)\n" f" Imaginary modes: {hessian_result.imaginary_count}\n" f" Linear molecule: {hessian_result.is_linear}\n\n" ) _n_skip = 5 if hessian_result.is_linear else 6 _freqs = hessian_result.frequencies_cm1 _ir = None try: _ir = ir_intensities(hessian_result) except Exception as _ir_exc: f.write( f" (warning: IR intensities not available: " f"{type(_ir_exc).__name__}: {_ir_exc})\n" ) _header = " {:<6s} {:>10s}" + ( " {:>12s}" if _ir is not None else "" ) if _ir is not None: f.write( _header.format( "Mode", "Freq/cm\u207b\u00b9", "IR/(km/mol)" ) + "\n" ) f.write(" " + "-" * 52 + "\n") for k in range(_n_skip, len(_freqs)): _label = f"{k - _n_skip + 1}" _freq = _freqs[k] _iri = _ir[k] if _freq < 0: _freq_str = f"{abs(_freq):.1f}i" else: _freq_str = f"{_freq:.1f}" f.write( f" {_label:<6s} {_freq_str:>10s} {_iri:>12.2f}\n" ) else: f.write( _header.format("Mode", "Freq/cm\u207b\u00b9") + "\n" ) f.write(" " + "-" * 52 + "\n") for k in range(_n_skip, len(_freqs)): _label = f"{k - _n_skip + 1}" _freq = _freqs[k] if _freq < 0: _freq_str = f"{abs(_freq):.1f}i" else: _freq_str = f"{_freq:.1f}" f.write(f" {_label:<6s} {_freq_str:>10s}\n") f.write("\n") f.flush() # Thermochemistry (RRHO ideal gas) from the harmonic # frequencies — ZPE + thermal U/H/S/G at T, p. Reuses # the general vibeqc.thermo engine; method-agnostic # (any reference that produced the Hessian). try: from .thermo import ( ThermoOptions as _ThermoOptions, ) from .thermo import ( compute_thermochemistry as _compute_thermo, ) _topts = thermo_options or _ThermoOptions() _thermo = _compute_thermo( molecule, hessian_result, options=_topts ) _Hced = 627.509474063 # Hartree -> kcal/mol _slog.emit( "thermochemistry", temperature=float(_topts.temperature), pressure=float(_topts.pressure), symmetry_number=int(_topts.symmetry_number), rotor_type=_thermo.rotor_type, zpe=float(_thermo.zpe), u_thermal=float(_thermo.u_thermal), h_thermal=float(_thermo.h_thermal), g_thermal=float(_thermo.g_thermal), s_total=float(_thermo.s_total), e_scf=float(getattr(result, "energy", float("nan"))), ) _e0 = float(getattr(result, "energy", float("nan"))) f.write( " ## Thermochemistry (RRHO ideal gas)\n" " " + "-" * 52 + "\n" f" T = {_topts.temperature:.2f} K" f" p = {_topts.pressure:.0f} Pa" f" σ_rot = {_topts.symmetry_number}" f" rotor = {_thermo.rotor_type}\n" ) if _thermo.n_imaginary_modes_excluded: f.write( f" (excluded {_thermo.n_imaginary_modes_excluded}" f" imaginary mode(s) from the partition function)\n" ) f.write( f" Zero-point energy = {_thermo.zpe:16.10f} Ha" f" ({_thermo.zpe * _Hced:9.3f} kcal/mol)\n" f" Thermal corr. to U = {_thermo.u_thermal:16.10f} Ha\n" f" Thermal corr. to H = {_thermo.h_thermal:16.10f} Ha\n" f" Thermal corr. to G = {_thermo.g_thermal:16.10f} Ha\n" f" Total entropy S = {_thermo.s_total:16.10f} Ha/K" f" ({_thermo.s_total * _Hced * 1000:8.3f} cal/mol/K)\n" ) if _e0 == _e0: # not NaN f.write( f" E(elec) + ZPE = {_e0 + _thermo.zpe:16.10f} Ha\n" f" H = E(elec) + H_corr = {_e0 + _thermo.h_thermal:16.10f} Ha\n" f" G = E(elec) + G_corr = {_e0 + _thermo.g_thermal:16.10f} Ha\n" ) f.write("\n") f.flush() except Exception as _thermo_exc: f.write( f" (warning: thermochemistry not available: " f"{type(_thermo_exc).__name__}: {_thermo_exc})\n\n" ) f.flush() except Exception as _hess_exc: f.write( f"\n ## Vibrational Frequencies\n" f" " + "-" * 52 + "\n" f" FAILED: {type(_hess_exc).__name__}: {_hess_exc}\n" ) f.flush() hessian_result = None # --- QVF visualisation archive (v1) ---------------------------------- if output_qvf: try: from vibeqc.output.formats.qvf import ( scf_history_from_result as _scf_history_from_result, ) from vibeqc.output.formats.qvf import write_qvf as _write_qvf _qvf_path_for_warn = output_stem.with_suffix(".qvf") _qvf_scf_history = _scf_history_from_result(result) _bibtex = None if _refs is not None: try: from vibeqc.output.citations.bibtex import format_bibtex _bibtex = format_bibtex(_refs) except Exception as _bib_exc: warn_output_failure( _bib_exc, _qvf_path_for_warn, role="qvf_bibtex_prep", category=OutputFailureKind.compatibility_fallback, ) _pop = None if write_population_file and has_mos: try: from vibeqc.output.formats.population import ( compute_population_summary, ) _pop = compute_population_summary( result, basis_obj, molecule, ) except Exception as _qpop_exc: warn_output_failure( _qpop_exc, _qvf_path_for_warn, role="qvf_population_prep", category=OutputFailureKind.compatibility_fallback, ) _qvf_vol_data = None if has_mos and _cube_req.density: try: from vibeqc.output.formats.qvf import qvf_density_data _qvf_vol_data = qvf_density_data( result, basis_obj, molecule, spacing=cube_spacing, padding=cube_padding, ) except Exception as _qvol_exc: warn_output_failure( _qvol_exc, _qvf_path_for_warn, role="qvf_density_prep", category=OutputFailureKind.compatibility_fallback, ) _qvf_mo_list = None if has_mos and _cube_req.mo_labels: try: from vibeqc.output.formats.qvf import qvf_mo_data _indices = requested_mo_indices( _cube_req.mo_labels, result, molecule ) _qvf_mo_list = qvf_mo_data( result, basis_obj, molecule, [int(i) for i, _ in _indices], spacing=cube_spacing, padding=cube_padding, ) except Exception as _qmo_exc: warn_output_failure( _qmo_exc, _qvf_path_for_warn, role="qvf_mo_prep", category=OutputFailureKind.compatibility_fallback, ) # Package the wavefunction for the QVF archive so vibe-view # can resample any MO on demand (wavefunction.gto section). # Mirrors the periodic runner; gated on write_molden_file so # the .molden sidecar and the embedded basis+MO stay in sync. _qvf_wf = None if write_molden_file and has_mos: try: from vibeqc.output.formats.qvf import qvf_wf_data _qvf_wf = qvf_wf_data(result, basis_obj, molecule) except Exception as _qwf_exc: warn_output_failure( _qwf_exc, _qvf_path_for_warn, role="qvf_wavefunction_prep", category=OutputFailureKind.compatibility_fallback, ) _qvf_path = _write_qvf( output_stem, _output_plan, molecule=molecule, result=result, method=resolved_method, basis=basis, functional=functional, population_summary=_pop, bibtex_content=_bibtex, wall_seconds=t_total, trajectory_frames=_traj_frames if _traj_frames else None, trajectory_energies=_traj_energies if _traj_energies else None, volume_data=_qvf_vol_data, mo_data=_qvf_mo_list, wf_data=_qvf_wf, hessian_result=hessian_result, scf_history_data=_qvf_scf_history, ) try: _output_writer.record(_qvf_path, wall_time_s=t_total) except Exception as _qvf_rec_exc: warn_writer_failure( _qvf_rec_exc, _qvf_path, role="qvf_manifest_record", category=OutputFailureKind.manifest_recording, writer=_output_writer, ) except Exception as _qvf_exc: warn_writer_failure( _qvf_exc, output_stem.with_suffix(".qvf"), role="qvf_archive", category=OutputFailureKind.optional_artifact, writer=_output_writer, ) # Finalise the {output}.system manifest (pre-v1.0 dispatch- # overhaul — replaces the bare end-of-job write_system_manifest # call). The OutputWriter, constructed before the SCF, already # wrote the [vibeqc] / [host] / [cpu] / ... / [plan] sections + # [outputs].status = "running". Here we (1) record every declared # artefact that actually landed on disk into [[outputs.files]] # — with bytes + sha256 + wall-time — so vq's fetch / status can # see exactly what to copy back, and (2) flip the status to # "complete" with the final wall-time. A non-converged SCF still # counts as "complete": the job ran to completion; the .out + the # result object carry the non-convergence. for _pf in _output_plan.files: if _pf.path.is_file(): try: _output_writer.record(_pf.path) except Exception as _rec_exc: # record() stats + hashes the file; a brittle write # at wrap-up must never tank a finished job. We do # surface it though — a manifest with stale rows # confuses ``vq fetch``. warn_writer_failure( _rec_exc, _pf.path, role=f"manifest_record_{_pf.role}", category=OutputFailureKind.manifest_recording, writer=_output_writer, ) _output_writer.finish(wall_seconds=t_total) return result
__all__ = ["run_job"]