Source code for vibeqc.runner

"""High-level "run a job" convenience — classic QC-program workflow.

A single call writes the output text file, the molden orbital file, and
(for geometry optimization) a trajectory animation — the kind of shape
users expect from Gaussian / ORCA / NWChem:

    from vibeqc import Atom, Molecule
    from vibeqc.runner import run_job

    mol = Molecule.from_xyz("h2o.xyz")
    run_job(mol, basis="6-31g*", method="rhf", output="h2o")
    # -> h2o.out, h2o.molden

    run_job(mol, basis="6-31g*", method="rks", functional="PBE",
            optimize=True, output="h2o_opt")
    # -> h2o_opt.out, h2o_opt.molden, h2o_opt.traj

All text output goes to ``{output}.out``. The molden file is written iff
the method produces molecular orbitals (RHF / UHF / RKS / UKS) and
``write_molden=True``. Geometry optimization uses ASE's BFGS and writes
one frame per step to ``{output}.traj``.
"""

from __future__ import annotations

import os
import time
from pathlib import Path
from typing import Any, List, Literal, Optional, Union

from ._vibeqc_core import (
    BasisSet,
    D3BJParams,
    Molecule,
    RHFOptions,
    RKSOptions,
    UHFOptions,
    UKSOptions,
    get_num_threads,
    run_rhf,
    run_rks,
    run_uhf,
    run_uks,
    set_num_threads,
)
from .banner import VIBEQC_VERSION, banner, library_versions
from .composites import (
    Availability,
    CompositeRecipe,
    CompositeUnavailable,
    resolve_composite,
)
from .crash_dump import crash_dump_context, dump_on_failure
from .dispersion import compute_d3bj, d3bj_params_for
from .dispersion_d4 import compute_d4, dftd4_available
from .gcp import GCPDataMissing, compute_gcp
from .io.molden import write_molden
from .memory import check_memory, estimate_memory, format_memory_report
from .output import (
    OutputPlan,
    OutputWriter,
    dry_run_manifest,
    is_dry_run_requested,
    parse_write_cube_kwarg,
    requested_mo_indices,
    write_cube_density_for_run_job,
    write_cube_mo_for_run_job,
    write_population,
    write_xyz,
)
from .output._errors import (
    OutputFailureKind,
    warn_output_failure,
    warn_writer_failure,
)
from .output.citations import (
    format_references_block,
    load_default_database,
    write_bibtex,
    write_references,
)
from .perf import PerfScope
from .perf import perf_log as _perf_log_ctx
from .progress import ProgressLogger, resolve_progress
from .scf_log import format_scf_trace
from .solvers import (
    DMRGOptions,
    Hamiltonian,
    SelectedCIOptions,
    SolverResult,
    TranscorrelatedOptions,
    V2RDMOptions,
    build_hamiltonian_mo,
    build_transcorrelated_hamiltonian,
    get_hf_orbital_provider,
    solve_dmrg,
    solve_selected_ci,
    solve_v2rdm,
)
from .structured_log import (
    StructuredLog,
    run_fingerprint,
)
from .structured_log import (
    structured_log as _structured_log_ctx,
)

DispersionSpec = Optional[object]  # str functional name | D3BJParams | None


def _resolve_dispersion(
    dispersion: DispersionSpec,
    functional: Optional[str],
) -> Optional[D3BJParams]:
    """Normalize the ``dispersion=`` argument into a D3BJParams object,
    or None if no dispersion correction is requested.

    * ``None`` or empty string → no dispersion.
    * ``True`` / ``"d3bj"`` → use D3-BJ params for the current functional.
    * ``"pbe" / "b3lyp" / ...`` → use D3-BJ params for that functional
      explicitly (useful for HF + D3-BJ).
    * :class:`D3BJParams` instance → used as-is.
    """
    if dispersion is None or dispersion is False:
        return None
    if isinstance(dispersion, D3BJParams):
        return dispersion
    if isinstance(dispersion, str):
        key = dispersion.strip().lower()
        if key in ("", "none", "false"):
            return None
        if key in ("d3bj", "true", "yes", "on"):
            if not functional:
                raise ValueError(
                    "dispersion='d3bj' requires a DFT functional so we can "
                    "look up damping parameters; pass functional='pbe', "
                    "etc., or give a functional name directly as the "
                    "dispersion argument."
                )
            lookup = functional
        else:
            lookup = dispersion
        p = d3bj_params_for(lookup)
        if p is None:
            raise ValueError(
                f"dispersion={dispersion!r}: no D3-BJ parameters found "
                f"for functional {lookup!r} (neither builtin nor dftd3 "
                f"backend knows this name). Install the dftd3 backend "
                f"for the full Grimme parameter set: "
                f"`pip install dftd3` into your vibe-qc venv, or "
                f"`pip install -e '.[dispersion]'` from the repo checkout."
            )
        return p
    if dispersion is True:
        if not functional:
            raise ValueError(
                "dispersion=True requires a DFT functional; pass functional='pbe', etc."
            )
        return d3bj_params_for(functional)
    raise TypeError(
        f"dispersion must be None, bool, str, or D3BJParams; got "
        f"{type(dispersion).__name__}"
    )


class _DispersionAugmented:
    """Transparent wrapper around an SCF result that exposes the
    dispersion contribution as extra attributes without overriding the
    raw SCF ``.energy``. Users opt in to the total energy via
    :attr:`energy_total`; everything else (mo_energies, density, fock,
    converged, n_iter, scf_trace, …) forwards to the underlying
    pybind11 SCF-result struct.

    We wrap rather than mutate because the C++ result classes have
    ``def_readonly`` fields — they can't be extended in place from
    Python.
    """

    __slots__ = ("_scf", "e_dispersion", "dispersion_params")

    def __init__(self, scf_result, e_dispersion: float, params: D3BJParams):
        object.__setattr__(self, "_scf", scf_result)
        object.__setattr__(self, "e_dispersion", e_dispersion)
        object.__setattr__(self, "dispersion_params", params)

    def __getattr__(self, name):
        return getattr(self._scf, name)

    @property
    def energy_total(self) -> float:
        """SCF + dispersion energy (Hartree)."""
        return float(self._scf.energy) + float(self.e_dispersion)

    def __repr__(self) -> str:
        return (
            f"{type(self._scf).__name__}(energy={self._scf.energy:.10f}, "
            f"e_dispersion={self.e_dispersion:+.6e}, "
            f"energy_total={self.energy_total:.10f})"
        )


Method = Literal[
    "rhf",
    "rks",
    "uhf",
    "uks",
    "auto",
    "selected_ci",
    "dmrg",
    "v2rdm",
    "transcorrelated_ci",
    "casci",
    "casscf",
    "fci",
    "ccsd",
    "ccsd(t)",
    # Composite 3c keywords (v0.9.0 — see vibeqc.composites). The
    # dispatcher resolves these via _apply_composite before any of the
    # other Method literal branches see them.
    "hf-3c",
    "pbeh-3c",
    "b97-3c",
    "b3lyp-3c",
    "r2scan-3c",
    "wb97x-3c",
    "hse-3c",
    # Semiempirical methods.
    "dftb0",
    "scc_dftb",
    "pm6",
    "gfn2_xtb",
    "om1",
    "om2",
    "om3",
    # Machine-learning interatomic potential — external pre-trained
    # model (ACEsuit MACE). Routed through vibeqc.mlip.mace.
    "mace",
]

# Machine-learning interatomic potentials (external pre-trained models,
# routed through vibeqc.mlip). Basis-set-free like the semiempirical
# methods; unlike them, vibe-qc drives a third-party pre-trained forward
# pass (maintainer-approved CLAUDE.md §10 extension). Kept as a module
# constant so the basis-skip sites in run_job stay in sync.
_MLIP_METHODS = frozenset({"mace"})


def _select_method(
    method: Method, molecule: Molecule, functional: Optional[str]
) -> str:
    """Resolve ``method='auto'`` against molecule.multiplicity + functional.

    Heuristic wavefunction routing by electron count:
    - ≤4 electrons → FCI (exact for tiny systems)
    - ≤8 electrons → Selected-CI (systematically improvable)
    - >8 electrons → HF (via SCF)

    ``ccsd`` and ``ccsd(t)`` run RHF SCF first, then post-SCF coupled-cluster."""
    if method == "auto":
        if functional:
            return "uks" if molecule.multiplicity > 1 else "rks"
        n_elec = molecule.n_electrons()
        if n_elec <= 4:
            return "fci"
        if n_elec <= 8:
            return "selected_ci"
        return "uhf" if molecule.multiplicity > 1 else "rhf"
    if method in ("ccsd", "ccsd(t)"):
        return "rhf"
    return method


def _apply_composite(
    method: str,
    *,
    basis: Optional[str],
    functional: Optional[str],
    dispersion,
    molecule: Molecule,
) -> tuple[str, str, Optional[str], object, Optional[CompositeRecipe]]:
    """Resolve a composite-3c ``method=`` keyword (``"hf-3c"``,
    ``"pbeh-3c"``, …) into the (method, basis, functional, dispersion,
    recipe) tuple the rest of :func:`run_job` consumes.

    Non-composite ``method`` values pass through unchanged with
    ``recipe = None``. For a composite:

    * basis / functional / dispersion are taken from the recipe when
      the caller left them at defaults; an explicit caller basis wins
      with a warning (deliberate-experiment escape hatch).
    * D3-BJ recipes carry their own re-fit damping (``d3bj_damping``);
      it is passed straight through as a :class:`D3BJParams` so the
      dispersion module doesn't fall back to a wrong per-functional
      lookup.
    * Recipes flagged ``PENDING_*`` raise :class:`CompositeUnavailable`
      with a pointer at the gating roadmap item.
    """
    recipe = resolve_composite(method)
    if recipe is None:
        return method, basis or "", functional, dispersion, None

    if recipe.availability in (
        Availability.PENDING_F1,
        Availability.PENDING_F3,
        Availability.PENDING_ECP,
    ):
        raise CompositeUnavailable(
            f"method={method!r}: {recipe.availability.value}. "
            f"This composite depends on infrastructure not yet on "
            f"main. See docs/user_guide/composites.md § Availability.\n"
            f"  Notes: {recipe.notes}"
        )

    resolved_basis = basis if basis else recipe.basis
    if basis and basis.lower() != recipe.basis.lower():
        import warnings

        warnings.warn(
            f"method={method!r} specifies basis={recipe.basis!r}, but "
            f"caller passed basis={basis!r}. Honouring the explicit "
            f"basis — the published 3c parameters were fit at the "
            f"composite's native basis and may not transfer.",
            stacklevel=2,
        )
    resolved_functional = functional if functional else recipe.functional

    if dispersion is None:
        if recipe.dispersion == "d3bj" and recipe.d3bj_damping is not None:
            resolved_dispersion: object = D3BJParams(
                s6=recipe.d3bj_damping.s6,
                s8=recipe.d3bj_damping.s8,
                a1=recipe.d3bj_damping.a1,
                a2=recipe.d3bj_damping.a2,
            )
        else:
            resolved_dispersion = recipe.dispersion
    else:
        resolved_dispersion = dispersion

    # Pure-HF composites (recipe.functional is None — only HF-3c today)
    # must route straight to the HF SCF driver. Handing "auto" to
    # _select_method with no functional would drop into the
    # wavefunction auto-ladder (≤4e → FCI, ≤8e → selected-CI), so a
    # turnkey hf-3c on H2 silently ran fci(ndet=4) instead of HF
    # (audit F1.1). DFT composites keep "auto" → _select_method picks
    # rks/uks from multiplicity.
    if resolved_functional is None:
        resolved_method = "uhf" if molecule.multiplicity > 1 else "rhf"
    else:
        resolved_method = "auto"
    return (
        resolved_method,
        resolved_basis,
        resolved_functional,
        resolved_dispersion,
        recipe,
    )


def _detect_scf_accelerator(
    resolved_method: str,
    rhf_options: Optional[RHFOptions],
    uhf_options: Optional[UHFOptions],
    rks_options: Optional[RKSOptions],
    uks_options: Optional[UKSOptions],
) -> Optional[str]:
    """Return the lower-cased SCFAccelerator name (``"diis"`` / ``"ediis"``
    / ``"ediis_diis"`` / ``"kdiis"`` / ``"adiis"``) actually used by the
    resolved SCF, or ``None`` when no options struct is in play (post-SCF
    / non-mean-field methods).

    The citation router uses this to fire the right SCF-accelerator
    references — Hu-Yang 2010 for ADIIS, Garza-Scuseria 2012 for KDIIS,
    Kudin-Scuseria-Cancès 2002 for EDIIS — instead of always crediting
    plain DIIS (Pulay 1980 / 1982).
    """
    opts = {
        "rhf": rhf_options,
        "uhf": uhf_options,
        "rks": rks_options,
        "uks": uks_options,
    }.get(resolved_method)
    if opts is None:
        return None
    accel = getattr(opts, "scf_accelerator", None)
    if accel is None:
        return None
    name = getattr(accel, "name", None) or str(accel)
    return name.strip().lower()


def _detect_uses_ecp(
    rhf_options: Optional[RHFOptions],
    uhf_options: Optional[UHFOptions],
    rks_options: Optional[RKSOptions],
    uks_options: Optional[UKSOptions],
) -> bool:
    """Return True when any options struct carries a non-empty
    ``ecp_centers`` list — the runtime signal that libecpint will build
    the V_ECP operator.

    Conservative: returns False when no options struct is present at
    all (post-SCF / FCI / selected-CI paths build their own HF prep
    without going through these options).
    """
    for opts in (rhf_options, uhf_options, rks_options, uks_options):
        if opts is None:
            continue
        centers = getattr(opts, "ecp_centers", None)
        if centers:  # non-empty list
            return True
    return False


def _detect_direct_scf(
    resolved_method: str,
    rhf_options: Optional[RHFOptions],
    uhf_options: Optional[UHFOptions],
    rks_options: Optional[RKSOptions],
    uks_options: Optional[UKSOptions],
    basis_name: str,
    molecule: Molecule,
) -> bool:
    """Return True when the SCF will use the direct (integral-driven)
    Fock build path — either because ``scf_mode=DIRECT`` was explicitly
    set, or because AUTO resolved to DIRECT (basis larger than the
    auto threshold, and density_fit/cosx are off).

    Conservative: returns False when we can't determine the resolved
    mode (no options struct, or density_fit/cosx supersedes).
    """
    opts = {
        "rhf": rhf_options,
        "uhf": uhf_options,
        "rks": rks_options,
        "uks": uks_options,
    }.get(resolved_method)
    if opts is None:
        return False

    # density_fit + cosx supersede the four-index path — direct SCF
    # screening is not used, so the direct-SCF references don't apply.
    if getattr(opts, "density_fit", False):
        return False

    scf_mode = getattr(opts, "scf_mode", None)
    if scf_mode is None:
        return False

    from ._vibeqc_core import SCFMode

    if scf_mode == SCFMode.DIRECT:
        return True
    if scf_mode == SCFMode.AUTO:
        threshold = getattr(opts, "scf_mode_auto_threshold", 200)
        n_bf = BasisSet(molecule, basis_name).nbasis
        return n_bf > threshold
    # CONVENTIONAL — not direct.
    return False


def _run_single_point(
    method: str,
    molecule: Molecule,
    basis: BasisSet,
    *,
    functional: Optional[str],
    rhf_options: Optional[RHFOptions] = None,
    uhf_options: Optional[UHFOptions] = None,
    rks_options: Optional[RKSOptions] = None,
    uks_options: Optional[UKSOptions] = None,
    selected_ci_options: Optional[SelectedCIOptions] = None,
    dmrg_options: Optional[DMRGOptions] = None,
    v2rdm_options: Optional[V2RDMOptions] = None,
    transcorrelated_options: Optional[TranscorrelatedOptions] = None,
    active_space: Optional[tuple[int, int]] = None,
    solvent: object = None,
    dft_plus_u: Optional[List["HubbardSite"]] = None,
):
    """Single-point dispatcher. ``solvent`` (v0.9.0) reroutes through
    :func:`vibeqc.solvation.run_cpcm_scf` and returns the underlying
    SCF result with an ``e_solv`` / ``solvent_result`` attribute
    attached for downstream output writers."""
    method_opts = {
        "rhf": rhf_options or RHFOptions(),
        "uhf": uhf_options or UHFOptions(),
    }
    if method in ("rks", "uks"):
        opts = (rks_options if method == "rks" else uks_options) or (
            RKSOptions() if method == "rks" else UKSOptions()
        )
        opts.functional = functional or opts.functional or "lda"
        method_opts[method] = opts

    if solvent is not None and method in ("rhf", "uhf", "rks", "uks"):
        if dft_plus_u:
            raise NotImplementedError(
                "DFT+U combined with implicit solvation (CPCM) is not yet "
                "supported. The +U Fock term and the solvation macro-"
                "iteration would need to be combined inside "
                "vibeqc.solvation.run_cpcm_scf; defer until Increment "
                "3+ or run +U without solvent."
            )
        # CPCM macro-iteration wrapper. The C++ result types
        # (RHFResult / UHFResult / RKSResult / UKSResult) are
        # pybind11 ``def_readonly`` — Python attribute set on them
        # silently fails inside try/except. Wrap the inner result in
        # an attribute-forwarding proxy so callers can still write
        # ``result.energy`` / ``result.density`` / etc., and *also*
        # read the new solvation diagnostics (``result.solvent_result``,
        # ``result.e_solv``, ``result.energy_in_solvent``).
        from .solvation import run_cpcm_scf
        from .solvation.driver import _solvent_aware_scf_result

        sol = run_cpcm_scf(
            molecule,
            basis,
            method=method,
            solvent=solvent,
            options=method_opts[method],
        )
        return _solvent_aware_scf_result(sol)

    if dft_plus_u and method in ("rhf", "uhf", "rks", "uks"):
        from .dft_plus_u import _apply_dft_plus_u_to_options

        _apply_dft_plus_u_to_options(method_opts[method], basis, dft_plus_u)

    if method == "rhf":
        return run_rhf(molecule, basis, method_opts["rhf"])
    if method == "uhf":
        return run_uhf(molecule, basis, method_opts["uhf"])
    if method == "rks":
        return run_rks(molecule, basis, method_opts["rks"])
    if method == "uks":
        return run_uks(molecule, basis, method_opts["uks"])

    # ── Non-mean-field wavefunction methods ──
    if method in (
        "selected_ci",
        "dmrg",
        "v2rdm",
        "transcorrelated_ci",
        "casci",
        "casscf",
        "nevpt2",
        "caspt2",
    ):
        import numpy as np

        hf_method = "uhf" if molecule.multiplicity > 1 else "rhf"
        C = get_hf_orbital_provider(molecule, basis, method=hf_method)
        H = build_hamiltonian_mo(molecule, basis, C)

        # Optional active-space truncation.  The CAS methods (casci / nevpt2 /
        # caspt2) are handled separately below — they keep the full MO
        # integrals and dress an explicit frozen core (standard CAS
        # convention), rather than dropping the core + virtual orbitals the
        # way these CI/DMRG/v2RDM backends do.
        if active_space is not None and method not in (
            "casci", "casscf", "nevpt2", "caspt2"
        ):
            n_active, n_elec = active_space
            n_frozen = max(0, H.norb - n_active)
            C_act = np.asarray(C)[:, n_frozen : n_frozen + n_active]
            H = build_hamiltonian_mo(molecule, basis, C_act)
            H.nelec = n_elec
            H.ms2 = 0

        if method == "transcorrelated_ci":
            if transcorrelated_options is None:
                transcorrelated_options = TranscorrelatedOptions()
            H = build_transcorrelated_hamiltonian(H, transcorrelated_options)
            return solve_selected_ci(H, selected_ci_options or SelectedCIOptions())

        if method == "selected_ci":
            return solve_selected_ci(H, selected_ci_options or SelectedCIOptions())

        if method == "dmrg":
            return solve_dmrg(H, dmrg_options or DMRGOptions())

        if method == "v2rdm":
            return solve_v2rdm(H, v2rdm_options or V2RDMOptions())

        if method in ("casci", "casscf", "nevpt2", "caspt2"):
            from .solvers._casci import casci as _run_casci

            # Standard CAS partition: full MO integrals with an explicit frozen
            # core.  active_space=(n_active_orb, n_active_elec) selects the
            # lowest n_core orbitals as doubly-occupied core, n_active_orb
            # active orbitals, the rest virtual (matching PySCF mcscf.CASCI).
            # H here is the full, untruncated Hamiltonian.
            n_elec_total = molecule.n_electrons()
            if active_space is not None:
                n_active_orb, n_active_elec = active_space
                if (n_elec_total - n_active_elec) % 2 != 0:
                    raise ValueError(
                        "CAS active_space=(n_orb, n_elec) needs an even "
                        f"frozen-core electron count; got n_elec_total="
                        f"{n_elec_total}, n_active_elec={n_active_elec}."
                    )
                n_core = (n_elec_total - n_active_elec) // 2
            else:
                n_active_orb, n_active_elec, n_core = H.norb, n_elec_total, 0
            n_virt = H.norb - n_core - n_active_orb

            if method == "casscf":
                from .solvers._casscf import casscf as _run_casscf

                sc = _run_casscf(
                    H.h1e,
                    H.h2e,
                    n_active_elec=n_active_elec,
                    n_active_orb=n_active_orb,
                    n_core=n_core,
                    nuclear_repulsion=H.nuclear_repulsion,
                    ms2=molecule.multiplicity - 1,
                )
                return SolverResult(
                    energy=sc.e_total,
                    method=f"casscf({n_active_elec}e,{n_active_orb}o)",
                    converged=sc.converged,
                    n_iter=sc.n_iter,
                    energy_trace=sc.energy_trace or [sc.e_total],
                )

            cas = _run_casci(
                H.h1e,
                H.h2e,
                n_active_elec=n_active_elec,
                n_active_orb=n_active_orb,
                n_core=n_core,
                nuclear_repulsion=H.nuclear_repulsion,
                ms2=molecule.multiplicity - 1,
            )

            if method == "casci":
                energy, label = cas.e_total, f"casci({n_active_elec}e,{n_active_orb}o)"
            elif method == "nevpt2":
                from .solvers._mrpt import nevpt2 as _run_nevpt2

                pt = _run_nevpt2(cas, H.h1e, H.h2e, n_core=n_core, n_virt=n_virt)
                energy = pt.e_total
                label = f"nevpt2({n_active_elec}e,{n_active_orb}o)"
            else:  # caspt2 — gated inside _run_caspt2 (under validation)
                from .solvers._mrpt import caspt2 as _run_caspt2

                pt = _run_caspt2(cas, H.h1e, H.h2e, n_core=n_core, n_virt=n_virt)
                energy = pt.e_total
                label = f"caspt2({n_active_elec}e,{n_active_orb}o)"

            return SolverResult(
                energy=energy,
                method=label,
                # Direct CI + non-iterative PT2: a returned result means the
                # eigensolve succeeded.
                converged=True,
                n_iter=1,
                energy_trace=[energy],
            )

    if method == "fci":
        import numpy as np
        from scipy.linalg import eigh

        from .solvers import (
            build_hamiltonian_matrix_unrestricted,
            diagonal_matrix_element_unrestricted,
            generate_determinants,
        )

        hf_method = "uhf" if molecule.multiplicity > 1 else "rhf"
        C = get_hf_orbital_provider(molecule, basis, method=hf_method)
        ham = build_hamiltonian_mo(molecule, basis, C)

        if active_space is not None:
            n_active, n_elec = active_space
            n_frozen = max(0, ham.norb - n_active)
            C_act = np.asarray(C)[:, n_frozen : n_frozen + n_active]
            ham = build_hamiltonian_mo(molecule, basis, C_act)
            ham.nelec = n_elec

        norb = ham.norb
        nelec = ham.nelec
        nalpha = (nelec + ham.ms2) // 2
        nbeta = (nelec - ham.ms2) // 2

        all_dets = generate_determinants(norb, nalpha, nbeta)
        H = build_hamiltonian_matrix_unrestricted(all_dets, ham.h1e, ham.h2e)
        evals, evecs = eigh(H)
        e_fci = evals[0] + ham.nuclear_repulsion

        return SolverResult(
            energy=e_fci,
            method=f"fci(ndet={len(all_dets)})",
            converged=True,
            n_iter=1,
            energy_trace=[e_fci],
            ci_coeffs=evecs[:, 0],
            ci_labels=all_dets,
        )

    # ── Semiempirical methods ──
    se_methods = {
        "dftb0",
        "scc_dftb",
        "pm6",
        "gfn2_xtb",
        "om1",
        "om2",
        "om3",
    }
    if method in se_methods:
        return _run_semiempirical(method, molecule)

    # ── Machine-learning interatomic potentials (MACE, …) ──
    if method in _MLIP_METHODS:
        return _run_mlip(method, molecule)

    raise ValueError(
        f"Unknown method {method!r}; use 'rhf', 'uhf', 'rks', 'uks', "
        f"'selected_ci', 'dmrg', 'v2rdm', 'transcorrelated_ci', 'casci', "
        f"'casscf', 'nevpt2', 'caspt2', 'fci', "
        f"'ccsd', 'ccsd(t)', 'dftb0', 'scc_dftb', 'pm6', 'gfn2_xtb', "
        f"'om1', 'om2', 'om3', 'mace', or 'auto'"
    )


def _run_semiempirical(method: str, molecule: Molecule):
    """Run a semiempirical calculation and return a result-like object."""
    import numpy as np

    # ── DFTB methods ──
    if method in ("dftb0", "scc_dftb"):
        from vibeqc.semiempirical import DFTB0Model, SCCDFTBModel
        from vibeqc.semiempirical.parameters import default_parameters

        params = default_parameters()
        if method == "dftb0":
            model = DFTB0Model(molecule, params=params)
        else:
            model = SCCDFTBModel(molecule, params=params)
        g = model.gradient() if hasattr(model, "gradient") else None
        return _SEMPR(model.energy(), g, method, converged=True, n_iter=1)

    # ── GFN2-xTB (experimental — model.energy() emits GFN2ExperimentalWarning) ──
    if method == "gfn2_xtb":
        from vibeqc.semiempirical.methods.gfn2 import GFN2Model
        from vibeqc.semiempirical.methods.gfn2_params import load_gfn2_params

        params = load_gfn2_params()
        model = GFN2Model(molecule, params)
        energy = model.energy()
        return _SEMPR(
            energy,
            None,
            method,
            converged=model.converged,
            n_iter=model.n_iter,
        )

    # ── NDDO methods (PM6, OM1, OM2, OM3) ──
    from vibeqc._vibeqc_core.semiempirical.nddo import (
        run_omx,
        run_pm6,
    )

    if method == "pm6":
        from vibeqc.semiempirical.methods.pm6_params import load_pm6_params_auto

        params = load_pm6_params_auto([at.Z for at in molecule.atoms])

        if molecule.multiplicity > 1:
            from vibeqc._vibeqc_core.semiempirical.nddo import (
                compute_upm6_gradient_fd,
                run_upm6,
            )

            result_obj = run_upm6(molecule, params, max_iter=200)
            try:
                g = compute_upm6_gradient_fd(molecule, params)
            except Exception:
                g = None
            result = _SEMPR(
                float(result_obj.energy),
                g,
                method,
                converged=bool(result_obj.converged),
                n_iter=int(result_obj.n_iter),
            )
        else:
            result_obj = run_pm6(molecule, params, max_iter=200)
            from vibeqc._vibeqc_core.semiempirical.nddo import compute_pm6_gradient_fd

            try:
                g = compute_pm6_gradient_fd(molecule, params)
            except Exception:
                g = None
            result = _SEMPR(
                float(result_obj.energy),
                g,
                method,
                converged=bool(result_obj.converged),
                n_iter=int(result_obj.n_iter),
            )
        return result

    if method in ("om1", "om2", "om3"):
        from vibeqc.semiempirical.methods.omx_params import (
            load_om1_params,
            load_om2_params,
            load_om3_params,
        )

        loader = {
            "om1": load_om1_params,
            "om2": load_om2_params,
            "om3": load_om3_params,
        }[method]
        params = loader()

        if molecule.multiplicity > 1:
            from vibeqc._vibeqc_core.semiempirical.nddo import (
                compute_uomx_gradient_fd,
                run_uomx,
            )

            result_obj = run_uomx(molecule, params, max_iter=200)
            try:
                g = compute_uomx_gradient_fd(molecule, params)
            except Exception:
                g = None
            result = _SEMPR(
                float(result_obj.energy),
                g,
                method,
                converged=bool(result_obj.converged),
                n_iter=int(result_obj.n_iter),
            )
        else:
            result_obj = run_omx(molecule, params, max_iter=200)
            from vibeqc._vibeqc_core.semiempirical.nddo import compute_omx_gradient_fd

            try:
                g = compute_omx_gradient_fd(molecule, params)
            except Exception:
                g = None
            result = _SEMPR(
                float(result_obj.energy),
                g,
                method,
                converged=bool(result_obj.converged),
                n_iter=int(result_obj.n_iter),
            )
        return result

    raise ValueError(f"Unhandled semiempirical method: {method!r}")


def _run_mlip(method: str, molecule: Molecule):
    """Run a machine-learning interatomic potential and return a
    result-like object (duck-typing the SCF-result interface).

    vibe-qc drives the external pre-trained model's forward pass; the
    returned ``.energy`` is the model's reference-shifted DFT-surface
    energy (Hartree), NOT a total electronic energy (``CLAUDE.md`` §10 /
    energy-scale caveat — do not mix with HF/DFT totals).
    """
    if method == "mace":
        from vibeqc.mlip.mace import MACEModel

        model = MACEModel(molecule)
        return _MLIPResult(
            model.energy(),
            model.gradient(),
            method,
            converged=True,
            n_iter=1,
        )
    raise ValueError(f"Unhandled MLIP method: {method!r}")


class _SEMPR:
    """Semiempirical result wrapper — duck-typing the interface
    format_scf_trace and run_job expect."""

    def __init__(self, energy, gradient=None, method_name="", converged=True, n_iter=1):
        self.energy = energy
        self._gradient = gradient
        self.method = method_name
        self.converged = converged
        self.n_iter = n_iter
        self.scf_trace = []

    def gradient(self):
        return self._gradient


class _MLIPResult:
    """Machine-learning-interatomic-potential result wrapper — duck-types
    the same interface as :class:`_SEMPR` (energy, gradient(), method,
    converged, n_iter, scf_trace). ``energy`` is the model's
    reference-shifted DFT-surface energy (Hartree), not a total electronic
    energy."""

    def __init__(
        self, energy, gradient=None, method_name="", converged=True, n_iter=1
    ):
        self.energy = energy
        self._gradient = gradient
        self.method = method_name
        self.converged = converged
        self.n_iter = n_iter
        self.scf_trace = []

    def gradient(self):
        return self._gradient


def _format_solver_trace(result: SolverResult) -> str:
    """Format a non-mean-field solver result for the .out file."""
    import numpy as np

    lines = []
    lines.append(f"  Method:            {result.method}")
    lines.append(f"  Total energy:      {result.energy:16.10f} Ha")
    lines.append(f"  Converged:         {result.converged}")
    lines.append(f"  Iterations/sweeps: {result.n_iter}")
    if result.pt2_correction is not None and abs(result.pt2_correction) > 1e-12:
        e_var = result.energy - result.pt2_correction
        lines.append(f"  E(variational):    {e_var:16.10f} Ha")
        lines.append(f"  E(PT2 correction): {result.pt2_correction:16.10f} Ha")
    if result.bond_dim is not None:
        lines.append(f"  Bond dimension:    {result.bond_dim}")
    if result.truncation_error is not None:
        lines.append(f"  Truncation error:  {result.truncation_error:.2e}")
    if result.constraint_residual is not None:
        lines.append(f"  Constraint resid.: {result.constraint_residual:.2e}")
    if result.ci_labels is not None and result.ci_coeffs is not None:
        lines.append(f"  Determinants:      {len(result.ci_labels)}")
        c_abs = np.abs(result.ci_coeffs)
        n_show = min(8, len(c_abs))
        idx = np.argsort(-c_abs)
        lines.append("  Leading configurations:")
        norb = 0
        for rank in range(n_show):
            i = idx[rank]
            label = result.ci_labels[i]
            # Format SpinDet as α|...⟩ β|...⟩, Det as |...⟩
            if (
                isinstance(label, tuple)
                and len(label) == 2
                and isinstance(label[0], tuple)
            ):
                a_str = "".join(
                    "1" if j in label[0] else "0"
                    for j in range(max(label[0]) + 1 if label[0] else 0)
                )
                b_str = "".join(
                    "1" if j in label[1] else "0"
                    for j in range(max(label[1]) + 1 if label[1] else 0)
                )
                lines.append(f"    |c_{rank}| = {c_abs[i]:.6f}  α|{a_str}⟩ β|{b_str}⟩")
            else:
                norb = max(norb, max(label) + 1 if label else 0)
                occ_str = "".join("1" if j in label else "0" for j in range(norb))
                lines.append(f"    |c_{rank}| = {c_abs[i]:.6f}  |{occ_str}⟩")
    if result.energy_trace:
        lines.append("  Energy trace:")
        for i, e in enumerate(result.energy_trace):
            lines.append(f"    iter {i + 1:4d}:  E = {e:16.10f} Ha")
    return "\n".join(lines)


def _geom_summary(molecule: Molecule) -> str:
    """Three-column per-atom geometry listing (bohr)."""
    lines = ["  Atoms (bohr)", "  " + "-" * 54]
    for i, atom in enumerate(molecule.atoms, start=1):
        lines.append(
            f"  {i:4d}  Z={atom.Z:3d}   "
            f"{atom.xyz[0]:14.8f}  {atom.xyz[1]:14.8f}  {atom.xyz[2]:14.8f}"
        )
    lines.append(
        f"  charge={molecule.charge}  multiplicity={molecule.multiplicity}  "
        f"n_electrons={molecule.n_electrons()}"
    )
    return "\n".join(lines)


def _job_header(method: str, basis_name: str, functional: Optional[str]) -> str:
    label = f"{method.upper()}"
    if method in ("rks", "uks") and functional:
        label = f"{label} / {functional}"
    if method in ("ccsd", "ccsd(t)"):
        label = f"RHF + {method.upper()}"
    return f"  Job: {label}  basis={basis_name}"


def _make_semiempirical_ase_calculator(
    molecule: Molecule,
    method: str,
):
    """Return an ASE Calculator that computes energy via the
    semiempirical dispatcher and forces via central finite differences."""
    try:
        from ase.calculators.calculator import Calculator
        from ase.units import Bohr, Hartree
    except ImportError:
        raise ImportError("Geometry optimization requires ASE: pip install ase")

    import numpy as np

    from ._vibeqc_core import Atom as _Atom
    from ._vibeqc_core import Molecule as _Molecule

    h = 0.001  # bohr

    class _SemiempiricalCalculator(Calculator):
        implemented_properties = ["energy", "forces"]

        def calculate(self, atoms, properties, system_changes):
            positions_bohr = atoms.positions / Bohr
            mol = _Molecule(
                [
                    _Atom(int(z), list(xyz))
                    for z, xyz in zip(atoms.numbers, positions_bohr)
                ],
                charge=molecule.charge,
                multiplicity=molecule.multiplicity,
            )

            result = _run_semiempirical(method, mol)
            energy_ha = float(getattr(result, "energy", 0.0))
            self.results["energy"] = energy_ha * Hartree

            if "forces" in properties:
                forces_ha_bohr = np.zeros((len(atoms), 3))
                for i in range(len(atoms)):
                    for c in range(3):
                        pos_plus = positions_bohr.copy()
                        pos_plus[i, c] += h
                        mol_p = _Molecule(
                            [
                                _Atom(int(z), list(xyz))
                                for z, xyz in zip(atoms.numbers, pos_plus)
                            ],
                            charge=molecule.charge,
                            multiplicity=molecule.multiplicity,
                        )
                        e_p = float(
                            getattr(_run_semiempirical(method, mol_p), "energy", 0.0)
                        )

                        pos_minus = positions_bohr.copy()
                        pos_minus[i, c] -= h
                        mol_m = _Molecule(
                            [
                                _Atom(int(z), list(xyz))
                                for z, xyz in zip(atoms.numbers, pos_minus)
                            ],
                            charge=molecule.charge,
                            multiplicity=molecule.multiplicity,
                        )
                        e_m = float(
                            getattr(_run_semiempirical(method, mol_m), "energy", 0.0)
                        )

                        forces_ha_bohr[i, c] = (e_m - e_p) / (2.0 * h)
                self.results["forces"] = forces_ha_bohr * (Hartree / Bohr)

    return _SemiempiricalCalculator()


def _make_wavefunction_ase_calculator(
    molecule: Molecule,
    basis_name: str,
    method: str,
    *,
    functional=None,
    selected_ci_options=None,
    dmrg_options=None,
    v2rdm_options=None,
    transcorrelated_options=None,
    active_space=None,
    dft_plus_u: Optional[List["HubbardSite"]] = None,
):
    """Return an ASE Calculator that computes energy via the wavefunction
    solver and forces via central finite differences (h = 0.001 bohr).

    When ``dft_plus_u`` is set, the per-geometry SCF (energy + each FD
    gradient probe) runs with +U via :func:`_run_single_point`'s
    ``dft_plus_u`` kwarg. The analytic +U gradient (Increment 3) is
    *not* used by this calculator — FD is good enough for BFGS step
    direction and avoids re-plumbing the calculator's force-call
    contract. For a tight optimization with analytic +U gradient,
    call :func:`vibeqc.compute_gradient` (or sibling) directly with
    ``dft_plus_u=`` per-step in your own loop.
    """
    try:
        from ase.calculators.calculator import Calculator
        from ase.units import Bohr, Hartree
    except ImportError:
        raise ImportError("Geometry optimization requires ASE: pip install ase")

    import numpy as np

    from ._vibeqc_core import Atom, BasisSet
    from ._vibeqc_core import Molecule as _Molecule

    h = 0.001  # bohr — central-finite-difference step

    class _WavefunctionCalculator(Calculator):
        implemented_properties = ["energy", "forces"]

        def calculate(self, atoms, properties, system_changes):
            # Convert ASE Atoms → vibeqc Molecule
            positions_bohr = atoms.positions / Bohr
            mol = _Molecule(
                [
                    Atom(int(z), list(xyz))
                    for z, xyz in zip(atoms.numbers, positions_bohr)
                ],
                charge=molecule.charge,
                multiplicity=molecule.multiplicity,
            )
            basis = BasisSet(mol, basis_name)

            # Compute energy at current geometry
            result = _run_single_point(
                method,
                mol,
                basis,
                functional=functional,
                selected_ci_options=selected_ci_options,
                dmrg_options=dmrg_options,
                v2rdm_options=v2rdm_options,
                transcorrelated_options=transcorrelated_options,
                active_space=active_space,
                dft_plus_u=dft_plus_u,
            )
            energy_ha = float(getattr(result, "energy", 0.0))
            self.results["energy"] = energy_ha * Hartree

            # Numerical forces via central differences
            if "forces" in properties:
                forces_ha_bohr = np.zeros((len(atoms), 3))
                for i in range(len(atoms)):
                    for c in range(3):
                        # +h displacement
                        pos_plus = positions_bohr.copy()
                        pos_plus[i, c] += h
                        mol_plus = _Molecule(
                            [
                                Atom(int(z), list(xyz))
                                for z, xyz in zip(atoms.numbers, pos_plus)
                            ],
                            charge=molecule.charge,
                            multiplicity=molecule.multiplicity,
                        )
                        basis_plus = BasisSet(mol_plus, basis_name)
                        result_plus = _run_single_point(
                            method,
                            mol_plus,
                            basis_plus,
                            functional=functional,
                            selected_ci_options=selected_ci_options,
                            dmrg_options=dmrg_options,
                            v2rdm_options=v2rdm_options,
                            transcorrelated_options=transcorrelated_options,
                            active_space=active_space,
                            dft_plus_u=dft_plus_u,
                        )
                        e_plus = float(getattr(result_plus, "energy", 0.0))

                        # -h displacement
                        pos_minus = positions_bohr.copy()
                        pos_minus[i, c] -= h
                        mol_minus = _Molecule(
                            [
                                Atom(int(z), list(xyz))
                                for z, xyz in zip(atoms.numbers, pos_minus)
                            ],
                            charge=molecule.charge,
                            multiplicity=molecule.multiplicity,
                        )
                        basis_minus = BasisSet(mol_minus, basis_name)
                        result_minus = _run_single_point(
                            method,
                            mol_minus,
                            basis_minus,
                            functional=functional,
                            selected_ci_options=selected_ci_options,
                            dmrg_options=dmrg_options,
                            v2rdm_options=v2rdm_options,
                            transcorrelated_options=transcorrelated_options,
                            active_space=active_space,
                            dft_plus_u=dft_plus_u,
                        )
                        e_minus = float(getattr(result_minus, "energy", 0.0))

                        # Central difference: F = -dE/dR
                        forces_ha_bohr[i, c] = -(e_plus - e_minus) / (2 * h)

                self.results["forces"] = forces_ha_bohr * (Hartree / Bohr)

    return _WavefunctionCalculator()


def _optimize_geometry(
    molecule: Molecule,
    basis_name: str,
    *,
    functional: Optional[str],
    trajectory_path: Path,
    fmax: float,
    max_steps: int,
    dispersion_params: Optional[D3BJParams] = None,
    method: str = "rhf",
    selected_ci_options=None,
    dmrg_options=None,
    v2rdm_options=None,
    transcorrelated_options=None,
    active_space=None,
    rhf_options: Optional[RHFOptions] = None,
    uhf_options: Optional[UHFOptions] = None,
    rks_options: Optional[RKSOptions] = None,
    uks_options: Optional[UKSOptions] = None,
) -> Molecule:
    """Run an ASE/BFGS geometry optimization, writing frames to a .traj file.

    Returns a new Molecule at the optimized geometry. The SCF options
    apply to every intermediate geometry, not just the final point — use
    them to bump ``max_iter`` / add damping for systems where BFGS
    sometimes lands on a hard-to-converge geometry (H-bonded clusters,
    near-degenerate states).

    For wavefunction methods (``selected_ci``, ``dmrg``, ``v2rdm``,
    ``transcorrelated_ci``), forces are computed via central finite
    differences because the solvers do not yet provide analytic gradients.
    """
    try:
        from ase import Atoms
        from ase.io.trajectory import Trajectory
        from ase.optimize import BFGSLineSearch
        from ase.units import Bohr
    except ImportError as exc:
        raise ImportError(
            "Geometry optimization requires ASE. Install it with "
            "`pip install ase` into your vibe-qc venv."
        ) from exc

    from .ase import VibeQC

    positions_ang = [[coord * Bohr for coord in atom.xyz] for atom in molecule.atoms]
    atoms = Atoms(
        numbers=[atom.Z for atom in molecule.atoms],
        positions=positions_ang,
    )

    _se_methods = {"dftb0", "scc_dftb", "pm6", "gfn2_xtb", "om1", "om2", "om3"}
    wavefunction_methods = {"selected_ci", "dmrg", "v2rdm", "transcorrelated_ci"}
    if method in _se_methods:
        atoms.calc = _make_semiempirical_ase_calculator(
            molecule,
            method,
        )
    elif method in _MLIP_METHODS:
        # MACE geometry optimization (ASE BFGS on the MACE calculator) is
        # wired + tested in M3, once the <=3.13 CI lane can exercise it
        # end-to-end. Single-point method="mace" works today.
        raise NotImplementedError(
            f"method={method!r}: geometry optimization is not yet wired "
            f"(single-point only in this milestone). See docs/roadmap.md "
            f"§ MACE MLIP interface (M3)."
        )
    elif method in wavefunction_methods:
        atoms.calc = _make_wavefunction_ase_calculator(
            molecule,
            basis_name,
            method,
            functional=functional,
            selected_ci_options=selected_ci_options,
            dmrg_options=dmrg_options,
            v2rdm_options=v2rdm_options,
            transcorrelated_options=transcorrelated_options,
            active_space=active_space,
        )
    else:
        atoms.calc = VibeQC(
            basis=basis_name,
            charge=molecule.charge,
            multiplicity=molecule.multiplicity,
            functional=functional,
            dispersion=dispersion_params,
            rhf_options=rhf_options,
            uhf_options=uhf_options,
            rks_options=rks_options,
            uks_options=uks_options,
        )

    with Trajectory(str(trajectory_path), "w", atoms) as traj:
        # BFGSLineSearch refuses uphill steps by construction — essential
        # on flat / weakly-bound PESs (H-bonded clusters, dispersion-dominated
        # complexes) where plain BFGS's Hessian extrapolation routinely
        # pushes atoms apart. Works on covalent minima too, so we use it
        # uniformly. Default ASE line-search parameters are well-tuned;
        # don't clamp maxstep here or the line search can't converge.
        opt = BFGSLineSearch(atoms, logfile=None)
        opt.attach(traj)
        opt.run(fmax=fmax, steps=max_steps)

    # Re-build a vibe-qc Molecule at the optimized geometry.
    from ._vibeqc_core import Atom as _Atom

    new_positions_bohr = atoms.positions / Bohr
    optimized = Molecule(
        [_Atom(int(z), list(xyz)) for z, xyz in zip(atoms.numbers, new_positions_bohr)],
        molecule.charge,
        molecule.multiplicity,
    )
    return optimized


def _resolve_optimizer_backend(requested: str) -> str:
    """Resolve ``optimizer_backend`` to ``"ase"`` or ``"native"``."""
    if requested == "ase":
        try:
            import ase  # noqa: F401
        except ImportError:
            raise ImportError(
                "optimizer_backend='ase' requires ASE. Install with "
                "`pip install ase` or use optimizer_backend='native'."
            ) from None
        return "ase"
    if requested == "native":
        return "native"
    if requested != "auto":
        raise ValueError(
            f"optimizer_backend={requested!r} — expected 'auto', 'ase', or 'native'."
        )
    # "auto": prefer ASE if installed, otherwise native.
    try:
        import ase  # noqa: F401
    except ImportError:
        return "native"
    return "ase"


[docs] def run_job( molecule: Molecule, *, basis: Optional[str] = None, method: Method = "auto", functional: Optional[str] = None, output: str | os.PathLike = "output", optimize: bool = False, write_molden_file: bool = True, write_xyz_file: bool = True, write_population_file: bool = True, write_cube: Union[bool, str, int, list, tuple, None] = False, cube_spacing: float = 0.2, cube_padding: float = 4.0, citations: bool = True, dry_run: bool = False, fmax: float = 0.05, max_opt_steps: int = 200, optimizer_backend: str = "auto", memory_override: bool = False, num_threads: Optional[int] = None, dispersion: DispersionSpec = None, solvent: object = None, record_hostname: bool = True, rhf_options: Optional[RHFOptions] = None, uhf_options: Optional[UHFOptions] = None, rks_options: Optional[RKSOptions] = None, uks_options: Optional[UKSOptions] = None, selected_ci_options: Optional[SelectedCIOptions] = None, dmrg_options: Optional[DMRGOptions] = None, v2rdm_options: Optional[V2RDMOptions] = None, transcorrelated_options: Optional[TranscorrelatedOptions] = None, active_space: Optional[tuple[int, int]] = None, progress: Union[bool, ProgressLogger, None] = None, verbose: Optional[int] = None, use_logging: bool = False, perf_log: Optional[Union[str, os.PathLike, bool]] = None, structured_log: Union[bool, str, os.PathLike, None] = False, crash_dump: Union[bool, str, os.PathLike, None] = True, # QVF visualisation archive (v1). output_qvf: bool = False, hessian: bool = False, # DFT+U (Dudarev rotationally-invariant) — Increment 2c. # List of HubbardSite objects; empty / None ≡ no +U. dft_plus_u: Optional[List["HubbardSite"]] = None, ) -> object: """Run a vibe-qc SCF job and write the standard output files. Parameters ---------- molecule The :class:`Molecule` describing the system (bohr coordinates). basis libint-recognized basis-set name. method ``"rhf"``, ``"uhf"``, ``"rks"``, ``"uks"``, ``"auto"``, ``"ccsd"``, or ``"ccsd(t)"`` (picks restricted vs unrestricted from ``molecule.multiplicity`` and switches between HF/KS based on whether ``functional`` is set). functional XC functional for RKS / UKS (e.g. ``"PBE"``, ``"B3LYP"``). Ignored for HF. output Path stem for the generated files. ``{output}.out`` always; also ``{output}.molden`` unless disabled; and ``{output}.traj`` when ``optimize=True``. optimize Run a BFGS geometry optimization first (via ASE), then the final SCF on the optimized geometry. The trajectory is written for animation (openable with ASE-aware viewers). write_molden_file Emit ``{output}.molden`` at the converged geometry. Default True. fmax, max_opt_steps Optimizer tolerance (eV/Å) and iteration limit. Ignored unless ``optimize=True``. memory_override If ``False`` (default), the driver estimates peak memory and aborts with :class:`InsufficientMemoryError` when the estimate exceeds the machine's available RAM. Set to ``True`` to proceed anyway — at the risk of swap-thrashing or a system freeze. num_threads If set, pin the OpenMP thread count for the duration of the calculation. ``None`` (default) leaves the current setting in place — which is usually "all cores" unless the environment variable ``OMP_NUM_THREADS`` is set or :func:`vibeqc.set_num_threads` was called earlier. The actual thread count used is recorded in the output log for reproducibility. dispersion Post-SCF D3(BJ) dispersion correction. Accepts: * ``None`` (default) — no dispersion. * ``True`` or ``"d3bj"`` — use D3-BJ params for the current DFT functional. * A functional name (``"pbe"``, ``"b3lyp"``, …) — use its D3-BJ params (useful for ``method="rhf"`` + ``"hf"`` dispersion, or for overriding the SCF functional in the damping lookup). * A :class:`D3BJParams` instance — used directly. The energy correction is written to the ``.out`` file, added to the returned object as ``e_dispersion`` / ``energy_total`` (the raw SCF ``.energy`` is preserved untouched), and, when ``optimize=True``, added to the forces the optimizer sees. Routes through :func:`vibeqc.compute_d3bj` with ``backend="auto"`` — the reference ``dftd3`` backend is used when installed, otherwise the D1a framework stub. See :mod:`vibeqc.dispersion` for details. solvent v0.9.0 CPCM / COSMO implicit solvation. Accepts: * ``None`` (default) — gas-phase SCF. * A preset name (``"water"``, ``"dmso"``, ``"acetonitrile"``, ``"chloroform"``, ``"benzene"``, …) — looks up the static dielectric ε from :data:`vibeqc.SOLVENT_PRESETS`. * A numeric ε (e.g. ``78.39``) — custom dielectric. * A dict (``{"epsilon": 25.0, "variant": "cosmo", ...}``) or a :class:`vibeqc.SolventModel` instance for full control over cavity construction (Bondi radii, Lebedev order, switching width, max macro-iterations). Routes through :func:`vibeqc.run_cpcm_scf`; macro-iterates the apparent surface charge against the SCF density until ΔE_solv < 1e-6 Ha (typically 3–5 outer cycles). The total energy is the in-solvent value; the gas-phase reference is retained on ``result.solvent_result.e_gas``. See :doc:`/user_guide/solvation` for the full theory and the cavity / preset table. record_hostname If ``False``, the per-job ``{output}.system`` manifest writes ``hostname = "<redacted>"`` instead of the live hostname. The ``VIBEQC_NO_HOSTNAME=1`` environment variable does the same thing globally — engineering's bundled docs runs use the env var so example outputs in ``docs/_static/examples/`` don't leak machine names. Other manifest fields (CPU model, OS, memory, library versions) are not redacted; the redaction is scoped to the hostname only. rhf_options / uhf_options / rks_options / uks_options Optional override for the respective SCF options struct. progress Live progress logger for long-running jobs. Default behavior is **ON** — the job emits a banner, per-stage milestones, and a final summary to stdout (line-flushed) so the canonical ``nohup python LiH.py > LiH.log 2>&1 &`` + ``tail -f LiH.log`` workflow shows progress in real time. The ``.out`` file is also line-buffered so ``tail -f output-LiH.out`` works without any extra setup. Pass ``progress=False`` to silence stdout (the ``.out`` file is still written normally — only the live mirror is suppressed). Pass a :class:`vibeqc.ProgressLogger` instance for fully custom routing (tee to a persistent file, mute, thread one logger through nested calls). Set ``VIBEQC_LIVE_LOGGING=0`` in the environment to disable live progress globally — useful for batch scripts that don't want to edit every input file. The env var only takes effect when ``progress`` is left at its default (``None``); explicit ``progress=True`` / ``progress=False`` / a ``ProgressLogger`` instance always wins, so a debugging session can re-enable progress for one shell. Per-iteration progress for periodic SCFs (which run in Python) is streamed live through this same logger via the lower-level ``run_*_periodic_*`` entry points; molecular SCFs run in C++ and only emit a pre-SCF banner + post-SCF summary live, with the per-iteration trace landing in the ``.out`` when the SCF returns. verbose Integer verbosity level (PySCF convention, 0..9, default 4). Each level is a strict superset of the one below, so bumping ``verbose`` only adds output: * ``0`` — silent (nothing live; ``.out`` is still written) * ``1`` — banner + warnings + final SCF summary only * ``2`` — add per-stage milestones + ``info()`` lines * ``3`` — add per-stage timing on stage exit * ``4`` — add per-iteration SCF rows (DEFAULT) * ``5`` — add inline RSS-memory snapshots * ``6+`` — phase-level wall-clock breakdown live (overlaps the post-mortem ``.perf`` log on purpose) Pass ``verbose=None`` (the default) to read the ``VIBEQC_VERBOSE`` env var; if unset, falls back to 4. Ignored when ``progress`` is a :class:`ProgressLogger` instance — that logger's own level wins. use_logging If ``True``, route progress through ``logging.getLogger("vibeqc.run_job")`` instead of bare ``stdout`` writes. Banner / stage milestones land at ``INFO``; per-iteration SCF rows at ``DEBUG``; warnings at ``WARNING``. Composes naturally with stdlib handlers (``RotatingFileHandler``, syslog, ``dictConfig``):: import logging logging.basicConfig(level=logging.INFO) vq.run_job(..., use_logging=True) ``progress=False`` still wins as a hard kill switch — the verbose-level gate runs *before* the logging call, so a silent run stays silent regardless of the active logging config. Ignored when ``progress`` is a pre-built :class:`ProgressLogger` instance. perf_log Optional path (or ``True`` to use ``{output}.perf``) to write a post-mortem performance / debug breakdown — phase-level wall + CPU times, memory snapshots, parallelism flags. The live ``progress=`` log shows progress *during* the run; the perf log shows where the time went *afterwards*. Off by default. Pass an explicit path, ``True`` to emit alongside ``{output}.out``, or set the ``VIBEQC_PERFLOG=path`` env var (which wins when ``perf_log`` is left at ``None``). See :mod:`vibeqc.perf` for the full instrumentation surface. structured_log Optional NDJSON (one-JSON-record-per-line) log capturing every SCF transition — banner, job_start, memory_estimate, per-iter rows, scf_converged, properties, job_end. Off by default. Pass ``True`` to emit ``{output}.scf.jsonl``, a path-like to write there explicitly, or set the ``VIBEQC_STRUCTURED_LOG=path`` env var (which wins when ``structured_log`` is left at ``False``). The format is stable: events are append-only, fields are never renamed or removed. See :mod:`vibeqc.structured_log` for the full event catalog. crash_dump Write a snapshot to ``{output}.dump`` (TOML) plus binary attachments (``.dump.density.npy``, ``.dump.fock.npy``, ``.dump.mo.npy``) when the SCF fails ungracefully — raised exception (NaN, linear dependence, memory error). Default ``True``: post-mortem reproducibility costs nothing on success and saves a re-run on failure. Pass ``False`` (or set ``VIBEQC_NO_CRASH_DUMP=1`` in the environment) to disable. The dump is written alongside ``{output}.out`` in the standard ``output.*`` family — re-attach the ``.dump`` + ``.dump.density.npy`` to a bug report and the maintainer can reconstruct the failing state via :func:`vibeqc.load_dump`. See :mod:`vibeqc.crash_dump` for the dump format. The exception is always re-raised after the dump is written; ``crash_dump=True`` does *not* swallow failures. hessian When True, compute harmonic vibrational frequencies via finite-difference Hessian. Default False. Cost: ~6N SCF evals. Results printed to .out and embedded in QVF for vibe-view. Returns ------- The SCF result object (RHFResult / UHFResult / RKSResult / UKSResult). """ output_stem = Path(os.fspath(output)) out_path = output_stem.with_suffix(".out") molden_path = output_stem.with_suffix(".molden") traj_path = output_stem.with_suffix(".traj") # Resolve perf_log target. ``True`` → emit to {output}.perf; # str/Path → use that path verbatim; None → defer to # VIBEQC_PERFLOG env var inside perf_log() (or no-op if unset). if perf_log is True: _perf_target: Optional[Path] = output_stem.with_suffix(".perf") elif perf_log is False or perf_log is None: _perf_target = None else: _perf_target = Path(os.fspath(perf_log)) # Resolve structured_log target. Default OFF (False) — only emit # when the caller opts in. Resolution mirrors perf_log: # True → {output}.scf.jsonl # str/PathLike → use verbatim # False / None → defer to VIBEQC_STRUCTURED_LOG env var (or # leave disabled if the env var is unset) if structured_log is True: _structured_target: Optional[Path] = output_stem.with_suffix(".scf.jsonl") elif structured_log is False or structured_log is None: _structured_target = None else: _structured_target = Path(os.fspath(structured_log)) # Resolve crash_dump target. Default ON: dump on failure unless the # caller explicitly opts out (or VIBEQC_NO_CRASH_DUMP=1). The # post-mortem snapshot is cheap on success (zero bytes written), # and saves a re-run on failure. Resolution: # True → {output}.dump (default) # str/PathLike → use verbatim as the stem # False → disabled # None → also default-on (treated as True) _crash_off_env = os.environ.get( "VIBEQC_NO_CRASH_DUMP", "", ).strip().lower() in ("1", "true", "yes", "on") if crash_dump is False or _crash_off_env: _crash_stem: Optional[Path] = None elif crash_dump is True or crash_dump is None: _crash_stem = output_stem else: _crash_stem = Path(os.fspath(crash_dump)) # Default-ON progress: when the caller didn't pass a value, # check the env-var opt-out (VIBEQC_LIVE_LOGGING=0) and otherwise # turn it on. This lets every backgrounded ``python input.py > # log 2>&1 &`` show progress without the user having to learn # a new kwarg — answering the "is my job stuck or actually # running?" question by default. Explicit progress= (True/False/ # logger) bypasses the env var entirely. if progress is None: env = os.environ.get("VIBEQC_LIVE_LOGGING", "").strip().lower() if env in ("0", "false", "no", "off"): progress = False else: progress = True # Verbose level resolution (v0.5.3). When the caller leaves # ``verbose=None`` we read VIBEQC_VERBOSE; otherwise the # explicit argument wins. Falls back to the package default # (level 4 — banner + stages + per-iter SCF) when neither # is set. The env var is a parsing best-effort: a junk value # silently falls back to the default rather than raising, # since a typo in $VIBEQC_VERBOSE shouldn't kill someone's # overnight run. if verbose is None: env_verbose = os.environ.get("VIBEQC_VERBOSE", "").strip() if env_verbose: try: verbose_level = int(env_verbose) except ValueError: verbose_level = None # type: ignore[assignment] else: verbose_level = max(0, verbose_level) else: verbose_level = None # type: ignore[assignment] else: verbose_level = max(0, int(verbose)) plog = resolve_progress( progress, verbose=verbose_level, use_logging=use_logging, ) # Composite 3c keyword resolution (e.g. ``method="hf-3c"``). For # non-composite ``method`` values this is a pass-through. Composite # values get their (basis, functional, dispersion) tuple inferred # from the :mod:`vibeqc.composites` registry; raises # :class:`CompositeUnavailable` for recipes whose prerequisites # have not yet landed. method, basis, functional, dispersion, composite_recipe = _apply_composite( method, basis=basis, functional=functional, dispersion=dispersion, molecule=molecule, ) # Semiempirical methods don't use a Gaussian basis set — accept # any basis value (including None / empty). _se_methods = {"dftb0", "scc_dftb", "pm6", "gfn2_xtb", "om1", "om2", "om3"} if method in _se_methods or method in _MLIP_METHODS: if not basis: basis = "" # placeholder; semiempirical / MLIP code ignores it elif not basis: raise ValueError( "run_job: basis is required. Pass basis='def2-svp' (or " "another supported basis), or use a composite keyword " "(method='hf-3c', …) which carries its own basis. " "Composite catalogue: " f"{[r.name for r in __import__('vibeqc').list_composites()]}" ) resolved_method = _select_method(method, molecule, functional) # Composite recipes may request D4 dispersion; the D4 path is # wired through compute_d4 in the post-SCF block below. A D4 # request short-circuits the D3-BJ resolution. use_d4 = isinstance(dispersion, str) and dispersion.strip().lower() == "d4" # Build the declarative OutputPlan once. It drives BOTH the # dry-run pre-flight (below) and the real run's manifest # lifecycle (the OutputWriter constructed before the SCF). One # source of truth for "what files will this job produce". _cube_req_plan = parse_write_cube_kwarg(write_cube) _output_plan = OutputPlan.from_run_job_kwargs( output=output_stem, method=resolved_method, basis=basis, functional=functional, optimize=optimize, write_molden_file=write_molden_file, write_xyz=write_xyz_file, citations=citations, write_population=write_population_file, write_cube_density=_cube_req_plan.density, cube_mo_labels=tuple(str(lbl) for lbl in _cube_req_plan.mo_labels), perf_log=perf_log, structured_log=structured_log, crash_dump=crash_dump, output_qvf=output_qvf, ) # Dry-run short-circuit (Phase O3). When ``dry_run=True`` or the # ``VIBEQC_DRY_RUN`` env var is set, write a one-shot # ``{output}.system`` with ``[outputs].status = "dry_run"``, # print the declared-artefacts summary to stdout, and return # ``None`` without running the SCF. This is the pre-flight path # that ``vq submit`` uses to learn which files a job will # produce. Run before any heavyweight setup (basis-set # construction, memory estimate, perf tracker) so the # short-circuit is genuinely cheap. if dry_run or is_dry_run_requested(): # Make sure the parent directory exists so we don't fail on a # freshly-typed `-o /tmp/new-subdir/foo` path. output_stem.parent.mkdir(parents=True, exist_ok=True) dry_run_manifest( _output_plan, record_hostname=record_hostname, ) return None # Resolve dispersion up front so a bad spec fails before we touch # the filesystem. d3_params is None iff dispersion is disabled. # A D4 request (composite recipes r²SCAN-3c / ωB97X-3c) short- # circuits the D3-BJ path — D4 is applied in the post-SCF block. if use_d4: d3_params = None else: d3_params = _resolve_dispersion(dispersion, functional) # Pin the thread count if the caller asked for one; otherwise leave # the current state alone. We always query at the end so the output # reports what was actually used (which may differ from what the # user asked for — e.g. on a single-core build or OpenMP-disabled # environment). if num_threads is not None: set_num_threads(int(num_threads)) threads_in_use = get_num_threads() # Ensure the parent directory exists so users can give names like # "runs/water" without pre-creating "runs/". out_path.parent.mkdir(parents=True, exist_ok=True) # Construct the OutputWriter — the per-job coordinator that owns # ``{output}.system`` for the rest of the run. This writes the # manifest immediately with the ``[plan]`` section + ``[outputs] # .status = "running"`` *before* the SCF starts, so a ``vq`` # daemon polling the workspace sees the declared output set and a # liveness signal from the first moment. ``finish()`` / # ``crash()`` at the exit paths flip the status; the end-of-job # record-sweep fills ``[[outputs.files]]`` with the artefacts # that actually landed. Replaces the bare end-of-job # ``write_system_manifest`` call (pre-v1.0 dispatch-overhaul). _output_writer = OutputWriter( _output_plan, record_hostname=record_hostname, ) t_job_start = time.perf_counter() # ``buffering=1`` requests line-buffered I/O so a `tail -f` against # the .out file shows new lines as the SCF / property pipeline emits # them. We also call ``f.flush()`` explicitly after major blocks for # the same reason — line buffering only kicks in on newlines. # # The perf-log context manager wraps the whole body so PerfScope # entries from anywhere downstream — periodic SCF stages, gradient # calls, anything that calls ``with PerfScope("..."): ...`` — # accumulate into the same tracker. When ``_perf_target`` is None # (and ``$VIBEQC_PERFLOG`` isn't set), the context manager is a # cheap no-op: PerfScope.__enter__/__exit__ become a single # ContextVar lookup with no I/O. hessian_result = None with ( _perf_log_ctx(_perf_target), _structured_log_ctx(_structured_target) as _slog, crash_dump_context(_crash_stem) as _crash_target, open(out_path, "w", encoding="utf-8", buffering=1) as f, PerfScope("run_job.total"), ): # Structured log: banner + job_start records. Always safe to # emit — _slog.emit is a no-op when the log is disabled. _libs = library_versions() _fp = run_fingerprint( method=resolved_method, basis=basis, functional=functional, molecule=molecule, ) _slog.emit( "banner", vibeqc_version=VIBEQC_VERSION, libint=_libs.get("libint", "unknown"), libxc=_libs.get("libxc", "unknown"), spglib=_libs.get("spglib", "unknown"), run_fingerprint=_fp, ) _slog.emit( "job_start", method=resolved_method, basis=basis, functional=functional, optimize=bool(optimize), threads=int(threads_in_use), n_atoms=int(len(list(molecule.atoms))), charge=int(molecule.charge), multiplicity=int(molecule.multiplicity), n_electrons=int(molecule.n_electrons()), output_stem=str(output_stem), ) f.write(banner() + "\n\n") f.write(_job_header(resolved_method, basis, functional) + "\n") f.write(_geom_summary(molecule) + "\n") f.write(f" Threads: {threads_in_use} (OpenMP shared-memory parallelism)\n\n") f.flush() plog.banner( f"run_job {_job_header(resolved_method, basis, functional).strip()}" ) plog.info(f"Threads: {threads_in_use} (OpenMP shared-memory parallelism)") plog.info(f"Output file: {out_path}") t_opt = 0.0 _traj_frames = [] _traj_energies = [] if optimize: # Increment 3 — analytic +U gradient via the # variational F = F_HF + S V_AO S Fock contribution. # The ASE _WavefunctionCalculator path is plumbed below. # For dft_plus_u, pre-populate the relevant method-options # struct's dft_plus_u_sites / dft_plus_u_ao_groups fields # once at the initial geometry — the AO-group indices are # geometry-invariant (depend only on shell layout per atom # Z + basis name), so the same translation is valid at # every optimization step. if dft_plus_u and resolved_method in ( "rhf", "uhf", "rks", "uks", ): from .dft_plus_u import _apply_dft_plus_u_to_options # Materialise a defaulted options struct if the user # didn't pass one, so the C++ binding sees a real # object with the dft_plus_u_sites field populated. if resolved_method == "rhf": if rhf_options is None: rhf_options = RHFOptions() _opts_for_dft_plus_u = rhf_options elif resolved_method == "uhf": if uhf_options is None: uhf_options = UHFOptions() _opts_for_dft_plus_u = uhf_options elif resolved_method == "rks": if rks_options is None: rks_options = RKSOptions() _opts_for_dft_plus_u = rks_options else: # uks if uks_options is None: uks_options = UKSOptions() _opts_for_dft_plus_u = uks_options _basis_for_dft_plus_u = BasisSet(molecule, basis) _apply_dft_plus_u_to_options( _opts_for_dft_plus_u, _basis_for_dft_plus_u, dft_plus_u, ) _opt_backend = _resolve_optimizer_backend(optimizer_backend) if _opt_backend == "ase": f.write(f" Geometry optimization (ASE/BFGS) -> {traj_path.name}\n") f.write(f" fmax = {fmax} eV/A, max_steps = {max_opt_steps}\n\n") f.flush() with ( plog.stage( "geometry_optimization", detail=f"BFGS, fmax={fmax} eV/A, max_steps={max_opt_steps}", ), PerfScope("geometry_optimization"), ): t0 = time.perf_counter() molecule = _optimize_geometry( molecule, basis, functional=functional, trajectory_path=traj_path, fmax=fmax, max_steps=max_opt_steps, dispersion_params=d3_params, method=resolved_method, selected_ci_options=selected_ci_options, dmrg_options=dmrg_options, v2rdm_options=v2rdm_options, transcorrelated_options=transcorrelated_options, active_space=active_space, rhf_options=rhf_options, uhf_options=uhf_options, rks_options=rks_options, uks_options=uks_options, ) t_opt = time.perf_counter() - t0 f.write(" Optimized geometry\n") f.write(_geom_summary(molecule) + "\n\n") f.flush() # Read trajectory frames for QVF archive. _traj_frames = [] _traj_energies = [] try: from ase.io.trajectory import Trajectory from ase.units import Bohr, Hartree from ._vibeqc_core import Atom traj = Trajectory(str(traj_path)) for atoms in traj: pos_bohr = atoms.positions / Bohr frame_mol = Molecule( [ Atom(int(z), list(xyz)) for z, xyz in zip(atoms.numbers, pos_bohr) ], molecule.charge, molecule.multiplicity, ) _traj_frames.append(frame_mol) try: e_eV = atoms.get_potential_energy() if e_eV is not None and e_eV == e_eV: _traj_energies.append(float(e_eV) / Hartree) except Exception as _traj_e_exc: # ASE calculator may not expose energies # per frame; compatibility shim — never # crash on this. warn_output_failure( _traj_e_exc, traj_path, role="trajectory_frame_energy", category=OutputFailureKind.compatibility_fallback, ) except Exception as _traj_read_exc: # The traj file was written by ASE just above but # we can't read it back — log it (the QVF archive # will lose its trajectory section but the SCF # itself is fine). warn_output_failure( _traj_read_exc, traj_path, role="trajectory_read_for_qvf", category=OutputFailureKind.optional_artifact, ) _traj_frames = [] _traj_energies = [] else: # Native scipy L-BFGS-B backend. from .molecular_optimize import optimize_molecule as _native_opt # Convert fmax from eV/Å to Ha/bohr. 1 eV/Å ≈ 0.045 Ha/bohr. _conv_grad = fmax * 0.045 f.write( f" Geometry optimization (native L-BFGS-B)\n" f" gtol = {_conv_grad:.2e} Ha/bohr, max_steps = {max_opt_steps}\n\n" ) f.flush() with ( plog.stage( "geometry_optimization", detail=f"L-BFGS-B, gtol={_conv_grad:.1e} Ha/bohr", ), PerfScope("geometry_optimization"), ): t0 = time.perf_counter() _opt_result = _native_opt( molecule, basis, method=resolved_method, functional=functional, rhf_options=rhf_options, uhf_options=uhf_options, rks_options=rks_options, uks_options=uks_options, max_iter=max_opt_steps, conv_tol_grad=_conv_grad, dispersion_params=d3_params, solvent=solvent, record_trajectory=bool(output_qvf), progress=False, ) t_opt = time.perf_counter() - t0 molecule = _opt_result.system _traj_frames = _opt_result.trajectory_frames or None _traj_energies = _opt_result.trajectory_energies or None f.write(" Optimized geometry\n") f.write(_geom_summary(molecule) + "\n\n") f.flush() # Semiempirical methods don't use a Gaussian basis; skip # BasisSet construction and memory estimation. if resolved_method in _se_methods or resolved_method in _MLIP_METHODS: basis_obj = None estimate = None else: with PerfScope("basis_set_construction"): basis_obj = BasisSet(molecule, basis) # Memory pre-flight if resolved_method in ("rhf", "uhf", "rks", "uks"): est_opts = { "rhf": rhf_options, "uhf": uhf_options, "rks": rks_options, "uks": uks_options, }.get(resolved_method) else: est_opts = None estimate = estimate_memory( molecule, basis_obj, method=resolved_method, options=est_opts, ) if estimate is not None: f.write( " " + format_memory_report( estimate, override_requested=memory_override, ).replace("\n", "\n ") + "\n\n" ) f.flush() _slog.emit( "memory_estimate", total_gb=float(estimate.total_gb), raw_total_bytes=int(estimate.raw_total_bytes), headroom_factor=float(estimate.headroom_factor), by_category={k: int(v) for k, v in estimate.by_category.items()}, ) check_memory(estimate, allow_exceed=memory_override) plog.info( f"Starting molecular SCF ({resolved_method.upper()}" + (f" / {functional}" if functional else "") + "). Per-iteration progress runs in compiled C++ and is " "not streamed live; the trace will be written when the SCF " "returns." ) # Memory snapshot before SCF: lets the perf log show RSS # growth caused by the integral / Fock build, separate from # the basis-set / pre-flight overhead. from .perf import active_tracker as _at _tracker = _at() if _tracker is not None: _tracker.snapshot_memory("start_of_scf") t0 = time.perf_counter() # Crash-dump scope: if the C++ SCF raises (NaN, linear # dependence, memory error, …), we want a snapshot of the # last known state on disk before the exception bubbles up # and the user has nothing to debug from. The state we can # capture from outside the C++ call is limited (no live # density / Fock) — we record the geometry, options, and # phase. Any deeper introspection needs a future C++ hook. try: with PerfScope(f"scf.{resolved_method}"): result = _run_single_point( resolved_method, molecule, basis_obj, functional=functional, rhf_options=rhf_options, uhf_options=uhf_options, rks_options=rks_options, uks_options=uks_options, selected_ci_options=selected_ci_options, dmrg_options=dmrg_options, v2rdm_options=v2rdm_options, transcorrelated_options=transcorrelated_options, active_space=active_space, solvent=solvent, dft_plus_u=dft_plus_u, ) except BaseException as _scf_exc: t_scf = time.perf_counter() - t0 f.write(f"\n SCF FAILED: {type(_scf_exc).__name__}: {_scf_exc}\n") f.flush() _slog.emit( "scf_failed", exception_type=type(_scf_exc).__name__, exception=str(_scf_exc), wall_s=float(t_scf), ) if _crash_target is not None: _opts_for_dump = ( { "rhf": rhf_options, "uhf": uhf_options, "rks": rks_options, "uks": uks_options, }.get(resolved_method) if resolved_method in ("rhf", "uhf", "rks", "uks") else None ) dump_on_failure( _crash_target, _scf_exc, {"phase": f"scf.{resolved_method}", "n_iters_completed": 0}, options=_opts_for_dump, molecule=molecule, ) f.write( f" Crash dump written to " f"{_crash_target.with_suffix('.dump').name}\n" ) f.flush() # Flip the manifest status to "crashed" before the # exception propagates — a ``vq`` daemon polling # ``{output}.system`` then sees the job aborted rather # than mistaking a stale "running" for a live job. try: _output_writer.crash(wall_seconds=t_scf) except Exception as _crash_exc: # Manifest bookkeeping must never mask the real SCF # exception we're about to re-raise. warn_output_failure( _crash_exc, _output_writer.manifest_path, role="manifest_crash_status", category=OutputFailureKind.manifest_recording, ) # Re-raise: crash_dump=True does NOT swallow the failure; # it just makes the failure debuggable. raise t_scf = time.perf_counter() - t0 if _tracker is not None: _tracker.snapshot_memory("end_of_scf") # Also record the per-iteration trace so the perf log # carries the same info as the .out's SCF trace, in # tabular form. The C++ result.scf_trace is the canonical # source for these numbers. for step in getattr(result, "scf_trace", []) or []: _tracker.add_scf_iter( iter=int(step.iter), energy=float(step.energy), dE=float(step.delta_e) if step.iter > 1 else None, grad=float(step.grad_norm), diis=int(step.diis_subspace), ) # Structured log: per-iter rows sourced from the C++ trace, # which is the canonical record of what the SCF did. Periodic # SCFs (Python-driven) emit ``scf_iter`` live via # ProgressLogger.iteration() — see vibeqc.progress; the # molecular C++ path runs as a black box so we replay its # trace here. The matching ``scf_converged`` record is emitted # below via plog.converged() so periodic and molecular paths # share the funnel. for step in getattr(result, "scf_trace", []) or []: _slog.emit( "scf_iter", iter=int(step.iter), energy=float(step.energy), dE=(float(step.delta_e) if step.iter > 1 else None), grad_norm=float(step.grad_norm), diis_subspace=int(step.diis_subspace), ) # Non-convergence path: write a crash dump too, since "ran # to max_iter" is a real failure mode. We don't raise — the # SCF returned a result and the user can inspect it — but we # leave the .dump alongside the .out so the diagnostic recipe # is the same as the exception path. if not bool(getattr(result, "converged", False)) and _crash_target is not None: dump_state = { "phase": f"scf.{resolved_method} (max_iter)", "scf_trace": list(getattr(result, "scf_trace", []) or []), "n_iters_completed": int(getattr(result, "n_iter", 0)), } for k_state, k_attr in ( ("density", "density"), ("density_alpha", "density_alpha"), ("density_beta", "density_beta"), ("fock", "fock"), ("fock_alpha", "fock_alpha"), ("fock_beta", "fock_beta"), ("mo_coeffs", "mo_coeffs"), ("mo_coeffs_alpha", "mo_coeffs_alpha"), ("mo_coeffs_beta", "mo_coeffs_beta"), ): v = getattr(result, k_attr, None) if v is not None: dump_state[k_state] = v _opts_for_dump = ( { "rhf": rhf_options, "uhf": uhf_options, "rks": rks_options, "uks": uks_options, }.get(resolved_method) if resolved_method in ("rhf", "uhf", "rks", "uks") else None ) dump_on_failure( _crash_target, None, dump_state, phase=f"scf.{resolved_method} (max_iter)", options=_opts_for_dump, molecule=molecule, ) f.write( f"\n SCF did not converge — crash dump written to " f"{_crash_target.with_suffix('.dump').name}\n" ) f.flush() # Check if we got a SolverResult (non-mean-field method) if isinstance(result, SolverResult): f.write("\n " + "-" * 60 + "\n") f.write(" Non-mean-field solver result\n") f.write(" " + "-" * 60 + "\n") f.write(_format_solver_trace(result) + "\n") else: f.write( format_scf_trace( result, molecule=molecule, basis=basis_obj, include_banner=False ) + "\n" ) f.flush() plog.converged( n_iter=int(getattr(result, "n_iter", 0)), energy=float(getattr(result, "energy", 0.0)), converged=bool(getattr(result, "converged", False)), ) # Post-SCF Coupled-Cluster (CCSD / CCSD(T)). if method in ("ccsd", "ccsd(t)") and bool(getattr(result, "converged", False)): # Guard: CCSD numerics are still under validation (see # docs/handover_ccsd.md). Set VIBEQC_EXPERIMENTAL_CCSD=1 # to proceed despite known issues. if not os.environ.get("VIBEQC_EXPERIMENTAL_CCSD", "").strip() == "1": raise RuntimeError( "CCSD / CCSD(T) numerics are still under validation. " "See docs/handover_ccsd.md for current status. " "Set VIBEQC_EXPERIMENTAL_CCSD=1 to proceed anyway." ) from .cc import CCSDOptions from .cc import run_ccsd as _run_ccsd_py cc_opts = CCSDOptions() cc_opts.compute_triples = method == "ccsd(t)" # Default frozen-core for first-row atoms (freeze 1s) has_first_row = any(atom.Z >= 3 and atom.Z <= 10 for atom in molecule.atoms) if has_first_row: cc_opts.n_frozen_core = 1 cc_opts.resolve_aux_basis(basis) with ( plog.stage( "ccsd", detail="DF-CCSD" + ("(T)" if cc_opts.compute_triples else ""), ), PerfScope("ccsd"), ): cc_result = _run_ccsd_py(molecule, basis_obj, result, cc_opts) _slog.emit( "ccsd_converged", converged=bool(cc_result.converged), n_iter=int(cc_result.n_iter), e_ccsd_correlation=float(cc_result.e_ccsd_correlation), e_ccsd=float(cc_result.e_ccsd), e_t=float(cc_result.e_t) if cc_opts.compute_triples else None, e_total=float(cc_result.e_total), t1_norm=float(cc_result.t1_norm), t2_norm=float(cc_result.t2_norm), ) # Write CCSD iteration table triples_label = " CCSD(T)" if cc_opts.compute_triples else " CCSD" f.write("\n Coupled-Cluster" + triples_label + "\n " + "-" * 78 + "\n") if cc_opts.compute_triples: hdr = " {:<5s} {:>16s} {:>12s} {:>12s} {:>12s} {:>6s}".format( "Iter", "E(CCSD)", "dE", "|R1|", "|R2|", "DIIS" ) else: hdr = " {:<5s} {:>16s} {:>12s} {:>12s} {:>12s} {:>6s}".format( "Iter", "E(CCSD)", "dE", "|R1|", "|R2|", "DIIS" ) f.write(hdr + "\n") f.write(" " + "-" * 78 + "\n") for step in cc_result.cc_trace: f.write( " {:<5d} {:>16.10f} {:>12.6e} {:>12.6e} {:>12.6e} {:>6d}\n".format( int(step.iter), float(step.energy), float(step.delta_e) if step.iter > 1 else 0.0, float(step.r1_norm), float(step.r2_norm), int(step.diis_subspace), ) ) f.write(" " + "-" * 78 + "\n") if cc_result.converged: f.write(f" CCSD converged in {cc_result.n_iter} iterations\n") else: f.write(" CCSD DID NOT CONVERGE\n") f.write( f" E(CCSD correlation) = {cc_result.e_ccsd_correlation:16.10f} Ha\n" f" E(CCSD) = {cc_result.e_ccsd:16.10f} Ha\n" ) if cc_opts.compute_triples: f.write( f" E(T) correction = {cc_result.e_t:16.10f} Ha\n" f" E(CCSD(T)) = {cc_result.e_ccsd_t:16.10f} Ha\n" ) f.write( f" T1 norm = {cc_result.t1_norm:16.10f}\n" f" T2 norm = {cc_result.t2_norm:16.10f}\n" ) f.write("\n") f.flush() # Store CC result on the SCF result for downstream use result._cc_result = cc_result # Post-SCF dispersion (D3-BJ) as an additive correction. # Captured in a local so the composite-total sum below is # independent of whether ``result`` got wrapped in # _DispersionAugmented — a SolverResult is never wrapped (it has # no .e_dispersion attribute), and reading the term back off the # result then silently dropped D3-BJ from the composite total # (audit F1.2). e_d3bj = 0.0 if d3_params is not None: disp = compute_d3bj(molecule, d3_params) e_d3bj = float(disp.energy) e_scf = float(getattr(result, "energy", 0.0)) e_total = e_scf + e_d3bj f.write( "\n Dispersion correction (D3-BJ)\n" " " + "-" * 52 + "\n" f" {'s6':>10s} {d3_params.s6:14.6f}\n" f" {'s8':>10s} {d3_params.s8:14.6f}\n" f" {'a1':>10s} {d3_params.a1:14.6f}\n" f" {'a2':>10s} {d3_params.a2:14.6f}\n" f" {'E_disp':>10s} {disp.energy:14.8f} Ha" f" ({disp.energy * 627.5094740631:+.4f} kcal/mol)\n" f" {'E_SCF':>10s} {e_scf:14.8f} Ha\n" f" {'E_total':>10s} {e_total:14.8f} Ha\n" ) # Wrap the SCF result so callers can access both components # without breaking existing accesses to mo_coeffs, fock, etc. if not isinstance(result, SolverResult): result = _DispersionAugmented(result, e_d3bj, d3_params) # ---- Composite 3c post-SCF corrections (D4 + gCP + SRB) ------- # Only runs when a composite recipe is active. Each piece is # additive and logged separately; the composite total is the # SCF energy + every correction. e_d4 = 0.0 e_gcp = 0.0 e_srb = 0.0 if use_d4: if not dftd4_available(): raise ImportError( "composite recipe requests D4 dispersion but the " "optional 'dftd4' package is not installed.\n " "Install via `pip install -e '.[dispersion]'` or " "`pip install dftd4`." ) # dftd4 ships per-composite damping keyed on the composite # name (e.g. 'r2scan-3c', 'wb97x-3c') — distinct from the # parent-functional damping. Prefer the composite name. d4_func = ( composite_recipe.name if composite_recipe is not None else (functional or "hf") ) d4_result = compute_d4( molecule, d4_func, charge=float(molecule.charge), ) e_d4 = float(d4_result.energy) f.write( "\n Dispersion correction (D4)\n" " " + "-" * 52 + "\n" f" {'functional':>10s} {d4_result.functional!s:>14s}\n" f" {'E_disp':>10s} {e_d4:14.8f} Ha" f" ({e_d4 * 627.5094740631:+.4f} kcal/mol)\n" ) if composite_recipe is not None and composite_recipe.gcp_basis: # Damped composites (pbeh-3c / hse-3c / r2scan-3c) carry a # GCPDamping; pass it through as (dmp_scal, dmp_exp). Undamped # recipes (hf-3c, b3lyp-3c) leave it None. b97-3c has # gcp_basis=None (SRB-only) so this block is skipped for it. _gcp_damping = ( (composite_recipe.gcp_damping.dmp_scal, composite_recipe.gcp_damping.dmp_exp) if composite_recipe.gcp_damping is not None else None ) try: gcp_result = compute_gcp( molecule, composite_recipe.gcp_basis, damping=_gcp_damping, ) e_gcp = float(gcp_result.energy) f.write( "\n Geometric counterpoise correction (gCP)\n" " " + "-" * 52 + "\n" f" {'basis':>10s} {gcp_result.basis_name!s:>14s}\n" f" {'E_gCP':>10s} {e_gcp:14.8f} Ha" f" ({e_gcp * 627.5094740631:+.4f} kcal/mol)\n" ) except GCPDataMissing as exc: # gCP data table not bundled for this basis — log and # continue with E_gCP = 0 so the composite total is # explicitly marked incomplete rather than silently # wrong. Same posture as the dispersion builtin-stub. f.write( "\n Geometric counterpoise correction (gCP)\n" " " + "-" * 52 + "\n" f" SKIPPED — {exc}\n" ) if composite_recipe is not None and composite_recipe.sr_mod is not None: srb = composite_recipe.sr_mod atomic_numbers = [int(a.Z) for a in molecule.atoms] positions = [list(a.xyz) for a in molecule.atoms] e_srb = float(srb.evaluate(atomic_numbers, positions)) f.write( f"\n {srb.name}\n" " " + "-" * 52 + "\n" f" {'E_SRB':>10s} {e_srb:14.8f} Ha" f" ({e_srb * 627.5094740631:+.4f} kcal/mol)\n" ) if composite_recipe is not None: composite_total = ( float(getattr(result, "energy", 0.0)) + e_d3bj + e_d4 + e_gcp + e_srb ) f.write( f"\n Composite total ({composite_recipe.name})\n" " " + "-" * 52 + "\n" f" {'E_SCF':>10s} " f"{float(getattr(result, 'energy', 0.0)):14.8f} Ha\n" f" {'E_disp':>10s} " f"{e_d3bj + e_d4:14.8f} Ha\n" f" {'E_gCP':>10s} {e_gcp:14.8f} Ha\n" f" {'E_SRB':>10s} {e_srb:14.8f} Ha\n" f" {'E_total':>10s} {composite_total:14.8f} Ha\n" ) has_mos = hasattr(result, "mo_energies") or hasattr(result, "mo_energies_alpha") if write_molden_file and has_mos: with ( plog.stage("write_molden", detail=str(molden_path.name)), PerfScope("write_molden"), ): write_molden( molden_path, molecule, basis_obj, result, title=str(output_stem.name), ) f.write(f"\n Molecular orbitals written to {molden_path.name}\n") f.flush() # Population dump (Phase O6). Always-on for molecular runs by # default — cheap to compute (the SCF density is in memory), # cheap to write, and downstream tooling shouldn't have to # screen-scrape the .out for charges + bond orders + dipole. # The matching block stays in .out for human reading; the # .txt + .json siblings are the machine-readable form. if write_population_file and has_mos: pop_txt = output_stem.parent / (output_stem.name + ".population.txt") try: with ( plog.stage("write_population", detail=str(pop_txt.name)), PerfScope("write_population"), ): write_population( output_stem, result, basis_obj, molecule, ) f.write( f" Population dump written to " f"{pop_txt.name} + " f"{output_stem.name}.population.json\n" ) f.flush() except Exception as _pop_exc: f.write( f" (warning: population dump failed: " f"{type(_pop_exc).__name__}: {_pop_exc})\n" ) f.flush() warn_writer_failure( _pop_exc, pop_txt, role="population_summary", category=OutputFailureKind.optional_artifact, writer=_output_writer, ) # Volumetric cubes (Phase O6). Opt-in via write_cube=: True / # "density" / "homo" / "lumo" / list-of-those-or-int-indices. # Each cube is wrapped in its own try/except so a single # grid-evaluation failure on one MO doesn't block the others. _cube_req = parse_write_cube_kwarg(write_cube) if _cube_req and has_mos: if _cube_req.density: try: with ( plog.stage("write_cube_density"), PerfScope("write_cube_density"), ): cube_path = write_cube_density_for_run_job( output_stem, result, basis_obj, molecule, spacing=cube_spacing, padding=cube_padding, ) f.write(f" Density cube written to {cube_path.name}\n") f.flush() except Exception as _cube_exc: f.write( f" (warning: density cube write failed: " f"{type(_cube_exc).__name__}: {_cube_exc})\n" ) f.flush() warn_writer_failure( _cube_exc, output_stem.with_suffix(".density.cube"), role="density_cube", category=OutputFailureKind.optional_artifact, writer=_output_writer, ) if _cube_req.mo_labels: try: _mo_targets = requested_mo_indices( list(_cube_req.mo_labels), result, molecule, ) except Exception as _exc: f.write( f" (warning: MO label resolution failed: " f"{type(_exc).__name__}: {_exc})\n" ) warn_writer_failure( _exc, output_stem.with_suffix(".cube"), role="mo_label_resolution", category=OutputFailureKind.optional_artifact, writer=_output_writer, ) _mo_targets = [] for _idx, _name in _mo_targets: try: with ( plog.stage(f"write_cube_mo_{_name}"), PerfScope(f"write_cube_mo_{_name}"), ): _mo_path = write_cube_mo_for_run_job( output_stem, result, basis_obj, molecule, _idx, _name, spacing=cube_spacing, padding=cube_padding, ) f.write(f" MO cube written to {_mo_path.name}\n") f.flush() except Exception as _cube_exc: f.write( f" (warning: MO {_name} cube write failed: " f"{type(_cube_exc).__name__}: {_cube_exc})\n" ) f.flush() warn_writer_failure( _cube_exc, output_stem.with_suffix(f".{_name}.cube"), role=f"mo_cube_{_name}", category=OutputFailureKind.optional_artifact, writer=_output_writer, ) # Citations (Phase O5b). Assemble the reference list for this # job (software + libint + libxc-if-DFT + basis-set + # functional + DIIS + dispersion-if-used + ECP-if-used) and # emit {stem}.bibtex + .references siblings. The matching # "## References" block is appended to the .out a few lines # below so the text log carries the same references. Failures # are non-fatal: a routing miss or a writer crash leaves a # warning in the .out and the SCF result is unaffected. cite_block_text: str | None = None _refs = None if citations: try: _cite_db = load_default_database() _dispersion_key = None if d3_params is not None: # The framework only ships D3(BJ) today; the # database routes "d3bj" → Grimme 2010 + 2011. _dispersion_key = "d3bj" # Detect direct SCF usage for citation routing. _uses_direct = _detect_direct_scf( resolved_method, rhf_options, uhf_options, rks_options, uks_options, basis, molecule, ) # SCF accelerator picked up from opts.scf_accelerator — # fires the matching DIIS / EDIIS / ADIIS / KDIIS # citation instead of always crediting plain DIIS. _scf_accel = _detect_scf_accelerator( resolved_method, rhf_options, uhf_options, rks_options, uks_options, ) # ECP detection — libecpint citation only fires when # at least one options struct carries an ecp_centers # list (manual ECPCenter recipe or auto_ecp_centers). _uses_ecp = _detect_uses_ecp( rhf_options, uhf_options, rks_options, uks_options, ) # CPCM solvation citation fires when run_job was # invoked with a non-None ``solvent``; vibe-qc on main # only ships CPCM-style models, so the variant key # defaults to "cpcm". _uses_cpcm = solvent is not None # For composite-3c jobs (hf-3c / pbeh-3c / …) route # citations by the *composite keyword*, not the # resolved mean-field method — the composite carries # its own defining paper (+ gCP / D3) in # routes.methods. ``composite_recipe`` is None for # non-composite jobs, in which case the resolved # method name is the right routing key. _cite_method = ( composite_recipe.name if composite_recipe is not None else resolved_method ) # MLIP engines (method="mace") evaluate no Gaussian # integrals and run no SCF — suppress the always-on # libint + DIIS routes so the references reflect what # actually ran. The MACE / e3nn / PyTorch citations come # in via routes.methods[<mlip>] instead. _is_mlip = resolved_method in _MLIP_METHODS _refs = _cite_db.assemble( method=_cite_method, basis=basis, functional=functional, dispersion=_dispersion_key, scf_accelerator=_scf_accel, uses_ase=optimize, # BFGS path uses ASE uses_ecp=_uses_ecp, uses_cpcm=_uses_cpcm, direct_scf=_uses_direct, dft_plus_u=bool(dft_plus_u), uses_integrals=not _is_mlip, uses_scf=not _is_mlip, ) write_bibtex(output_stem, _refs) write_references(output_stem, _refs) cite_block_text = format_references_block(_refs) _bib_name = output_stem.with_suffix(".bibtex").name _ref_name = output_stem.with_suffix(".references").name f.write(f" Citations written to {_bib_name} + {_ref_name}\n") f.flush() except Exception as _cite_exc: f.write( f" (warning: citation emission failed: " f"{type(_cite_exc).__name__}: {_cite_exc})\n" ) f.flush() warn_writer_failure( _cite_exc, output_stem.with_suffix(".bibtex"), role="citations", category=OutputFailureKind.optional_artifact, writer=_output_writer, ) # Final geometry as a plain XYZ sibling (Phase O3). ``molecule`` # at this point is the geometry the SCF actually ran on — the # optimised one when ``optimize=True``, otherwise the input. # Energy in Hartree is appended to the comment line so ASE / # Open Babel can recover the SCF result from the .xyz alone. if write_xyz_file: xyz_path = output_stem.with_suffix(".xyz") try: energy_ha = float(getattr(result, "energy", float("nan"))) except (TypeError, ValueError): energy_ha = float("nan") try: with ( plog.stage("write_xyz", detail=str(xyz_path.name)), PerfScope("write_xyz"), ): write_xyz( output_stem, molecule, energy_ha=(None if energy_ha != energy_ha else energy_ha), ) f.write(f" Final geometry written to {xyz_path.name}\n") f.flush() except Exception as _xyz_exc: # Geometry writer is best-effort — never let a `.xyz` # failure tank a finished SCF. The user can always # re-run with ``write_xyz_file=False``. f.write( f" (warning: .xyz writer failed: " f"{type(_xyz_exc).__name__}: {_xyz_exc})\n" ) f.flush() warn_writer_failure( _xyz_exc, xyz_path, role="final_xyz_geometry", category=OutputFailureKind.optional_artifact, writer=_output_writer, ) # Structured log: properties event. Best-effort — the helpers # raise on basis-set shapes vibe-qc's Python side can't # parse; a failure here must never tank the run, so we wrap # each section. The event only fires when at least one # property was successfully computed. if _slog.enabled and has_mos: _props_payload: dict[str, Any] = {} try: from .properties import ( dipole_moment as _dipole, ) from .properties import ( loewdin_charges as _low, ) from .properties import ( mulliken_charges as _mul, ) _props_payload["mulliken"] = [ float(x) for x in _mul(result, basis_obj, molecule) ] _props_payload["loewdin"] = [ float(x) for x in _low(result, basis_obj, molecule) ] _dip = _dipole(result, basis_obj, molecule) _props_payload["dipole"] = { "x": float(_dip.x), "y": float(_dip.y), "z": float(_dip.z), "total": float(_dip.total), "total_debye": float(_dip.total_debye), } except Exception as _props_exc: # Properties are best-effort; the trace already # carries a degraded properties block, the structured # log just skips the event. Surface a warning so the # user knows the structured-log `properties` event was # dropped. warn_output_failure( _props_exc, output_stem.with_suffix(".out"), role="structured_log_properties", category=OutputFailureKind.compatibility_fallback, ) if _props_payload: _slog.emit("properties", **_props_payload) if optimize: if _opt_backend == "ase" and traj_path.is_file(): f.write(f" Optimization trajectory written to {traj_path.name}\n") f.flush() t_total = time.perf_counter() - t_job_start n_iter = getattr(result, "n_iter", 0) iter_avg = (t_scf / n_iter) if n_iter > 0 else float("nan") f.write("\n Timings (wall clock, seconds)\n " + "-" * 52 + "\n") if optimize: f.write(f" {'Geometry optimization':<32s} {t_opt:12.3f}\n") f.write(f" {'SCF total':<32s} {t_scf:12.3f}\n") f.write( f" {'SCF avg. per iteration':<32s} {iter_avg:12.3f} ({n_iter} iters)\n" ) f.write(f" {'Job total':<32s} {t_total:12.3f}\n") f.write( f" Used {threads_in_use} OpenMP thread" f"{'s' if threads_in_use != 1 else ''}.\n" ) f.flush() plog.info(f"Job total {t_total:.2f}s — output written to {out_path}") _slog.emit( "job_end", total_wall_s=float(t_total), scf_wall_s=float(t_scf), opt_wall_s=float(t_opt), n_iter=int(getattr(result, "n_iter", 0)), converged=bool(getattr(result, "converged", False)), energy=float(getattr(result, "energy", 0.0)), out_path=str(out_path), ) # "## References" block (Phase O5b) — embed the assembled # citation list in the .out so the text log is self-contained. if cite_block_text: f.write("\n" + cite_block_text + "\n") f.flush() # --- Harmonic vibrational analysis (finite-difference Hessian) --- if hessian: _scf_converged = bool(getattr(result, "converged", False)) if not _scf_converged: f.write( "\n ## Vibrational Frequencies\n" " " + "-" * 52 + "\n" " SKIPPED -- SCF did not converge.\n" ) f.flush() else: with ( plog.stage("hessian_fd", detail="finite-difference Hessian"), PerfScope("hessian_fd"), ): try: from .hessian import ( HessianFDOptions, compute_hessian_fd, ir_intensities, ) _hess_scf_opts = { "rhf": rhf_options, "uhf": uhf_options, "rks": rks_options, "uks": uks_options, }.get(resolved_method) _hess_opts = HessianFDOptions( include_dipole_derivatives=True, ) hessian_result = compute_hessian_fd( molecule, basis, method=resolved_method.upper(), scf_options=_hess_scf_opts, hessian_options=_hess_opts, ) f.write( f"\n ## Vibrational Frequencies\n" f" " + "-" * 52 + "\n" f" Finite-difference Hessian" f" (step = {_hess_opts.step_bohr:.3f} bohr," f" {hessian_result.n_displacements} displacements)\n" f" Imaginary modes: {hessian_result.imaginary_count}\n" f" Linear molecule: {hessian_result.is_linear}\n\n" ) _n_skip = 5 if hessian_result.is_linear else 6 _freqs = hessian_result.frequencies_cm1 _ir = None try: _ir = ir_intensities(hessian_result) except Exception as _ir_exc: f.write( f" (warning: IR intensities not available: " f"{type(_ir_exc).__name__}: {_ir_exc})\n" ) _header = " {:<6s} {:>10s}" + ( " {:>12s}" if _ir is not None else "" ) if _ir is not None: f.write( _header.format( "Mode", "Freq/cm\u207b\u00b9", "IR/(km/mol)" ) + "\n" ) f.write(" " + "-" * 52 + "\n") for k in range(_n_skip, len(_freqs)): _label = f"{k - _n_skip + 1}" _freq = _freqs[k] _iri = _ir[k] if _freq < 0: _freq_str = f"{abs(_freq):.1f}i" else: _freq_str = f"{_freq:.1f}" f.write( f" {_label:<6s} {_freq_str:>10s} {_iri:>12.2f}\n" ) else: f.write( _header.format("Mode", "Freq/cm\u207b\u00b9") + "\n" ) f.write(" " + "-" * 52 + "\n") for k in range(_n_skip, len(_freqs)): _label = f"{k - _n_skip + 1}" _freq = _freqs[k] if _freq < 0: _freq_str = f"{abs(_freq):.1f}i" else: _freq_str = f"{_freq:.1f}" f.write(f" {_label:<6s} {_freq_str:>10s}\n") f.write("\n") f.flush() except Exception as _hess_exc: f.write( f"\n ## Vibrational Frequencies\n" f" " + "-" * 52 + "\n" f" FAILED: {type(_hess_exc).__name__}: {_hess_exc}\n" ) f.flush() hessian_result = None # --- QVF visualisation archive (v1) ---------------------------------- if output_qvf: try: from vibeqc.output.formats.qvf import ( scf_history_from_result as _scf_history_from_result, ) from vibeqc.output.formats.qvf import write_qvf as _write_qvf _qvf_path_for_warn = output_stem.with_suffix(".qvf") _qvf_scf_history = _scf_history_from_result(result) _bibtex = None if _refs is not None: try: from vibeqc.output.citations.bibtex import format_bibtex _bibtex = format_bibtex(_refs) except Exception as _bib_exc: warn_output_failure( _bib_exc, _qvf_path_for_warn, role="qvf_bibtex_prep", category=OutputFailureKind.compatibility_fallback, ) _pop = None if write_population_file and has_mos: try: from vibeqc.output.formats.population import ( compute_population_summary, ) _pop = compute_population_summary( result, basis_obj, molecule, ) except Exception as _qpop_exc: warn_output_failure( _qpop_exc, _qvf_path_for_warn, role="qvf_population_prep", category=OutputFailureKind.compatibility_fallback, ) _qvf_vol_data = None if has_mos and _cube_req.density: try: from vibeqc.output.formats.qvf import qvf_density_data _qvf_vol_data = qvf_density_data( result, basis_obj, molecule, spacing=cube_spacing, padding=cube_padding, ) except Exception as _qvol_exc: warn_output_failure( _qvol_exc, _qvf_path_for_warn, role="qvf_density_prep", category=OutputFailureKind.compatibility_fallback, ) _qvf_mo_list = None if has_mos and _cube_req.mo_labels: try: from vibeqc.output.formats.qvf import qvf_mo_data _indices = requested_mo_indices( _cube_req.mo_labels, result, molecule ) _qvf_mo_list = qvf_mo_data( result, basis_obj, molecule, [int(i) for i, _ in _indices], spacing=cube_spacing, padding=cube_padding, ) except Exception as _qmo_exc: warn_output_failure( _qmo_exc, _qvf_path_for_warn, role="qvf_mo_prep", category=OutputFailureKind.compatibility_fallback, ) # Package the wavefunction for the QVF archive so vibe-view # can resample any MO on demand (wavefunction.gto section). # Mirrors the periodic runner; gated on write_molden_file so # the .molden sidecar and the embedded basis+MO stay in sync. _qvf_wf = None if write_molden_file and has_mos: try: from vibeqc.output.formats.qvf import qvf_wf_data _qvf_wf = qvf_wf_data(result, basis_obj, molecule) except Exception as _qwf_exc: warn_output_failure( _qwf_exc, _qvf_path_for_warn, role="qvf_wavefunction_prep", category=OutputFailureKind.compatibility_fallback, ) _qvf_path = _write_qvf( output_stem, _output_plan, molecule=molecule, result=result, method=resolved_method, basis=basis, functional=functional, population_summary=_pop, bibtex_content=_bibtex, wall_seconds=t_total, trajectory_frames=_traj_frames if _traj_frames else None, trajectory_energies=_traj_energies if _traj_energies else None, volume_data=_qvf_vol_data, mo_data=_qvf_mo_list, wf_data=_qvf_wf, hessian_result=hessian_result, scf_history_data=_qvf_scf_history, ) try: _output_writer.record(_qvf_path, wall_time_s=t_total) except Exception as _qvf_rec_exc: warn_writer_failure( _qvf_rec_exc, _qvf_path, role="qvf_manifest_record", category=OutputFailureKind.manifest_recording, writer=_output_writer, ) except Exception as _qvf_exc: warn_writer_failure( _qvf_exc, output_stem.with_suffix(".qvf"), role="qvf_archive", category=OutputFailureKind.optional_artifact, writer=_output_writer, ) # Finalise the {output}.system manifest (pre-v1.0 dispatch- # overhaul — replaces the bare end-of-job write_system_manifest # call). The OutputWriter, constructed before the SCF, already # wrote the [vibeqc] / [host] / [cpu] / ... / [plan] sections + # [outputs].status = "running". Here we (1) record every declared # artefact that actually landed on disk into [[outputs.files]] # — with bytes + sha256 + wall-time — so vq's fetch / status can # see exactly what to copy back, and (2) flip the status to # "complete" with the final wall-time. A non-converged SCF still # counts as "complete": the job ran to completion; the .out + the # result object carry the non-convergence. for _pf in _output_plan.files: if _pf.path.is_file(): try: _output_writer.record(_pf.path) except Exception as _rec_exc: # record() stats + hashes the file; a brittle write # at wrap-up must never tank a finished job. We do # surface it though — a manifest with stale rows # confuses ``vq fetch``. warn_writer_failure( _rec_exc, _pf.path, role=f"manifest_record_{_pf.role}", category=OutputFailureKind.manifest_recording, writer=_output_writer, ) _output_writer.finish(wall_seconds=t_total) return result
__all__ = ["run_job"]