"""High-level "run a job" convenience — classic QC-program workflow.
A single call writes the output text file, the molden orbital file, and
(for geometry optimization) a trajectory animation — the kind of shape
users expect from Gaussian / ORCA / NWChem:
from vibeqc import Atom, Molecule
from vibeqc.runner import run_job
mol = Molecule.from_xyz("h2o.xyz")
run_job(mol, basis="6-31g*", method="rhf", output="h2o")
# -> h2o.out, h2o.molden
run_job(mol, basis="6-31g*", method="rks", functional="PBE",
optimize=True, output="h2o_opt")
# -> h2o_opt.out, h2o_opt.molden, h2o_opt.traj
All text output goes to ``{output}.out``. The molden file is written iff
the method produces molecular orbitals (RHF / UHF / RKS / UKS) and
``write_molden=True``. Geometry optimization uses ASE's BFGS and writes
one frame per step to ``{output}.traj``.
"""
from __future__ import annotations
import os
import time
from pathlib import Path
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union
from ._vibeqc_core import (
BasisSet,
D3BJParams,
Molecule,
RHFOptions,
RKSOptions,
UHFOptions,
UKSOptions,
get_num_threads,
run_rhf,
run_rks,
run_uhf,
run_uks,
set_num_threads,
)
from .banner import VIBEQC_VERSION, banner, library_versions
from .composites import (
Availability,
CompositeRecipe,
CompositeUnavailable,
resolve_composite,
)
from .crash_dump import crash_dump_context, dump_on_failure
from .dispersion import compute_d3bj, d3bj_params_for
from .dispersion_d4 import compute_d4, dftd4_available
from .dispersion_d4_parameters import normalize_d4_key
from .gcp import GCPDataMissing, compute_gcp
from .io.molden import write_molden
from .memory import check_memory, estimate_memory, format_memory_report
from .output import (
OutputPlan,
OutputWriter,
dry_run_manifest,
is_dry_run_requested,
parse_write_cube_kwarg,
requested_mo_indices,
write_cube_density_for_run_job,
write_cube_mo_for_run_job,
write_population,
write_xyz,
)
from .output._errors import (
OutputFailureKind,
warn_output_failure,
warn_writer_failure,
)
from .output.citations import (
format_references_block,
load_default_database,
write_bibtex,
write_references,
)
from .perf import PerfScope
from .perf import perf_log as _perf_log_ctx
from .progress import ProgressLogger, resolve_progress
from .scf_log import format_scf_trace
from .solvers import (
CASCIOptions,
CASPT2Options,
CASSCFOptions,
DMRGOptions,
Hamiltonian,
SelectedCIOptions,
SolverResult,
TranscorrelatedOptions,
V2RDMOptions,
build_hamiltonian_mo,
build_transcorrelated_hamiltonian,
get_hf_orbital_provider,
solve_dmrg,
solve_selected_ci,
solve_v2rdm,
)
from .structured_log import (
StructuredLog,
run_fingerprint,
)
from .structured_log import (
structured_log as _structured_log_ctx,
)
DispersionSpec = Optional[object] # str functional name | D3BJParams | None
def _resolve_dispersion(
dispersion: DispersionSpec,
functional: Optional[str],
) -> Optional[D3BJParams]:
"""Normalize the ``dispersion=`` argument into a D3BJParams object,
or None if no dispersion correction is requested.
* ``None`` or empty string → no dispersion.
* ``True`` / ``"d3bj"`` → use D3-BJ params for the current functional.
* ``"pbe" / "b3lyp" / ...`` → use D3-BJ params for that functional
explicitly (useful for HF + D3-BJ).
* :class:`D3BJParams` instance → used as-is.
"""
if dispersion is None or dispersion is False:
return None
if isinstance(dispersion, D3BJParams):
return dispersion
if isinstance(dispersion, str):
key = dispersion.strip().lower()
if key in ("", "none", "false"):
return None
if key in ("d3bj", "true", "yes", "on"):
if not functional:
raise ValueError(
"dispersion='d3bj' requires a DFT functional so we can "
"look up damping parameters; pass functional='pbe', "
"etc., or give a functional name directly as the "
"dispersion argument."
)
lookup = functional
else:
lookup = dispersion
p = d3bj_params_for(lookup)
if p is None:
raise ValueError(
f"dispersion={dispersion!r}: no D3-BJ parameters found "
f"for functional {lookup!r} (neither builtin nor dftd3 "
f"backend knows this name). Install the dftd3 backend "
f"for the full Grimme parameter set: "
f"`pip install dftd3` into your vibe-qc venv, or "
f"`pip install -e '.[dispersion]'` from the repo checkout."
)
return p
if dispersion is True:
if not functional:
raise ValueError(
"dispersion=True requires a DFT functional; pass functional='pbe', etc."
)
return d3bj_params_for(functional)
raise TypeError(
f"dispersion must be None, bool, str, or D3BJParams; got "
f"{type(dispersion).__name__}"
)
class _DispersionAugmented:
"""Transparent wrapper around an SCF result that exposes the
dispersion contribution as extra attributes without overriding the
raw SCF ``.energy``. Users opt in to the total energy via
:attr:`energy_total`; everything else (mo_energies, density, fock,
converged, n_iter, scf_trace, …) forwards to the underlying
pybind11 SCF-result struct.
We wrap rather than mutate because the C++ result classes have
``def_readonly`` fields — they can't be extended in place from
Python.
"""
__slots__ = ("_scf", "e_dispersion", "dispersion_params")
def __init__(self, scf_result, e_dispersion: float, params: D3BJParams):
object.__setattr__(self, "_scf", scf_result)
object.__setattr__(self, "e_dispersion", e_dispersion)
object.__setattr__(self, "dispersion_params", params)
def __getattr__(self, name):
return getattr(self._scf, name)
@property
def energy_total(self) -> float:
"""SCF + dispersion energy (Hartree)."""
return float(self._scf.energy) + float(self.e_dispersion)
def __repr__(self) -> str:
return (
f"{type(self._scf).__name__}(energy={self._scf.energy:.10f}, "
f"e_dispersion={self.e_dispersion:+.6e}, "
f"energy_total={self.energy_total:.10f})"
)
class _MP2Augmented:
"""Transparent wrapper exposing a post-SCF MP2 result without
overriding the raw SCF ``.energy``. :attr:`mp2` is the C++
``MP2Result`` (``e_hf`` / ``e_os`` / ``e_ss`` / ``e_correlation`` /
``e_total``); :attr:`energy_total` is the MP2 total energy. Everything
else (mo_energies, density, fock, converged, n_iter, scf_trace, …)
forwards to the underlying pybind11 SCF-result struct.
Mirrors :class:`_DispersionAugmented`: we wrap rather than mutate
because the C++ result classes have ``def_readonly`` fields and no
``__dict__`` (so ``result.mp2 = …`` would raise).
"""
__slots__ = ("_scf", "mp2")
def __init__(self, scf_result, mp2_result):
object.__setattr__(self, "_scf", scf_result)
object.__setattr__(self, "mp2", mp2_result)
def __getattr__(self, name):
return getattr(self._scf, name)
@property
def energy_total(self) -> float:
"""SCF + MP2 correlation energy (Hartree)."""
return float(self.mp2.e_total)
def __repr__(self) -> str:
return (
f"{type(self._scf).__name__}+MP2("
f"e_scf={float(self._scf.energy):.10f}, "
f"e_corr={float(self.mp2.e_correlation):+.6e}, "
f"e_mp2_total={float(self.mp2.e_total):.10f})"
)
class _DLPNOMP2Augmented:
"""SCF result + DLPNO-MP2 result, attribute-forwarding wrapper.
Mirrors :class:`_MP2Augmented` — wraps rather than mutates because the
C++ SCF result has ``def_readonly`` fields and no ``__dict__``.
"""
__slots__ = ("_scf", "dlpno_mp2")
def __init__(self, scf_result, dlpno_result):
object.__setattr__(self, "_scf", scf_result)
object.__setattr__(self, "dlpno_mp2", dlpno_result)
def __getattr__(self, name):
return getattr(self._scf, name)
@property
def energy_total(self) -> float:
"""SCF + DLPNO-MP2 correlation energy (Hartree)."""
return float(self.dlpno_mp2.e_total)
def __repr__(self) -> str:
return (
f"{type(self._scf).__name__}+DLPNO-MP2("
f"e_scf={float(self._scf.energy):.10f}, "
f"e_corr={float(self.dlpno_mp2.e_corr):+.6e}, "
f"e_total={float(self.dlpno_mp2.e_total):.10f})"
)
class _DLPNOCCSDAugmented:
"""SCF result + DLPNO-CCSD pilot result, attribute-forwarding wrapper."""
__slots__ = ("_scf", "dlpno_ccsd")
def __init__(self, scf_result, cc_result):
object.__setattr__(self, "_scf", scf_result)
object.__setattr__(self, "dlpno_ccsd", cc_result)
def __getattr__(self, name):
return getattr(self._scf, name)
@property
def energy_total(self) -> float:
"""SCF + CCSD correlation (+ (T) if computed), Hartree."""
return float(self.dlpno_ccsd.e_total)
def __repr__(self) -> str:
return (
f"{type(self._scf).__name__}+DLPNO-CCSD("
f"e_scf={float(self._scf.energy):.10f}, "
f"e_corr={float(self.dlpno_ccsd.e_corr):+.6e}, "
f"e_t={float(self.dlpno_ccsd.e_t):+.3e}, "
f"e_total={float(self.dlpno_ccsd.e_total):.10f})"
)
class _CCSDAugmented:
"""SCF result + CCSD/CCSD(T) result, attribute-forwarding wrapper.
Mirrors :class:`_MP2Augmented`: wraps rather than mutates because the
C++ SCF result has ``def_readonly`` fields and no ``__dict__``.
:attr:`ccsd` is the C++ ``CCSDResult`` (``e_ccsd_correlation`` /
``e_ccsd`` / ``e_t`` / ``e_ccsd_t`` / ``cc_trace`` / ...).
"""
__slots__ = ("_scf", "ccsd")
def __init__(self, scf_result, cc_result):
object.__setattr__(self, "_scf", scf_result)
object.__setattr__(self, "ccsd", cc_result)
def __getattr__(self, name):
return getattr(self._scf, name)
@property
def energy_total(self) -> float:
"""SCF + CCSD (+ (T) when computed) total energy (Hartree)."""
return float(self.ccsd.e_total)
def __repr__(self) -> str:
return (
f"{type(self._scf).__name__}+CCSD("
f"e_scf={float(self._scf.energy):.10f}, "
f"e_corr={float(self.ccsd.e_ccsd_correlation):+.6e}, "
f"e_t={float(self.ccsd.e_t):+.6e}, "
f"e_total={float(self.ccsd.e_total):.10f})"
)
class _OVGFAugmented:
"""Transparent wrapper exposing OVGF / GF2 quasiparticle results.
:attr:`ovgf` is the list of :class:`vibeqc.propagator.QuasiparticleResult`
(one per corrected orbital, carrying ε_scf / ε_qp / pole strength). All SCF
attributes forward to the underlying result (mirrors :class:`_MP2Augmented`;
the C++ result has no ``__dict__``).
"""
__slots__ = ("_scf", "ovgf")
def __init__(self, scf_result, ovgf_results):
object.__setattr__(self, "_scf", scf_result)
object.__setattr__(self, "ovgf", ovgf_results)
def __getattr__(self, name):
return getattr(self._scf, name)
def __repr__(self) -> str:
n = len(self.ovgf)
return f"{type(self._scf).__name__}+OVGF({n} quasiparticle orbitals)"
if TYPE_CHECKING:
from .mlip import MLIPOptions
from .semiempirical.methods.msindo_ccm import CCMOptions
from .thermo import ThermoOptions
Method = Literal[
"rhf",
"rks",
"uhf",
"uks",
"auto",
"selected_ci",
"dmrg",
"v2rdm",
"transcorrelated_ci",
"casci",
"mrci",
"casscf",
"fci",
"ccsd",
"ccsd(t)",
# Post-SCF Møller–Plesset (RHF reference + native C++ MP2; SCS/SOS via
# the c_os/c_ss spin-component scaling — see vibeqc.correlation).
"mp2",
"scs-mp2",
"sos-mp2",
# DLPNO-MP2 (RHF reference + vibeqc.dlpno.mp2 — Foster-Boys LMOs, PAO
# domains, semicanonical PNOs, coupled LMP2 residuals; Pinski 2015).
"dlpno-mp2",
# DLPNO-CCSD / CCSD(T) correctness pilot (vibeqc.dlpno.ccsd —
# subspace-projected through the FCI-anchored spin-orbital engine;
# O(N^6), capped at max_nbf; Riplinger 2013).
"dlpno-ccsd",
"dlpno-ccsd(t)",
# Green's-function quasiparticle IPs/EAs (RHF reference + the diagonal
# second-order self-energy — see vibeqc.propagator).
"ovgf",
# Composite 3c keywords (v0.9.0 — see vibeqc.composites). The
# dispatcher resolves these via _apply_composite before any of the
# other Method literal branches see them.
"hf-3c",
"pbeh-3c",
"b97-3c",
"b3lyp-3c",
"r2scan-3c",
"wb97x-3c",
"hse-3c",
# Semiempirical methods.
"dftb0",
"scc_dftb",
"pm6",
"gfn2_xtb",
"om1",
"om2",
"om3",
"msindo",
# Periodic MSINDO via the Cyclic Cluster Model (needs ccm_options with the
# supercell lattice vectors). Routed through vibeqc.semiempirical.methods.
# msindo_ccm.run_ccm.
"ccm",
# Machine-learning interatomic potential — external pre-trained
# model (ACEsuit MACE). Routed through vibeqc.mlip.mace.
"mace",
]
# Machine-learning interatomic potentials (external pre-trained models,
# routed through vibeqc.mlip). Basis-set-free like the semiempirical
# methods; unlike them, vibe-qc drives a third-party pre-trained forward
# pass (maintainer-approved CLAUDE.md §10 extension). Kept as a module
# constant so the basis-skip sites in run_job stay in sync.
_MLIP_METHODS = frozenset({"mace"})
# Post-SCF correlation methods: an RHF/UHF SCF runs first (resolved_method),
# but citations + the output label key off the *original* method so the
# correlation papers (routes.methods.{ccsd,ccsd(t),mp2,scs-mp2,sos-mp2}) fire.
_POSTSCF_CITE_METHODS = frozenset(
{"ccsd", "ccsd(t)", "mp2", "scs-mp2", "sos-mp2", "dlpno-mp2",
"dlpno-ccsd", "dlpno-ccsd(t)", "ovgf"}
)
def _select_method(
method: Method, molecule: Molecule, functional: Optional[str]
) -> str:
"""Resolve ``method='auto'`` against molecule.multiplicity + functional.
Heuristic wavefunction routing by electron count:
- ≤4 electrons → FCI (exact for tiny systems)
- ≤8 electrons → Selected-CI (systematically improvable)
- >8 electrons → HF (via SCF)
``ccsd`` and ``ccsd(t)`` run RHF SCF first, then post-SCF coupled-cluster."""
if method == "auto":
if functional:
return "uks" if molecule.multiplicity > 1 else "rks"
n_elec = molecule.n_electrons()
if n_elec <= 4:
return "fci"
if n_elec <= 8:
return "selected_ci"
return "uhf" if molecule.multiplicity > 1 else "rhf"
if method in ("ccsd", "ccsd(t)"):
return "rhf"
# Post-SCF MP2 (and its spin-component-scaled variants) runs the mean-field
# SCF first — RHF for closed shell, UHF for open shell (native run_ump2);
# the post-SCF step keys off the original ``method`` downstream.
if method in ("mp2", "scs-mp2", "sos-mp2"):
return "uhf" if molecule.multiplicity > 1 else "rhf"
# DLPNO-MP2 is closed-shell only for now (open-shell DLPNO is a later
# milestone); the dispatch branch raises a clear error on multiplicity.
if method in ("dlpno-mp2", "dlpno-ccsd", "dlpno-ccsd(t)"):
return "rhf"
# OVGF (diagonal-self-energy) runs the mean-field SCF first — RHF for closed
# shell, UHF for open shell (the spin-resolved spin-orbital self-energy).
if method == "ovgf":
return "uhf" if molecule.multiplicity > 1 else "rhf"
return method
def _apply_composite(
method: str,
*,
basis: Optional[str],
functional: Optional[str],
dispersion,
molecule: Molecule,
) -> tuple[str, str, Optional[str], object, Optional[CompositeRecipe]]:
"""Resolve a composite-3c ``method=`` keyword (``"hf-3c"``,
``"pbeh-3c"``, …) into the (method, basis, functional, dispersion,
recipe) tuple the rest of :func:`run_job` consumes.
Non-composite ``method`` values pass through unchanged with
``recipe = None``. For a composite:
* basis / functional / dispersion are taken from the recipe when
the caller left them at defaults; an explicit caller basis wins
with a warning (deliberate-experiment escape hatch).
* D3-BJ recipes carry their own re-fit damping (``d3bj_damping``);
it is passed straight through as a :class:`D3BJParams` so the
dispersion module doesn't fall back to a wrong per-functional
lookup.
* Recipes flagged ``PENDING_*`` raise :class:`CompositeUnavailable`
with a pointer at the gating roadmap item.
"""
recipe = resolve_composite(method)
if recipe is None:
return method, basis or "", functional, dispersion, None
if recipe.availability in (
Availability.PENDING_F1,
Availability.PENDING_F3,
Availability.PENDING_ECP,
):
raise CompositeUnavailable(
f"method={method!r}: {recipe.availability.value}. "
f"This composite depends on infrastructure not yet on "
f"main. See docs/user_guide/composites.md § Availability.\n"
f" Notes: {recipe.notes}"
)
resolved_basis = basis if basis else recipe.basis
if basis and basis.lower() != recipe.basis.lower():
import warnings
warnings.warn(
f"method={method!r} specifies basis={recipe.basis!r}, but "
f"caller passed basis={basis!r}. Honouring the explicit "
f"basis — the published 3c parameters were fit at the "
f"composite's native basis and may not transfer.",
stacklevel=2,
)
resolved_functional = functional if functional else recipe.functional
if dispersion is None:
if recipe.dispersion == "d3bj" and recipe.d3bj_damping is not None:
resolved_dispersion: object = D3BJParams(
s6=recipe.d3bj_damping.s6,
s8=recipe.d3bj_damping.s8,
a1=recipe.d3bj_damping.a1,
a2=recipe.d3bj_damping.a2,
)
else:
resolved_dispersion = recipe.dispersion
else:
resolved_dispersion = dispersion
# Pure-HF composites (recipe.functional is None — only HF-3c today)
# must route straight to the HF SCF driver. Handing "auto" to
# _select_method with no functional would drop into the
# wavefunction auto-ladder (≤4e → FCI, ≤8e → selected-CI), so a
# turnkey hf-3c on H2 silently ran fci(ndet=4) instead of HF
# (audit F1.1). DFT composites keep "auto" → _select_method picks
# rks/uks from multiplicity.
if resolved_functional is None:
resolved_method = "uhf" if molecule.multiplicity > 1 else "rhf"
else:
resolved_method = "auto"
return (
resolved_method,
resolved_basis,
resolved_functional,
resolved_dispersion,
recipe,
)
def _detect_scf_accelerator(
resolved_method: str,
rhf_options: Optional[RHFOptions],
uhf_options: Optional[UHFOptions],
rks_options: Optional[RKSOptions],
uks_options: Optional[UKSOptions],
) -> Optional[str]:
"""Return the lower-cased SCFAccelerator name (``"diis"`` / ``"ediis"``
/ ``"ediis_diis"`` / ``"kdiis"`` / ``"adiis"``) actually used by the
resolved SCF, or ``None`` when no options struct is in play (post-SCF
/ non-mean-field methods).
The citation router uses this to fire the right SCF-accelerator
references — Hu-Yang 2010 for ADIIS, Garza-Scuseria 2012 for KDIIS,
Kudin-Scuseria-Cancès 2002 for EDIIS — instead of always crediting
plain DIIS (Pulay 1980 / 1982).
"""
opts = {
"rhf": rhf_options,
"uhf": uhf_options,
"rks": rks_options,
"uks": uks_options,
}.get(resolved_method)
if opts is None:
return None
accel = getattr(opts, "scf_accelerator", None)
if accel is None:
return None
name = getattr(accel, "name", None) or str(accel)
return name.strip().lower()
def _detect_uses_ecp(
rhf_options: Optional[RHFOptions],
uhf_options: Optional[UHFOptions],
rks_options: Optional[RKSOptions],
uks_options: Optional[UKSOptions],
) -> bool:
"""Return True when any options struct carries a non-empty
``ecp_centers`` list — the runtime signal that libecpint will build
the V_ECP operator.
Conservative: returns False when no options struct is present at
all (post-SCF / FCI / selected-CI paths build their own HF prep
without going through these options).
"""
for opts in (rhf_options, uhf_options, rks_options, uks_options):
if opts is None:
continue
centers = getattr(opts, "ecp_centers", None)
if centers: # non-empty list
return True
return False
def _detect_direct_scf(
resolved_method: str,
rhf_options: Optional[RHFOptions],
uhf_options: Optional[UHFOptions],
rks_options: Optional[RKSOptions],
uks_options: Optional[UKSOptions],
basis_name: str,
molecule: Molecule,
) -> bool:
"""Return True when the SCF will use the direct (integral-driven)
Fock build path — either because ``scf_mode=DIRECT`` was explicitly
set, or because AUTO resolved to DIRECT (basis larger than the
auto threshold, and density_fit/cosx are off).
Conservative: returns False when we can't determine the resolved
mode (no options struct, or density_fit/cosx supersedes).
"""
opts = {
"rhf": rhf_options,
"uhf": uhf_options,
"rks": rks_options,
"uks": uks_options,
}.get(resolved_method)
if opts is None:
return False
# density_fit + cosx supersede the four-index path — direct SCF
# screening is not used, so the direct-SCF references don't apply.
if getattr(opts, "density_fit", False):
return False
scf_mode = getattr(opts, "scf_mode", None)
if scf_mode is None:
return False
from ._vibeqc_core import SCFMode
if scf_mode == SCFMode.DIRECT:
return True
if scf_mode == SCFMode.AUTO:
threshold = getattr(opts, "scf_mode_auto_threshold", 200)
n_bf = BasisSet(molecule, basis_name).nbasis
return n_bf > threshold
# CONVENTIONAL — not direct.
return False
def _detect_acceleration(
resolved_method: str,
rhf_options: Optional[RHFOptions],
uhf_options: Optional[UHFOptions],
rks_options: Optional[RKSOptions],
uks_options: Optional[UKSOptions],
) -> list[str]:
"""Return the integral/exchange-acceleration technique keys engaged by
the resolved SCF, for ``assemble(acceleration=...)``.
* ``density_fit=True`` (RI-J Coulomb fitting) ⇒ ``["rij"]`` — Whitten
1973 / Dunlap 1979 / Eichkorn 1995 + 1997.
* ``density_fit=True`` *and* ``cosx=True`` (RIJCOSX: RI-J Coulomb +
chain-of-spheres exchange) ⇒ ``["rijcosx"]`` — Neese 2009. COSX
requires density fitting (the C++ JK dispatch rejects ``cosx`` without
``density_fit``), so a lone ``cosx`` flag still maps to RIJCOSX.
Empty list for post-SCF / non-mean-field methods (no options struct in
the rhf/uhf/rks/uks map) and for the conventional / direct four-index
path (neither density_fit nor cosx set).
"""
opts = {
"rhf": rhf_options,
"uhf": uhf_options,
"rks": rks_options,
"uks": uks_options,
}.get(resolved_method)
if opts is None:
return []
density_fit = bool(getattr(opts, "density_fit", False))
cosx = bool(getattr(opts, "cosx", False))
if cosx: # COSX always pairs the RI-J Coulomb build → RIJCOSX
return ["rijcosx"]
if density_fit:
return ["rij"]
return []
def _detect_soscf_trah(
resolved_method: str,
rhf_options: Optional[RHFOptions],
uhf_options: Optional[UHFOptions],
rks_options: Optional[RKSOptions],
uks_options: Optional[UKSOptions],
) -> tuple[bool, bool]:
"""Return ``(uses_soscf, uses_trah)`` for the resolved SCF.
Both second-order convergers are armed by a *positive* threshold on the
options struct (``soscf_threshold`` / ``trah_threshold``): the driver
switches from Roothaan diagonalisation to the second-order step once the
orbital-gradient norm drops below it (cpp/src/soscf.hpp, trah.hpp). A
threshold of 0.0 (the default) means "never engage" — no citation. Both
False for methods with no options struct.
"""
opts = {
"rhf": rhf_options,
"uhf": uhf_options,
"rks": rks_options,
"uks": uks_options,
}.get(resolved_method)
if opts is None:
return (False, False)
def _armed(attr: str) -> bool:
try:
return float(getattr(opts, attr, 0.0) or 0.0) > 0.0
except (TypeError, ValueError):
return False
return (_armed("soscf_threshold"), _armed("trah_threshold"))
# SCF initial-guess enum names → citation-route keys. Only guesses with a
# defining-paper route appear; the traditional Hcore guess and AUTO (resolved
# at runtime) carry none, so they map to nothing.
_SCF_GUESS_ROUTE_KEYS = {
"sad": "sad",
"sap": "sap",
"hueckel": "huckel",
"huckel": "huckel",
}
def _detect_scf_guess(
resolved_method: str,
rhf_options: Optional[RHFOptions],
uhf_options: Optional[UHFOptions],
rks_options: Optional[RKSOptions],
uks_options: Optional[UKSOptions],
) -> Optional[str]:
"""Return the citation route key for the SCF initial guess explicitly
requested on the options struct (``"sad"`` / ``"sap"`` / ``"huckel"``),
or ``None`` for AUTO / HCORE / unrouted guesses and for methods with no
options struct.
Only an *explicit* non-AUTO selection cites a guess paper — the AUTO
resolution is a runtime decision not surfaced on the options struct, and
Hcore (Hückel-free bare-core guess) has no single defining publication.
"""
opts = {
"rhf": rhf_options,
"uhf": uhf_options,
"rks": rks_options,
"uks": uks_options,
}.get(resolved_method)
if opts is None:
return None
guess = getattr(opts, "initial_guess", None)
if guess is None:
return None
name = (getattr(guess, "name", None) or str(guess)).strip().lower()
return _SCF_GUESS_ROUTE_KEYS.get(name)
def _run_single_point(
method: str,
molecule: Molecule,
basis: BasisSet,
*,
functional: Optional[str],
rhf_options: Optional[RHFOptions] = None,
uhf_options: Optional[UHFOptions] = None,
rks_options: Optional[RKSOptions] = None,
uks_options: Optional[UKSOptions] = None,
selected_ci_options: Optional[SelectedCIOptions] = None,
dmrg_options: Optional[DMRGOptions] = None,
v2rdm_options: Optional[V2RDMOptions] = None,
transcorrelated_options: Optional[TranscorrelatedOptions] = None,
casci_options: Optional[CASCIOptions] = None,
caspt2_options: Optional[CASPT2Options] = None,
casscf_options: Optional[CASSCFOptions] = None,
active_space: Optional[tuple[int, int]] = None,
cas_reference: Optional[str] = None,
mlip_options: Optional[MLIPOptions] = None,
ccm_options: Optional["CCMOptions"] = None,
solvent: object = None,
dft_plus_u: Optional[List["HubbardSite"]] = None,
nddo: bool = False,
):
"""Single-point dispatcher. ``solvent`` (v0.9.0) reroutes through
:func:`vibeqc.solvation.run_cpcm_scf` and returns the underlying
SCF result with an ``e_solv`` / ``solvent_result`` attribute
attached for downstream output writers."""
method_opts = {
"rhf": rhf_options or RHFOptions(),
"uhf": uhf_options or UHFOptions(),
}
if method in ("rks", "uks"):
opts = (rks_options if method == "rks" else uks_options) or (
RKSOptions() if method == "rks" else UKSOptions()
)
opts.functional = functional or opts.functional or "lda"
method_opts[method] = opts
if solvent is not None and method in ("rhf", "uhf", "rks", "uks"):
if dft_plus_u:
raise NotImplementedError(
"DFT+U combined with implicit solvation (CPCM) is not yet "
"supported. The +U Fock term and the solvation macro-"
"iteration would need to be combined inside "
"vibeqc.solvation.run_cpcm_scf; defer until Increment "
"3+ or run +U without solvent."
)
# CPCM macro-iteration wrapper. The C++ result types
# (RHFResult / UHFResult / RKSResult / UKSResult) are
# pybind11 ``def_readonly`` — Python attribute set on them
# silently fails inside try/except. Wrap the inner result in
# an attribute-forwarding proxy so callers can still write
# ``result.energy`` / ``result.density`` / etc., and *also*
# read the new solvation diagnostics (``result.solvent_result``,
# ``result.e_solv``, ``result.energy_in_solvent``).
from .solvation import run_cpcm_scf
from .solvation.driver import _solvent_aware_scf_result
sol = run_cpcm_scf(
molecule,
basis,
method=method,
solvent=solvent,
options=method_opts[method],
)
return _solvent_aware_scf_result(sol)
if dft_plus_u and method in ("rhf", "uhf", "rks", "uks"):
from .dft_plus_u import _apply_dft_plus_u_to_options
_apply_dft_plus_u_to_options(method_opts[method], basis, dft_plus_u)
if method == "rhf":
return run_rhf(molecule, basis, method_opts["rhf"])
if method == "uhf":
return run_uhf(molecule, basis, method_opts["uhf"])
if method == "rks":
return run_rks(molecule, basis, method_opts["rks"])
if method == "uks":
return run_uks(molecule, basis, method_opts["uks"])
# ── Non-mean-field wavefunction methods ──
if method in (
"selected_ci",
"dmrg",
"v2rdm",
"transcorrelated_ci",
"casci",
"mrci",
"casscf",
"nevpt2",
"caspt2",
):
import numpy as np
# Starting orbitals for the determinant-solver family. The
# open-shell default is UHF natural orbitals (UNO-CAS — Pulay &
# Hamilton, J. Chem. Phys. 88, 4926 (1988)): one spin-restricted
# orbital set ordered by descending occupation, so the
# doubly-occupied core and the active window are well-defined.
# (Plain "uhf" keeps only the α orbitals; the β space — and hence
# the core — is then only approximately represented.) Override
# with cas_reference="rhf"|"uhf"|"uno".
if cas_reference is not None and cas_reference not in (
"rhf",
"uhf",
"uno",
):
raise ValueError(
f"cas_reference must be 'rhf', 'uhf' or 'uno', got "
f"{cas_reference!r}"
)
hf_method = cas_reference or (
"uno" if molecule.multiplicity > 1 else "rhf"
)
C = get_hf_orbital_provider(molecule, basis, method=hf_method)
H = build_hamiltonian_mo(molecule, basis, C)
# Optional active-space truncation. The CAS methods (casci / nevpt2 /
# caspt2) are handled separately below — they keep the full MO
# integrals and dress an explicit frozen core (standard CAS
# convention), rather than dropping the core + virtual orbitals the
# way these CI/DMRG/v2RDM backends do.
if active_space is not None and method not in (
"casci",
"mrci",
"casscf",
"nevpt2",
"caspt2",
):
n_active, n_elec = active_space
n_frozen = max(0, H.norb - n_active)
C_act = np.asarray(C)[:, n_frozen : n_frozen + n_active]
H = build_hamiltonian_mo(molecule, basis, C_act)
H.nelec = n_elec
H.ms2 = 0
if method == "transcorrelated_ci":
if transcorrelated_options is None:
transcorrelated_options = TranscorrelatedOptions()
H = build_transcorrelated_hamiltonian(H, transcorrelated_options)
return solve_selected_ci(H, selected_ci_options or SelectedCIOptions())
if method == "selected_ci":
return solve_selected_ci(H, selected_ci_options or SelectedCIOptions())
if method == "dmrg":
return solve_dmrg(H, dmrg_options or DMRGOptions())
if method == "v2rdm":
return solve_v2rdm(H, v2rdm_options or V2RDMOptions())
if method in ("casci", "mrci", "casscf", "nevpt2", "caspt2"):
from .solvers._casci import casci as _run_casci
# Standard CAS partition: full MO integrals with an explicit frozen
# core. active_space=(n_active_orb, n_active_elec) selects the
# lowest n_core orbitals as doubly-occupied core, n_active_orb
# active orbitals, the rest virtual (matching PySCF mcscf.CASCI).
# H here is the full, untruncated Hamiltonian.
n_elec_total = molecule.n_electrons()
if active_space is not None:
n_active_orb, n_active_elec = active_space
if (n_elec_total - n_active_elec) % 2 != 0:
raise ValueError(
"CAS active_space=(n_orb, n_elec) needs an even "
f"frozen-core electron count; got n_elec_total="
f"{n_elec_total}, n_active_elec={n_active_elec}."
)
n_core = (n_elec_total - n_active_elec) // 2
else:
n_active_orb, n_active_elec, n_core = H.norb, n_elec_total, 0
n_virt = H.norb - n_core - n_active_orb
if method == "casscf":
from .solvers._casscf import casscf as _run_casscf
c_opts = casscf_options or CASSCFOptions()
if (
getattr(c_opts, "pt2", None) is not None
and c_opts.ci_solver != "selected_ci"
):
raise ValueError(
"CASSCFOptions.pt2 is the Epstein-Nesbet PT2 "
"stage on a SELECTED wavefunction; it requires "
"ci_solver='selected_ci' (the exact-CI backend "
"has no external perturbers)"
)
sc = _run_casscf(
H.h1e,
H.h2e,
n_active_elec=n_active_elec,
n_active_orb=n_active_orb,
n_core=n_core,
nuclear_repulsion=H.nuclear_repulsion,
ms2=molecule.multiplicity - 1,
nroots=c_opts.nroots,
weights=c_opts.weights,
orbital_step=c_opts.orbital_step,
active_orbitals=c_opts.active_orbitals,
spin_pure=c_opts.spin_pure,
ci_solver=c_opts.ci_solver,
selected_ci_options=c_opts.selected_ci_options,
)
label = f"casscf({n_active_elec}e,{n_active_orb}o)"
if c_opts.ci_solver == "selected_ci":
label += "_selci"
if sc.e_totals:
label += f"_sa{len(sc.e_totals)}"
_sel_pt2 = None
if getattr(c_opts, "pt2", None) is not None:
# SHCI perturbative stage on the converged selected
# wavefunction, in the converged orbital basis (the
# variational headline energy is unchanged; the PT2
# estimate surfaces on its own).
from .solvers._selected_ci import selected_ci_pt2
p = c_opts.pt2
_sel_pt2 = selected_ci_pt2(
sc.h1e_cas,
sc.h2e_cas,
n_active_elec,
n_active_orb,
n_core,
result=sc.cas,
eps2=p.eps2,
n_samples=p.n_samples,
sample_size=p.sample_size,
eps2_loose=p.eps2_loose,
seed=p.seed,
)
return SolverResult(
energy=sc.e_total,
method=label,
converged=sc.converged,
n_iter=sc.n_iter,
energy_trace=sc.energy_trace or [sc.e_total],
root_energies=list(sc.e_totals) or None,
root_s2=_root_s2_values(
sc.cas, molecule.multiplicity - 1
),
ci_coeffs=sc.cas.ci_coeffs,
ci_labels=sc.cas.determinants,
rdm1=_active_rdm1(sc.cas),
selected_pt2=_sel_pt2,
)
# CASSCF-referenced MR-PT2: supplying casscf_options switches the
# nevpt2 / caspt2 reference from CASCI-on-HF-orbitals to a CASSCF
# whose converged integrals + lowest-root CI vector seed the PT2 —
# the standard CASSCF→CASPT2/NEVPT2 composition (mirrors the MRCI
# pattern below). Without casscf_options the historical
# CASCI-on-HF reference is kept (matches the recorded OpenMolcas
# `RASSCF CIonly` parity constants).
h1_ref, h2_ref = H.h1e, H.h2e
ref_suffix = ""
ref_root_energies: list[float] = []
if method in ("nevpt2", "caspt2") and casscf_options is not None:
from .solvers._casscf import casscf as _run_casscf
c_opts = casscf_options
if c_opts.active_orbitals is not None:
raise ValueError(
"CASSCF-referenced MR-PT2 does not support "
"active_orbitals window selection yet; use the "
"default lowest-core active window."
)
if c_opts.ci_solver != "casci":
raise ValueError(
"CASSCF-referenced MR-PT2 requires the exact CI "
"backend (the PT2 engines re-solve the full CAS "
"determinant space on the converged orbitals); "
"ci_solver='selected_ci' is method='casscf' only"
)
# spin_pure=None resolves inside casscf(): True for SA
# references (nroots > 1) since 2026-06-11, covering the
# multi-state CASPT2 composition (whose ⟨S²⟩-filtered
# model space needs same-spin SA orbitals), and False for
# single-state references.
sc = _run_casscf(
H.h1e,
H.h2e,
n_active_elec=n_active_elec,
n_active_orb=n_active_orb,
n_core=n_core,
nuclear_repulsion=H.nuclear_repulsion,
ms2=molecule.multiplicity - 1,
nroots=c_opts.nroots,
weights=c_opts.weights,
orbital_step=c_opts.orbital_step,
spin_pure=c_opts.spin_pure,
)
if not sc.converged:
raise RuntimeError(
f"CASSCF reference for method={method!r} did not "
f"converge (|g| = {sc.grad_norm:.2e} after "
f"{sc.n_iter} macro-iterations)"
)
cas = sc.cas
h1_ref, h2_ref = sc.h1e_cas, sc.h2e_cas
ref_suffix = "_casscf"
ref_root_energies = list(sc.e_totals)
else:
# casci_options is method="casci"-only; the CASCI built here
# as the *reference* for nevpt2 / caspt2 / mrci keeps the
# solver defaults (single root — the PT2 / MRCI engines seed
# from root 0; the MS-CASPT2 model space re-solves its own
# multi-root CASCI inside ms_caspt2()).
ci_opts = (
(casci_options or CASCIOptions())
if method == "casci"
else CASCIOptions()
)
cas = _run_casci(
H.h1e,
H.h2e,
n_active_elec=n_active_elec,
n_active_orb=n_active_orb,
n_core=n_core,
nuclear_repulsion=H.nuclear_repulsion,
ms2=molecule.multiplicity - 1,
nroots=ci_opts.nroots,
max_det=ci_opts.max_det,
)
if method == "casci":
energy, label = cas.e_total, f"casci({n_active_elec}e,{n_active_orb}o)"
if cas.nroots > 1:
# Multi-root CASCI: per-root energies + ⟨S²⟩ reach
# SolverResult.root_energies / .root_s2 via the shared
# return below (headline ``energy`` stays root 0).
ref_root_energies = list(cas.e_totals)
elif method == "mrci":
from .solvers._mrci import mrci as _run_mrci
# If casscf_options are provided, run CASSCF first for
# an orbital-optimized reference.
if casscf_options is not None:
from .solvers._casscf import casscf as _run_casscf
c_opts = casscf_options or CASSCFOptions()
if c_opts.ci_solver != "casci":
raise ValueError(
"CASSCF-referenced MRCI requires the exact CI "
"backend; ci_solver='selected_ci' is "
"method='casscf' only"
)
sc = _run_casscf(
H.h1e,
H.h2e,
n_active_elec=n_active_elec,
n_active_orb=n_active_orb,
n_core=n_core,
nuclear_repulsion=H.nuclear_repulsion,
ms2=molecule.multiplicity - 1,
nroots=c_opts.nroots,
weights=c_opts.weights,
orbital_step=c_opts.orbital_step,
active_orbitals=c_opts.active_orbitals,
spin_pure=c_opts.spin_pure,
)
h1, h2 = sc.h1e_cas, sc.h2e_cas
else:
h1, h2 = H.h1e, H.h2e
mc = _run_mrci(
h1,
h2,
n_active_elec=n_active_elec,
n_active_orb=n_active_orb,
n_core=n_core,
nuclear_repulsion=H.nuclear_repulsion,
ms2=molecule.multiplicity - 1,
)
label = f"mrci({n_active_elec}e,{n_active_orb}o)"
if casscf_options is not None:
label += "_casscf"
energy = mc.e_total
elif method == "nevpt2":
from .solvers._mrpt import nevpt2 as _run_nevpt2
pt = _run_nevpt2(cas, h1_ref, h2_ref, n_core=n_core, n_virt=n_virt)
energy = pt.e_total
label = f"nevpt2({n_active_elec}e,{n_active_orb}o){ref_suffix}"
else: # caspt2 — internally-contracted (default), un-gated
from .solvers._mrpt import caspt2 as _run_caspt2
cp = caspt2_options or CASPT2Options()
if cp.multistate is not None:
# MS/XMS-CASPT2 (Finley 1998 / Granovsky 2011 +
# Shiozaki 2011): multi-state effective Hamiltonian over
# the lowest nroots spin-pure CASCI roots of the
# reference basis (HF orbitals, or the converged
# SA-CASSCF basis when casscf_options are supplied).
from .solvers._ms_caspt2 import ms_caspt2 as _run_ms
if cp.multistate not in ("ms", "xms"):
raise ValueError(
"caspt2_options.multistate must be None, 'ms' "
f"or 'xms', got {cp.multistate!r}"
)
if cp.variant != "ic":
raise ValueError(
"multi-state CASPT2 requires variant='ic' "
f"(got {cp.variant!r})"
)
if cp.ipea != 0.0:
raise ValueError(
"multi-state CASPT2 supports ipea=0 only (the "
"IPEA shift is gated and explicit-engine-only)"
)
ms_nroots = cp.nroots or (
casscf_options.nroots if casscf_options else 0
)
if ms_nroots < 2:
raise ValueError(
"multi-state CASPT2 needs >= 2 model states: "
"set caspt2_options.nroots (CASCI reference) "
"or casscf_options.nroots (SA-CASSCF reference)"
)
msr = _run_ms(
h1_ref,
h2_ref,
n_core,
n_virt,
nroots=ms_nroots,
nuclear_repulsion=H.nuclear_repulsion,
mode=cp.multistate,
n_frozen=cp.n_frozen,
imaginary=cp.imaginary,
ms2=molecule.multiplicity - 1,
n_act_elec=n_active_elec,
)
label = (
f"caspt2({n_active_elec}e,{n_active_orb}o)"
f"_{cp.multistate}{ms_nroots}{ref_suffix}"
)
return SolverResult(
energy=msr.e_total,
method=label,
converged=True,
n_iter=1,
energy_trace=[msr.e_total],
root_energies=list(msr.energies),
ci_coeffs=cas.ci_coeffs,
ci_labels=cas.determinants,
rdm1=_active_rdm1(cas),
multistate=dict(
mode=msr.mode,
nroots=msr.nroots,
heff=msr.heff,
heff_asym=msr.heff_asym,
mixing=msr.mixing,
ss_energies=list(msr.ss_energies),
ref_energies=list(msr.ref_energies),
e2_corr=list(msr.e2_corr),
xms_rotation=msr.xms_rotation,
s2_values=list(msr.s2_values),
),
)
pt = _run_caspt2(
cas,
h1_ref,
h2_ref,
n_core=n_core,
n_virt=n_virt,
variant=cp.variant,
ipea=cp.ipea,
imaginary=cp.imaginary,
n_frozen=cp.n_frozen,
)
energy = pt.e_total
label = f"caspt2({n_active_elec}e,{n_active_orb}o){ref_suffix}"
return SolverResult(
energy=energy,
method=label,
# Direct CI + non-iterative PT2: a returned result means the
# eigensolve succeeded.
converged=True,
n_iter=1,
energy_trace=[energy],
root_energies=ref_root_energies or None,
root_s2=(
_root_s2_values(cas, molecule.multiplicity - 1)
if ref_root_energies
else None
),
# Reference-wavefunction diagnostics (the CASCI / CASSCF
# reference for PT2 methods): leading configurations +
# active natural occupations in the .out solver block.
ci_coeffs=cas.ci_coeffs,
ci_labels=cas.determinants,
rdm1=_active_rdm1(cas),
)
if method == "fci":
import numpy as np
from scipy.linalg import eigh
from .solvers import (
build_hamiltonian_matrix_unrestricted,
diagonal_matrix_element_unrestricted,
generate_determinants,
)
hf_method = "uhf" if molecule.multiplicity > 1 else "rhf"
C = get_hf_orbital_provider(molecule, basis, method=hf_method)
ham = build_hamiltonian_mo(molecule, basis, C)
if active_space is not None:
n_active, n_elec = active_space
n_frozen = max(0, ham.norb - n_active)
C_act = np.asarray(C)[:, n_frozen : n_frozen + n_active]
ham = build_hamiltonian_mo(molecule, basis, C_act)
ham.nelec = n_elec
norb = ham.norb
nelec = ham.nelec
nalpha = (nelec + ham.ms2) // 2
nbeta = (nelec - ham.ms2) // 2
all_dets = generate_determinants(norb, nalpha, nbeta)
H = build_hamiltonian_matrix_unrestricted(all_dets, ham.h1e, ham.h2e)
evals, evecs = eigh(H)
e_fci = evals[0] + ham.nuclear_repulsion
return SolverResult(
energy=e_fci,
method=f"fci(ndet={len(all_dets)})",
converged=True,
n_iter=1,
energy_trace=[e_fci],
ci_coeffs=evecs[:, 0],
ci_labels=all_dets,
)
# ── Semiempirical methods ──
se_methods = {
"dftb0",
"scc_dftb",
"pm6",
"gfn2_xtb",
"om1",
"om2",
"om3",
"msindo",
}
if method in se_methods:
if solvent is not None and method == "msindo":
# MSINDO implicit solvation (COSMO) — its own multipole solute↔cavity
# coupling + GEPOL cavity, not run_job's HF/DFT ESP-on-grid path
# above. Resolve the solvent to a dielectric and run the dedicated
# driver; the in-solvent total becomes ``result.energy``.
from vibeqc.semiempirical.methods.msindo import ANGSTROM_TO_BOHR as _A2B
from vibeqc.semiempirical.methods.msindo_cosmo import msindo_cosmo
from vibeqc.solvation.driver import resolve_solvent
sm = resolve_solvent(solvent)
if sm is None or sm.is_gas_phase:
return _run_semiempirical(method, molecule, nddo=nddo)
Z = [at.Z for at in molecule.atoms]
coords_ang = [[c / _A2B for c in at.xyz] for at in molecule.atoms]
rc = msindo_cosmo(
Z,
coords_ang,
epsilon=sm.epsilon,
charge=int(molecule.charge),
multiplicity=int(molecule.multiplicity),
)
return _SEMPR(
float(rc.total_energy),
None,
method,
converged=bool(rc.converged),
n_iter=int(rc.n_iter),
binding_energy=float(rc.binding_energy),
e_solv=float(rc.e_solv),
e_gas=float(rc.e_gas),
)
return _run_semiempirical(method, molecule, nddo=nddo)
# ── Periodic MSINDO (Cyclic Cluster Model) ──
if method == "ccm":
return _run_ccm(molecule, ccm_options)
# ── Machine-learning interatomic potentials (MACE, …) ──
if method in _MLIP_METHODS:
return _run_mlip(method, molecule, mlip_options)
raise ValueError(
f"Unknown method {method!r}; use 'rhf', 'uhf', 'rks', 'uks', "
f"'selected_ci', 'dmrg', 'v2rdm', 'transcorrelated_ci', 'casci', "
f"'mrci', 'casscf', 'nevpt2', 'caspt2', 'fci', "
f"'ccsd', 'ccsd(t)', 'dftb0', 'scc_dftb', 'pm6', 'gfn2_xtb', "
f"'om1', 'om2', 'om3', 'mace', or 'auto'"
)
def _run_semiempirical(method: str, molecule: Molecule, *, nddo: bool = False):
"""Run a semiempirical calculation and return a result-like object.
``nddo`` (method="msindo" only) selects MSINDO's NDDO mode — the reference
program's default; ignored by the other semiempirical methods."""
import numpy as np
# ── DFTB methods ──
if method in ("dftb0", "scc_dftb"):
from vibeqc.semiempirical import DFTB0Model, SCCDFTBModel
from vibeqc.semiempirical.parameters import default_parameters
params = default_parameters()
if method == "dftb0":
model = DFTB0Model(molecule, params=params)
else:
model = SCCDFTBModel(molecule, params=params)
g = model.gradient() if hasattr(model, "gradient") else None
return _SEMPR(model.energy(), g, method, converged=True, n_iter=1)
# ── GFN2-xTB (experimental — model.energy() emits GFN2ExperimentalWarning) ──
if method == "gfn2_xtb":
from vibeqc.semiempirical.methods.gfn2 import GFN2Model
from vibeqc.semiempirical.methods.gfn2_params import load_gfn2_params
params = load_gfn2_params()
model = GFN2Model(molecule, params)
energy = model.energy()
return _SEMPR(
energy,
None,
method,
converged=model.converged,
n_iter=model.n_iter,
)
# ── NDDO methods (PM6, OM1, OM2, OM3) ──
# OM1 only: the published OM1 evaluates its core-valence ECP analytically
# (Kolb & Thiel 1993) and publishes no semiempirical ECP parameters
# (Dral 2016 Table 1). vibe-qc does not implement that analytic ECP yet,
# so OM1 lacks part of its core-valence Pauli repulsion: X-H bonds come
# out ~0.3 A short and the uncapped F2 orthogonalization term can
# variationally collapse at close non-bonded contacts (eclipsed ethane
# drops ~0.4 Ha; with an ECP restored the barrier is +1.9 kcal/mol vs
# published 1.8). PM6/OM2/OM3 carry their full published Hamiltonians
# and are not gated.
if method == "om1":
import warnings as _warnings
from vibeqc.semiempirical import NDDOExperimentalWarning
_warnings.warn(
"method='om1': OM1's analytically-evaluated core-valence ECP "
"(Kolb & Thiel 1993) is not implemented; bonds to heavy atoms "
"come out ~0.3 A short and close non-bonded contacts can "
"variationally collapse. Use om2/om3 for production. "
"See HANDOVER_SEMIEMPIRICAL_PES.md.",
category=NDDOExperimentalWarning,
stacklevel=2,
)
from vibeqc._vibeqc_core.semiempirical.nddo import (
run_omx_v2,
run_pm6,
)
if method == "pm6":
from vibeqc.semiempirical.methods.pm6_params import load_pm6_params_auto
params = load_pm6_params_auto([at.Z for at in molecule.atoms])
if molecule.multiplicity > 1:
from vibeqc._vibeqc_core.semiempirical.nddo import (
compute_upm6_gradient_fd,
run_upm6,
)
result_obj = run_upm6(molecule, params, max_iter=200)
try:
g = compute_upm6_gradient_fd(molecule, params)
except Exception:
g = None
result = _SEMPR(
float(result_obj.energy),
g,
method,
converged=bool(result_obj.converged),
n_iter=int(result_obj.n_iter),
)
else:
result_obj = run_pm6(molecule, params, max_iter=200)
from vibeqc._vibeqc_core.semiempirical.nddo import compute_pm6_gradient_fd
try:
g = compute_pm6_gradient_fd(molecule, params)
except Exception:
g = None
result = _SEMPR(
float(result_obj.energy),
g,
method,
converged=bool(result_obj.converged),
n_iter=int(result_obj.n_iter),
)
return result
if method == "msindo":
# MSINDO (Bredow/Geudtner/Jug INDO) — closed-shell RHF (s/p/d) +
# open-shell UHF (s/p).
from pathlib import Path
from vibeqc.semiempirical.methods.msindo import (
ANGSTROM_TO_BOHR as _A2B,
)
from vibeqc.semiempirical.methods.msindo import (
eff_core_charge as _cz,
)
Z = [at.Z for at in molecule.atoms]
# Atom coords are stored in bohr; both engines take Angstrom and
# re-convert with the same MSINDO constant, so the round-trip is exact.
coords_ang = [[c / _A2B for c in at.xyz] for at in molecule.atoms]
charge = int(molecule.charge)
mult = int(molecule.multiplicity)
nelec = sum(_cz(z) for z in Z) - charge
open_shell = mult != 1 or nelec % 2 != 0
has_d = any(z >= 13 and z not in (1, 2, 6, 7, 8, 9) for z in Z)
# Determine whether the C++ backend can handle this job.
# C++ covers: neutral or not, closed-shell s/p/d (full Z=1-54) and
# UHF s/p (H–F). Fallback to Python for: charged systems with d-shell,
# open-shell d-shell, and anything not yet landed.
use_cpp = False
if not open_shell:
# Closed-shell: C++ handles full s/p/d with params loaded from JSON.
use_cpp = True
elif not has_d:
# Open-shell s/p: C++ UHF handles H–F elements.
use_cpp = True
if use_cpp:
from vibeqc._vibeqc_core.semiempirical.indo import (
MsindoParameterSet,
load_params_from_json,
merge_nddo_overrides,
run_msindo_full,
run_msindo_uhf,
)
# Load the full parameter set once (lazy init via module-level cache).
_param_path = (
Path(__file__).resolve().parent
/ "semiempirical"
/ "methods"
/ "msindo_params.json"
)
params = load_params_from_json(_param_path.read_text())
if nddo:
_nddo_path = (
Path(__file__).resolve().parent
/ "semiempirical"
/ "methods"
/ "msindo_params_nddo.json"
)
merge_nddo_overrides(params, _nddo_path.read_text())
if open_shell:
result_obj = run_msindo_uhf(
Z,
coords_ang,
params,
max_iter=200,
multiplicity=mult,
nddo=nddo,
)
else:
result_obj = run_msindo_full(
Z,
coords_ang,
params,
max_iter=200,
nddo=nddo,
)
else:
from vibeqc.semiempirical.methods.msindo import run_msindo as _run_py
result_obj = _run_py(
Z,
coords_ang,
charge=charge,
multiplicity=mult,
max_iter=200,
nddo=nddo,
)
return _SEMPR(
float(result_obj.total_energy),
None,
method,
converged=bool(result_obj.converged),
n_iter=int(result_obj.n_iter),
binding_energy=float(getattr(result_obj, "binding_energy", 0.0)),
)
if method in ("om1", "om2", "om3"):
from vibeqc.semiempirical.methods.omx_params import (
load_om1_params,
load_om2_params,
load_om3_params,
)
loader = {
"om1": load_om1_params,
"om2": load_om2_params,
"om3": load_om3_params,
}[method]
params = loader()
if molecule.multiplicity > 1:
from vibeqc._vibeqc_core.semiempirical.nddo import (
compute_uomx_v2_gradient_fd,
run_uomx_v2,
)
result_obj = run_uomx_v2(molecule, params, max_iter=200)
try:
g = compute_uomx_v2_gradient_fd(molecule, params)
except Exception:
g = None
result = _SEMPR(
float(result_obj.energy),
g,
method,
converged=bool(result_obj.converged),
n_iter=int(result_obj.n_iter),
)
else:
result_obj = run_omx_v2(molecule, params, max_iter=200)
from vibeqc._vibeqc_core.semiempirical.nddo import (
compute_omx_v2_gradient_fd,
)
try:
g = compute_omx_v2_gradient_fd(molecule, params)
except Exception:
g = None
result = _SEMPR(
float(result_obj.energy),
g,
method,
converged=bool(result_obj.converged),
n_iter=int(result_obj.n_iter),
)
return result
raise ValueError(f"Unhandled semiempirical method: {method!r}")
def _run_ccm(molecule: Molecule, ccm_options=None):
"""Periodic MSINDO via the Cyclic Cluster Model (method="ccm").
Routes to the C++ CCM engine when available, with Python fallback.
"""
from vibeqc.molecule import ANGSTROM_TO_BOHR as _A2B
if ccm_options is None or not getattr(ccm_options, "translations", None):
raise ValueError(
"method='ccm' needs ccm_options carrying the supercell lattice: "
"run_job(mol, method='ccm', ccm_options=CCMOptions("
"translations=[[a,0,0], ...], madelung=True)). 1 vector -> CCM1D "
"(polymer), 2 -> CCM2D (surface), 3 -> CCM3D (bulk)."
)
if int(molecule.multiplicity) != 1:
raise NotImplementedError(
"periodic CCM is closed-shell (RHF) only; open-shell / ROHF CCM is "
"not implemented. Use a closed-shell cell (multiplicity=1)."
)
Z = [at.Z for at in molecule.atoms]
coords_ang = [[c / _A2B for c in at.xyz] for at in molecule.atoms]
# Try C++ CCM engine first.
try:
from pathlib import Path as _Path
from vibeqc._vibeqc_core.semiempirical.indo import (
load_params_from_json,
)
from vibeqc._vibeqc_core.semiempirical.indo import (
run_ccm as _run_ccm_cpp,
)
_PARAM_PATH = (
_Path(__file__).resolve().parent
/ "semiempirical"
/ "methods"
/ "msindo_params.json"
)
params = load_params_from_json(_PARAM_PATH.read_text())
result_obj = _run_ccm_cpp(
Z,
coords_ang,
ccm_options.translations,
params,
madelung=ccm_options.madelung,
)
if not getattr(result_obj, "converged", False):
# C++ engine declined (e.g. element out of scope) — fall back.
raise RuntimeError("C++ CCM did not converge")
except Exception:
from vibeqc.semiempirical.methods.msindo_ccm import run_ccm as _run_ccm_py
result_obj = _run_ccm_py(
Z,
coords_ang,
ccm_options.translations,
charge=int(molecule.charge),
madelung=ccm_options.madelung,
)
return _SEMPR(
float(result_obj.total_energy),
None,
"ccm",
converged=bool(result_obj.converged),
n_iter=int(result_obj.n_iter),
)
def _citation_manifest_rows(refs: Any) -> list[dict[str, Any]]:
"""Flatten an :class:`AssembledCitations` into the scalar-dict rows
the ``.system`` manifest's ``[citations]`` section expects.
Uses the *full* ``.citations`` view — every entry regardless of its
``print`` flag (CLAUDE.md §8.3/§8.5: the ``.system`` manifest is the
internal-provenance destination, so link-time-only entries like
pybind11 belong here even though they are filtered out of the
user-facing references block). Missing optional fields become ``""``
sentinels so the array-of-tables keeps a fixed shape across rows.
"""
rows: list[dict[str, Any]] = []
for c in getattr(refs, "citations", ()) or ():
year = getattr(c, "year", None)
rows.append(
{
"key": str(getattr(c, "key", "")),
"kind": str(getattr(c, "kind", "")),
"authors": "; ".join(getattr(c, "authors", ()) or ()),
"title": str(getattr(c, "title", "")),
"journal": str(getattr(c, "journal", None) or ""),
"year": year if isinstance(year, int) else str(year or ""),
"doi": str(getattr(c, "doi", None) or ""),
"license": str(getattr(c, "license", None) or ""),
"print": bool(getattr(c, "print", True)),
}
)
return rows
def _run_mlip(method: str, molecule: Molecule, mlip_options=None):
"""Run a machine-learning interatomic potential and return a
result-like object (duck-typing the SCF-result interface).
vibe-qc drives the external pre-trained model's forward pass; the
returned ``.energy`` is the model's reference-shifted DFT-surface
energy (Hartree), NOT a total electronic energy (``CLAUDE.md`` §10 /
energy-scale caveat — do not mix with HF/DFT totals).
``mlip_options`` (:class:`vibeqc.mlip.MLIPOptions`) selects the model,
device, and dtype, and carries the ASL acknowledgment. Selecting an
ASL (academic, non-commercial) model without acknowledgment raises
:class:`PermissionError` (see :mod:`vibeqc.mlip.mace`).
"""
if method == "mace":
from vibeqc.mlip.mace import MACEModel
model = MACEModel(molecule, mlip_options)
return _MLIPResult(
model.energy(),
model.gradient(),
method,
converged=True,
n_iter=1,
model_info=model.info,
)
raise ValueError(f"Unhandled MLIP method: {method!r}")
class _SEMPR:
"""Semiempirical result wrapper — duck-typing the interface
format_scf_trace and run_job expect."""
def __init__(
self,
energy,
gradient=None,
method_name="",
converged=True,
n_iter=1,
binding_energy=None,
e_solv=None,
e_gas=None,
):
self.energy = energy
self._gradient = gradient
self.method = method_name
self.converged = converged
self.n_iter = n_iter
# MSINDO reports an atomization/binding energy (E − Σ atomic reference)
# directly; None for engines that don't (PM6 / OMx / DFTB / GFN2).
self.binding_energy = binding_energy
# Implicit-solvation diagnostics (MSINDO COSMO via run_job(solvent=...)):
# ``energy`` is then the in-solvent total, ``e_gas`` the gas reference,
# ``e_solv = energy − e_gas``. None for gas-phase runs.
self.e_solv = e_solv
self.e_gas = e_gas
self.scf_trace = []
def gradient(self):
return self._gradient
class _MLIPResult:
"""Machine-learning-interatomic-potential result wrapper — duck-types
the same interface as :class:`_SEMPR` (energy, gradient(), method,
converged, n_iter, scf_trace). ``energy`` is the model's
reference-shifted DFT-surface energy (Hartree), not a total electronic
energy."""
def __init__(
self,
energy,
gradient=None,
method_name="",
converged=True,
n_iter=1,
model_info=None,
):
self.energy = energy
self._gradient = gradient
self.method = method_name
self.converged = converged
self.n_iter = n_iter
self.scf_trace = []
# MaceModelInfo (or None) — provenance for the MLIP model actually
# run; used by run_job for the per-model citation + .out/.system.
self.model_info = model_info
def gradient(self):
return self._gradient
def _format_mlip_provenance(info) -> str:
"""A self-documenting MLIP provenance block for the .out — which
external pre-trained model produced these numbers, its license, and
how to read the (model-specific, reference-shifted) energy."""
lic = info.license.upper()
if lic == "MIT":
lic_note = "MIT [free for commercial + academic use]"
elif lic == "ASL":
lic_note = "ASL [ACADEMIC, NON-COMMERCIAL use only]"
else:
lic_note = f"{info.license} [unverified — treated as academic-only]"
elts = f", {info.elements} elements" if info.elements else ""
lines = [
" " + "-" * 60,
" MACE machine-learning interatomic potential",
" " + "-" * 60,
f" Model: {info.key} ({info.domain}{elts})",
f" License: {lic_note}",
f" Training: {info.training_data} | Theory: {info.theory}",
" Energy: model-specific reference-shifted scale — NOT a",
" vibe-qc total energy, not comparable across models.",
]
if info.doi:
lines.append(f" Reference: doi:{info.doi} (cited in the .bibtex sibling)")
lines.append(
" vibe-qc drives ACEsuit MACE's pre-trained forward pass (CLAUDE.md §10)."
)
lines.append(" " + "-" * 60)
return "\n".join(lines)
def _active_rdm1(cas):
"""Active-space 1-RDM of a CASCI result's lowest root.
Feeds the natural-occupation diagnostic in the .out solver block;
routed through solvers._rdm.make_rdm12 so large full-CAS spaces use
the C++ kernel. Diagnostics must never kill a converged run, so any
failure degrades to None (the block is simply omitted).
"""
try:
from .solvers._rdm import make_rdm12
return make_rdm12(cas.ci_coeffs, cas.determinants, cas.n_active_orb)[0]
except Exception:
return None
#: Skip the per-root ⟨S²⟩ diagnostic above this determinant count: the
#: spin-orbital-dict evaluation is Python-side and would add tens of
#: seconds on multi-million-determinant direct-CI vectors.
_ROOT_S2_MAX_DET = 200_000
def _root_s2_values(cas, ms2):
"""Per-root ⟨S²⟩ of a multi-root CASCI result, for the .out root table.
Shows which spin sector each state-averaged root lives in, the
visibility surface for the 2026-06-11 spin-pure SA default (spin-pure
runs print S(S+1) rows; ``spin_pure=False`` ensembles expose their
mixed-spin composition). Same degrade-to-omission contract as
:func:`_active_rdm1`: any failure (or an oversized determinant list)
returns None and the column is omitted.
"""
try:
if cas.ci_coeffs_all is None or cas.n_det > _ROOT_S2_MAX_DET:
return None
from .solvers._ms_caspt2 import _s2_expectation
return [
float(
_s2_expectation(
cas.ci_coeffs_all[:, k],
cas.determinants,
cas.n_active_orb,
ms2,
)
)
for k in range(cas.ci_coeffs_all.shape[1])
]
except Exception:
return None
def _format_solver_trace(result: SolverResult) -> str:
"""Format a non-mean-field solver result for the .out file."""
import numpy as np
lines = []
lines.append(f" Method: {result.method}")
lines.append(f" Total energy: {result.energy:16.10f} Ha")
lines.append(f" Converged: {result.converged}")
lines.append(f" Iterations/sweeps: {result.n_iter}")
if result.pt2_correction is not None and abs(result.pt2_correction) > 1e-12:
e_var = result.energy - result.pt2_correction
lines.append(f" E(variational): {e_var:16.10f} Ha")
lines.append(f" E(PT2 correction): {result.pt2_correction:16.10f} Ha")
if result.bond_dim is not None:
lines.append(f" Bond dimension: {result.bond_dim}")
if result.truncation_error is not None:
lines.append(f" Truncation error: {result.truncation_error:.2e}")
if result.constraint_residual is not None:
lines.append(f" Constraint resid.: {result.constraint_residual:.2e}")
if result.ci_labels is not None and result.ci_coeffs is not None:
lines.append(f" Determinants: {len(result.ci_labels)}")
c_abs = np.abs(result.ci_coeffs)
n_show = min(8, len(c_abs))
idx = np.argsort(-c_abs)
lines.append(" Leading configurations:")
norb = 0
for rank in range(n_show):
i = idx[rank]
label = result.ci_labels[i]
# Format SpinDet as α|...⟩ β|...⟩, Det as |...⟩
if (
isinstance(label, tuple)
and len(label) == 2
and isinstance(label[0], tuple)
):
a_str = "".join(
"1" if j in label[0] else "0"
for j in range(max(label[0]) + 1 if label[0] else 0)
)
b_str = "".join(
"1" if j in label[1] else "0"
for j in range(max(label[1]) + 1 if label[1] else 0)
)
lines.append(f" |c_{rank}| = {c_abs[i]:.6f} α|{a_str}⟩ β|{b_str}⟩")
else:
norb = max(norb, max(label) + 1 if label else 0)
occ_str = "".join("1" if j in label else "0" for j in range(norb))
lines.append(f" |c_{rank}| = {c_abs[i]:.6f} |{occ_str}⟩")
if result.root_energies is not None and len(result.root_energies) > 1:
lines.append(" Root energies:")
s2s = result.root_s2
if s2s is not None and len(s2s) != len(result.root_energies):
s2s = None
for k, e_root in enumerate(result.root_energies):
s2_note = f" <S^2> = {s2s[k]:7.4f}" if s2s is not None else ""
lines.append(f" root {k}: E = {e_root:16.10f} Ha{s2_note}")
if result.selected_pt2:
lines.append(
" Epstein-Nesbet PT2 on the selected wavefunction"
" (Sharma et al 2017; variational energy unchanged):"
)
for k, p in enumerate(result.selected_pt2):
sig = (
f" +/- {p['stderr']:.8f}"
if p.get("stderr")
else ""
)
npert = (
f" ({p['n_perturbers']} perturbers)"
if p.get("n_perturbers") is not None
else " (semistochastic)"
)
lines.append(
f" root {k}: E_PT2 = {p['e_pt2']:16.10f} Ha{sig}"
)
lines.append(
f" E_var+PT2 = {p['e_total']:16.10f} Ha{npert}"
)
if result.multistate is not None:
ms = result.multistate
nst = ms["nroots"]
mode_label = "XMS-CASPT2" if ms["mode"] == "xms" else "MS-CASPT2"
lines.append(f" {mode_label} ({nst} states):")
lines.append(
" model-space reference energies (Ha): "
+ " ".join(f"{e:16.10f}" for e in ms["ref_energies"])
)
if ms.get("s2_values"):
lines.append(
" model-space <S^2> (spin-pure roots): "
+ " ".join(f"{s:16.4f}" for s in ms["s2_values"])
)
lines.append(
" single-state CASPT2 diagonal (Ha): "
+ " ".join(f"{e:16.10f}" for e in ms["ss_energies"])
)
if ms["mode"] == "xms":
lines.append(" XMS model-space rotation U (column = rotated state):")
for row in np.asarray(ms["xms_rotation"]):
lines.append(
" " + " ".join(f"{u:10.6f}" for u in row)
)
lines.append(" effective Hamiltonian (symmetrized, Ha):")
for row in np.asarray(ms["heff"]):
lines.append(" " + " ".join(f"{h:16.10f}" for h in row))
lines.append(" mixing (column k = multi-state root k):")
for row in np.asarray(ms["mixing"]):
lines.append(" " + " ".join(f"{c:10.6f}" for c in row))
if result.rdm1 is not None:
occ = np.linalg.eigvalsh(np.asarray(result.rdm1))[::-1]
lines.append(
" Natural occupations (active): "
+ " ".join(f"{o:7.4f}" for o in occ)
)
if result.energy_trace:
lines.append(" Energy trace:")
for i, e in enumerate(result.energy_trace):
lines.append(f" iter {i + 1:4d}: E = {e:16.10f} Ha")
return "\n".join(lines)
def _geom_summary(molecule: Molecule) -> str:
"""Three-column per-atom geometry listing (bohr)."""
lines = [" Atoms (bohr)", " " + "-" * 54]
for i, atom in enumerate(molecule.atoms, start=1):
lines.append(
f" {i:4d} Z={atom.Z:3d} "
f"{atom.xyz[0]:14.8f} {atom.xyz[1]:14.8f} {atom.xyz[2]:14.8f}"
)
lines.append(
f" charge={molecule.charge} multiplicity={molecule.multiplicity} "
f"n_electrons={molecule.n_electrons()}"
)
return "\n".join(lines)
def _job_header(
method: str, basis_name: str, functional: Optional[str], scf_reference: str = "RHF"
) -> str:
label = f"{method.upper()}"
if method in ("rks", "uks") and functional:
label = f"{label} / {functional}"
# Post-SCF methods name their mean-field reference (RHF for closed shell,
# UHF for an open-shell MP2/UMP2 run).
if method in ("ccsd", "ccsd(t)", "mp2", "scs-mp2", "sos-mp2", "dlpno-mp2",
"dlpno-ccsd", "dlpno-ccsd(t)", "ovgf"):
label = f"{scf_reference.upper()} + {method.upper()}"
return f" Job: {label} basis={basis_name}"
def _make_semiempirical_ase_calculator(
molecule: Molecule,
method: str,
):
"""Return an ASE Calculator that computes energy via the
semiempirical dispatcher and forces via central finite differences."""
try:
from ase.calculators.calculator import Calculator
from ase.units import Bohr, Hartree
except ImportError:
raise ImportError("Geometry optimization requires ASE: pip install ase")
import numpy as np
from ._vibeqc_core import Atom as _Atom
from ._vibeqc_core import Molecule as _Molecule
h = 0.001 # bohr
class _SemiempiricalCalculator(Calculator):
implemented_properties = ["energy", "forces"]
def calculate(self, atoms, properties, system_changes):
# Record self.atoms per the ASE Calculator contract. Without
# it check_state() reports every query as changed, so cached
# results are discarded (each get_* triggers a fresh solve)
# and TrajectoryWriter finds no stored energy — semiempirical
# optimization trajectories lost their per-frame energies.
Calculator.calculate(self, atoms, properties, system_changes)
positions_bohr = atoms.positions / Bohr
mol = _Molecule(
[
_Atom(int(z), list(xyz))
for z, xyz in zip(atoms.numbers, positions_bohr)
],
charge=molecule.charge,
multiplicity=molecule.multiplicity,
)
result = _run_semiempirical(method, mol)
energy_ha = float(getattr(result, "energy", 0.0))
self.results["energy"] = energy_ha * Hartree
if "forces" in properties:
forces_ha_bohr = np.zeros((len(atoms), 3))
for i in range(len(atoms)):
for c in range(3):
pos_plus = positions_bohr.copy()
pos_plus[i, c] += h
mol_p = _Molecule(
[
_Atom(int(z), list(xyz))
for z, xyz in zip(atoms.numbers, pos_plus)
],
charge=molecule.charge,
multiplicity=molecule.multiplicity,
)
e_p = float(
getattr(_run_semiempirical(method, mol_p), "energy", 0.0)
)
pos_minus = positions_bohr.copy()
pos_minus[i, c] -= h
mol_m = _Molecule(
[
_Atom(int(z), list(xyz))
for z, xyz in zip(atoms.numbers, pos_minus)
],
charge=molecule.charge,
multiplicity=molecule.multiplicity,
)
e_m = float(
getattr(_run_semiempirical(method, mol_m), "energy", 0.0)
)
forces_ha_bohr[i, c] = (e_m - e_p) / (2.0 * h)
self.results["forces"] = forces_ha_bohr * (Hartree / Bohr)
return _SemiempiricalCalculator()
def _make_wavefunction_ase_calculator(
molecule: Molecule,
basis_name: str,
method: str,
*,
functional=None,
selected_ci_options=None,
dmrg_options=None,
v2rdm_options=None,
transcorrelated_options=None,
casci_options=None,
caspt2_options=None,
casscf_options=None,
active_space=None,
cas_reference=None,
dft_plus_u: Optional[List["HubbardSite"]] = None,
):
"""Return an ASE Calculator that computes energy via the wavefunction
solver and forces via central finite differences (h = 0.001 bohr).
When ``dft_plus_u`` is set, the per-geometry SCF (energy + each FD
gradient probe) runs with +U via :func:`_run_single_point`'s
``dft_plus_u`` kwarg. The analytic +U gradient (Increment 3) is
*not* used by this calculator — FD is good enough for BFGS step
direction and avoids re-plumbing the calculator's force-call
contract. For a tight optimization with analytic +U gradient,
call :func:`vibeqc.compute_gradient` (or sibling) directly with
``dft_plus_u=`` per-step in your own loop.
"""
try:
from ase.calculators.calculator import Calculator
from ase.units import Bohr, Hartree
except ImportError:
raise ImportError("Geometry optimization requires ASE: pip install ase")
import numpy as np
from ._vibeqc_core import Atom, BasisSet
from ._vibeqc_core import Molecule as _Molecule
h = 0.001 # bohr — central-finite-difference step
class _WavefunctionCalculator(Calculator):
implemented_properties = ["energy", "forces"]
def calculate(self, atoms, properties, system_changes):
# Record self.atoms per the ASE Calculator contract. Without
# it check_state() reports every query as changed, so cached
# results are discarded (each get_* triggers a fresh solve)
# and TrajectoryWriter finds no stored energy — wavefunction
# optimization trajectories lost their per-frame energies.
Calculator.calculate(self, atoms, properties, system_changes)
# Convert ASE Atoms → vibeqc Molecule
positions_bohr = atoms.positions / Bohr
mol = _Molecule(
[
Atom(int(z), list(xyz))
for z, xyz in zip(atoms.numbers, positions_bohr)
],
charge=molecule.charge,
multiplicity=molecule.multiplicity,
)
basis = BasisSet(mol, basis_name)
# Compute energy at current geometry. This call and the two
# FD-displaced calls below must pass the *same* solver options:
# any mismatch makes the BFGS energy and its forces sample
# different surfaces (pre-2026-06-12 the energy call dropped
# casscf_options while the FD calls carried it).
result = _run_single_point(
method,
mol,
basis,
functional=functional,
selected_ci_options=selected_ci_options,
dmrg_options=dmrg_options,
v2rdm_options=v2rdm_options,
transcorrelated_options=transcorrelated_options,
casci_options=casci_options,
caspt2_options=caspt2_options,
casscf_options=casscf_options,
active_space=active_space,
cas_reference=cas_reference,
dft_plus_u=dft_plus_u,
)
energy_ha = float(getattr(result, "energy", 0.0))
self.results["energy"] = energy_ha * Hartree
# Numerical forces via central differences
if "forces" in properties:
forces_ha_bohr = np.zeros((len(atoms), 3))
for i in range(len(atoms)):
for c in range(3):
# +h displacement
pos_plus = positions_bohr.copy()
pos_plus[i, c] += h
mol_plus = _Molecule(
[
Atom(int(z), list(xyz))
for z, xyz in zip(atoms.numbers, pos_plus)
],
charge=molecule.charge,
multiplicity=molecule.multiplicity,
)
basis_plus = BasisSet(mol_plus, basis_name)
result_plus = _run_single_point(
method,
mol_plus,
basis_plus,
functional=functional,
selected_ci_options=selected_ci_options,
dmrg_options=dmrg_options,
v2rdm_options=v2rdm_options,
transcorrelated_options=transcorrelated_options,
casci_options=casci_options,
caspt2_options=caspt2_options,
casscf_options=casscf_options,
active_space=active_space,
cas_reference=cas_reference,
dft_plus_u=dft_plus_u,
)
e_plus = float(getattr(result_plus, "energy", 0.0))
# -h displacement
pos_minus = positions_bohr.copy()
pos_minus[i, c] -= h
mol_minus = _Molecule(
[
Atom(int(z), list(xyz))
for z, xyz in zip(atoms.numbers, pos_minus)
],
charge=molecule.charge,
multiplicity=molecule.multiplicity,
)
basis_minus = BasisSet(mol_minus, basis_name)
result_minus = _run_single_point(
method,
mol_minus,
basis_minus,
functional=functional,
selected_ci_options=selected_ci_options,
dmrg_options=dmrg_options,
v2rdm_options=v2rdm_options,
transcorrelated_options=transcorrelated_options,
casci_options=casci_options,
caspt2_options=caspt2_options,
casscf_options=casscf_options,
active_space=active_space,
cas_reference=cas_reference,
dft_plus_u=dft_plus_u,
)
e_minus = float(getattr(result_minus, "energy", 0.0))
# Central difference: F = -dE/dR
forces_ha_bohr[i, c] = -(e_plus - e_minus) / (2 * h)
self.results["forces"] = forces_ha_bohr * (Hartree / Bohr)
return _WavefunctionCalculator()
def _optimize_geometry(
molecule: Molecule,
basis_name: str,
*,
functional: Optional[str],
trajectory_path: Path,
fmax: float,
max_steps: int,
dispersion_params: Optional[D3BJParams] = None,
method: str = "rhf",
selected_ci_options=None,
dmrg_options=None,
v2rdm_options=None,
transcorrelated_options=None,
casci_options=None,
caspt2_options=None,
casscf_options=None,
active_space=None,
cas_reference=None,
rhf_options: Optional[RHFOptions] = None,
uhf_options: Optional[UHFOptions] = None,
rks_options: Optional[RKSOptions] = None,
uks_options: Optional[UKSOptions] = None,
mlip_options: Optional[MLIPOptions] = None,
) -> Molecule:
"""Run an ASE/BFGS geometry optimization, writing frames to a .traj file.
Returns a new Molecule at the optimized geometry. The SCF options
apply to every intermediate geometry, not just the final point — use
them to bump ``max_iter`` / add damping for systems where BFGS
sometimes lands on a hard-to-converge geometry (H-bonded clusters,
near-degenerate states).
For wavefunction methods (``selected_ci``, ``dmrg``, ``v2rdm``,
``transcorrelated_ci``, ``casci``, ``casscf``), forces are computed
via central finite differences because the solvers do not yet provide
analytic gradients. The per-step calculator receives the same solver
options (``casscf_options``, ``casci_options``, ``cas_reference``, …)
as the final single point, so the optimizer walks the same surface
the reported final energy is evaluated on.
"""
try:
from ase import Atoms
from ase.io.trajectory import Trajectory
from ase.optimize import BFGSLineSearch
from ase.units import Bohr
except ImportError as exc:
raise ImportError(
"Geometry optimization requires ASE. Install it with "
"`pip install ase` into your vibe-qc venv."
) from exc
from .ase import VibeQC
positions_ang = [[coord * Bohr for coord in atom.xyz] for atom in molecule.atoms]
atoms = Atoms(
numbers=[atom.Z for atom in molecule.atoms],
positions=positions_ang,
)
_se_methods = {
"dftb0",
"scc_dftb",
"pm6",
"gfn2_xtb",
"om1",
"om2",
"om3",
"msindo",
}
# casci / casscf joined 2026-06-12: they previously fell through to the
# mean-field VibeQC calculator below, so the optimizer silently walked
# the RHF/RKS surface while the final single point ran the CAS solver.
# nevpt2 / caspt2 / mrci / fci still fall through (routing them onto an
# FD-on-PT2 surface is a cost/policy call for the maintainer).
wavefunction_methods = {
"selected_ci",
"dmrg",
"v2rdm",
"transcorrelated_ci",
"casci",
"casscf",
}
if method in _se_methods:
atoms.calc = _make_semiempirical_ase_calculator(
molecule,
method,
)
elif method in _MLIP_METHODS:
# MACE geometry optimization: ASE BFGS drives the MACE ASE
# calculator directly (it works in eV/Angstrom natively, the units
# ASE optimizers expect). run_job's early ASL gate already
# validated the model; MACEModel re-checks (defense in depth).
from vibeqc.mlip.mace import MACEModel
atoms.calc = MACEModel(molecule, mlip_options).calculator
elif method in wavefunction_methods:
atoms.calc = _make_wavefunction_ase_calculator(
molecule,
basis_name,
method,
functional=functional,
selected_ci_options=selected_ci_options,
dmrg_options=dmrg_options,
v2rdm_options=v2rdm_options,
transcorrelated_options=transcorrelated_options,
casci_options=casci_options,
caspt2_options=caspt2_options,
casscf_options=casscf_options,
active_space=active_space,
cas_reference=cas_reference,
)
else:
atoms.calc = VibeQC(
basis=basis_name,
charge=molecule.charge,
multiplicity=molecule.multiplicity,
functional=functional,
dispersion=dispersion_params,
rhf_options=rhf_options,
uhf_options=uhf_options,
rks_options=rks_options,
uks_options=uks_options,
)
with Trajectory(str(trajectory_path), "w", atoms) as traj:
# BFGSLineSearch refuses uphill steps by construction — essential
# on flat / weakly-bound PESs (H-bonded clusters, dispersion-dominated
# complexes) where plain BFGS's Hessian extrapolation routinely
# pushes atoms apart. Works on covalent minima too, so we use it
# uniformly. Default ASE line-search parameters are well-tuned;
# don't clamp maxstep here or the line search can't converge.
opt = BFGSLineSearch(atoms, logfile=None)
opt.attach(traj)
opt.run(fmax=fmax, steps=max_steps)
# Re-build a vibe-qc Molecule at the optimized geometry.
from ._vibeqc_core import Atom as _Atom
new_positions_bohr = atoms.positions / Bohr
optimized = Molecule(
[_Atom(int(z), list(xyz)) for z, xyz in zip(atoms.numbers, new_positions_bohr)],
molecule.charge,
molecule.multiplicity,
)
return optimized
def _resolve_optimizer_backend(requested: str) -> str:
"""Resolve ``optimizer_backend`` to ``"ase"`` or ``"native"``."""
if requested == "ase":
try:
import ase # noqa: F401
except ImportError:
raise ImportError(
"optimizer_backend='ase' requires ASE. Install with "
"`pip install ase` or use optimizer_backend='native'."
) from None
return "ase"
if requested == "native":
return "native"
if requested != "auto":
raise ValueError(
f"optimizer_backend={requested!r} — expected 'auto', 'ase', or 'native'."
)
# "auto": prefer ASE if installed, otherwise native.
try:
import ase # noqa: F401
except ImportError:
return "native"
return "ase"
[docs]
def run_job(
molecule: Molecule,
*,
basis: Optional[str] = None,
method: Method = "auto",
functional: Optional[str] = None,
output: str | os.PathLike = "output",
optimize: bool = False,
write_molden_file: bool = True,
write_xyz_file: bool = True,
write_population_file: bool = True,
write_cube: Union[bool, str, int, list, tuple, None] = False,
cube_spacing: float = 0.2,
cube_padding: float = 4.0,
citations: bool = True,
dry_run: bool = False,
fmax: float = 0.05,
max_opt_steps: int = 200,
optimizer_backend: str = "auto",
memory_override: bool = False,
num_threads: Optional[int] = None,
dispersion: DispersionSpec = None,
solvent: object = None,
record_hostname: bool = True,
rhf_options: Optional[RHFOptions] = None,
uhf_options: Optional[UHFOptions] = None,
rks_options: Optional[RKSOptions] = None,
uks_options: Optional[UKSOptions] = None,
selected_ci_options: Optional[SelectedCIOptions] = None,
dmrg_options: Optional[DMRGOptions] = None,
v2rdm_options: Optional[V2RDMOptions] = None,
transcorrelated_options: Optional[TranscorrelatedOptions] = None,
casci_options: Optional[CASCIOptions] = None,
caspt2_options: Optional[CASPT2Options] = None,
casscf_options: Optional[CASSCFOptions] = None,
dlpno_options: Optional["DLPNOMP2Options"] = None,
dlpno_ccsd_options: Optional["DLPNOCCSDPilotOptions"] = None,
active_space: Optional[tuple[int, int]] = None,
cas_reference: Optional[str] = None,
mlip_options: Optional[MLIPOptions] = None,
ccm_options: Optional["CCMOptions"] = None,
progress: Union[bool, ProgressLogger, None] = None,
verbose: Optional[int] = None,
use_logging: bool = False,
perf_log: Optional[Union[str, os.PathLike, bool]] = None,
structured_log: Union[bool, str, os.PathLike, None] = False,
crash_dump: Union[bool, str, os.PathLike, None] = True,
# QVF visualisation archive (v1).
output_qvf: bool = False,
hessian: bool = False,
# Partial Hessian: atom indices to hold fixed so only the unfrozen
# atoms are displaced (6M instead of 6N SCFs); the frequencies are
# then vibrational-only (no gas-phase trans/rot). See HessianFDOptions.
hessian_frozen_indices: Optional[List[int]] = None,
# Thermochemistry (RRHO ideal-gas). When a Hessian is computed
# (``hessian=True``), the harmonic frequencies feed an end-of-run
# thermochemistry block (ZPE + thermal U/H/S/G at T, p). Pass a
# ThermoOptions to set temperature / pressure / rotational symmetry
# number; None uses the defaults (298.15 K, 1 atm, σ=1).
thermo_options: Optional["ThermoOptions"] = None,
# Atomization energy (Σ E_atom − E_mol) for mean-field methods. When True,
# free-atom ground-state references are computed at the same level (cached
# per element) and an atomization block is written. Main-group H–Kr; see
# vibeqc.atomization.
atomization: bool = False,
# DFT+U (Dudarev rotationally-invariant) — Increment 2c.
# List of HubbardSite objects; empty / None ≡ no +U.
dft_plus_u: Optional[List["HubbardSite"]] = None,
# MSINDO NDDO mode (method="msindo"): the reference program's default mode
# (separate parametrization + two-centre multipole 2e Fock). Ignored for
# other methods.
nddo: bool = False,
) -> object:
"""Run a vibe-qc SCF job and write the standard output files.
Parameters
----------
molecule
The :class:`Molecule` describing the system (bohr coordinates).
basis
libint-recognized basis-set name.
method
``"rhf"``, ``"uhf"``, ``"rks"``, ``"uks"``, ``"auto"``, ``"ccsd"``,
or ``"ccsd(t)"`` (picks
restricted vs unrestricted from ``molecule.multiplicity`` and
switches between HF/KS based on whether ``functional`` is set).
functional
XC functional for RKS / UKS (e.g. ``"PBE"``, ``"B3LYP"``). Ignored
for HF.
output
Path stem for the generated files. ``{output}.out`` always; also
``{output}.molden`` unless disabled; and ``{output}.traj`` when
``optimize=True``.
optimize
Run a BFGS geometry optimization first (via ASE), then the final
SCF on the optimized geometry. The trajectory is written for
animation (openable with ASE-aware viewers).
write_molden_file
Emit ``{output}.molden`` at the converged geometry. Default True.
fmax, max_opt_steps
Optimizer tolerance (eV/Å) and iteration limit. Ignored unless
``optimize=True``.
memory_override
If ``False`` (default), the driver estimates peak memory and
aborts with :class:`InsufficientMemoryError` when the estimate
exceeds the machine's available RAM. Set to ``True`` to
proceed anyway — at the risk of swap-thrashing or a system
freeze.
num_threads
If set, pin the OpenMP thread count for the duration of the
calculation. ``None`` (default) leaves the current setting in
place — which is usually "all cores" unless the environment
variable ``OMP_NUM_THREADS`` is set or
:func:`vibeqc.set_num_threads` was called earlier. The actual
thread count used is recorded in the output log for
reproducibility.
dispersion
Post-SCF D3(BJ) dispersion correction. Accepts:
* ``None`` (default) — no dispersion.
* ``True`` or ``"d3bj"`` — use D3-BJ params for the current DFT
functional.
* A functional name (``"pbe"``, ``"b3lyp"``, …) — use its D3-BJ
params (useful for ``method="rhf"`` + ``"hf"`` dispersion, or
for overriding the SCF functional in the damping lookup).
* A :class:`D3BJParams` instance — used directly.
The energy correction is written to the ``.out`` file, added to
the returned object as ``e_dispersion`` / ``energy_total`` (the
raw SCF ``.energy`` is preserved untouched), and, when
``optimize=True``, added to the forces the optimizer sees.
Routes through :func:`vibeqc.compute_d3bj` with ``backend="auto"``
— the reference ``dftd3`` backend is used when installed,
otherwise the D1a framework stub. See
:mod:`vibeqc.dispersion` for details.
solvent
v0.9.0 CPCM / COSMO implicit solvation. Accepts:
* ``None`` (default) — gas-phase SCF.
* A preset name (``"water"``, ``"dmso"``, ``"acetonitrile"``,
``"chloroform"``, ``"benzene"``, …) — looks up the static
dielectric ε from :data:`vibeqc.SOLVENT_PRESETS`.
* A numeric ε (e.g. ``78.39``) — custom dielectric.
* A dict (``{"epsilon": 25.0, "variant": "cosmo", ...}``) or a
:class:`vibeqc.SolventModel` instance for full control over
cavity construction (Bondi radii, Lebedev order, switching
width, max macro-iterations).
Routes through :func:`vibeqc.run_cpcm_scf`; macro-iterates the
apparent surface charge against the SCF density until
ΔE_solv < 1e-6 Ha (typically 3–5 outer cycles). The total
energy is the in-solvent value; the gas-phase reference is
retained on ``result.solvent_result.e_gas``. See
:doc:`/user_guide/solvation` for the full theory and the
cavity / preset table.
record_hostname
If ``False``, the per-job ``{output}.system`` manifest writes
``hostname = "<redacted>"`` instead of the live hostname. The
``VIBEQC_NO_HOSTNAME=1`` environment variable does the same
thing globally — engineering's bundled docs runs use the env
var so example outputs in ``docs/_static/examples/`` don't
leak machine names. Other manifest fields (CPU model, OS,
memory, library versions) are not redacted; the redaction is
scoped to the hostname only.
rhf_options / uhf_options / rks_options / uks_options
Optional override for the respective SCF options struct.
casci_options
CASCI knobs for ``method="casci"`` (the active space itself comes
from ``active_space=``). ``CASCIOptions(nroots=N)`` requests N CI
roots: ``result.energy`` stays the ground root, per-root energies
land in ``result.root_energies`` (per-root ⟨S²⟩ in
``result.root_s2``) and the .out solver block prints the root
table. Ignored by every other method — the SA-CASSCF root count
is ``casscf_options.nroots``, the MS-CASPT2 model-space size
``caspt2_options.nroots``.
progress
Live progress logger for long-running jobs. Default behavior
is **ON** — the job emits a banner, per-stage milestones, and
a final summary to stdout (line-flushed) so the canonical
``nohup python LiH.py > LiH.log 2>&1 &`` + ``tail -f LiH.log``
workflow shows progress in real time. The ``.out`` file is
also line-buffered so ``tail -f output-LiH.out`` works without
any extra setup.
Pass ``progress=False`` to silence stdout (the ``.out`` file
is still written normally — only the live mirror is
suppressed). Pass a :class:`vibeqc.ProgressLogger` instance
for fully custom routing (tee to a persistent file, mute,
thread one logger through nested calls).
Set ``VIBEQC_LIVE_LOGGING=0`` in the environment to disable
live progress globally — useful for batch scripts that don't
want to edit every input file. The env var only takes effect
when ``progress`` is left at its default (``None``); explicit
``progress=True`` / ``progress=False`` / a ``ProgressLogger``
instance always wins, so a debugging session can re-enable
progress for one shell.
Per-iteration progress for periodic SCFs (which run in
Python) is streamed live through this same logger via the
lower-level ``run_*_periodic_*`` entry points; molecular SCFs
run in C++ and only emit a pre-SCF banner + post-SCF summary
live, with the per-iteration trace landing in the ``.out``
when the SCF returns.
verbose
Integer verbosity level (PySCF convention, 0..9, default
4). Each level is a strict superset of the one below, so
bumping ``verbose`` only adds output:
* ``0`` — silent (nothing live; ``.out`` is still written)
* ``1`` — banner + warnings + final SCF summary only
* ``2`` — add per-stage milestones + ``info()`` lines
* ``3`` — add per-stage timing on stage exit
* ``4`` — add per-iteration SCF rows (DEFAULT)
* ``5`` — add inline RSS-memory snapshots
* ``6+`` — phase-level wall-clock breakdown live (overlaps
the post-mortem ``.perf`` log on purpose)
Pass ``verbose=None`` (the default) to read the
``VIBEQC_VERBOSE`` env var; if unset, falls back to 4.
Ignored when ``progress`` is a :class:`ProgressLogger`
instance — that logger's own level wins.
use_logging
If ``True``, route progress through
``logging.getLogger("vibeqc.run_job")`` instead of bare
``stdout`` writes. Banner / stage milestones land at
``INFO``; per-iteration SCF rows at ``DEBUG``; warnings at
``WARNING``. Composes naturally with stdlib handlers
(``RotatingFileHandler``, syslog, ``dictConfig``)::
import logging
logging.basicConfig(level=logging.INFO)
vq.run_job(..., use_logging=True)
``progress=False`` still wins as a hard kill switch — the
verbose-level gate runs *before* the logging call, so a
silent run stays silent regardless of the active logging
config. Ignored when ``progress`` is a pre-built
:class:`ProgressLogger` instance.
perf_log
Optional path (or ``True`` to use ``{output}.perf``) to write
a post-mortem performance / debug breakdown — phase-level
wall + CPU times, memory snapshots, parallelism flags. The
live ``progress=`` log shows progress *during* the run; the
perf log shows where the time went *afterwards*. Off by
default. Pass an explicit path, ``True`` to emit alongside
``{output}.out``, or set the ``VIBEQC_PERFLOG=path`` env
var (which wins when ``perf_log`` is left at ``None``).
See :mod:`vibeqc.perf` for the full instrumentation surface.
structured_log
Optional NDJSON (one-JSON-record-per-line) log capturing
every SCF transition — banner, job_start, memory_estimate,
per-iter rows, scf_converged, properties, job_end. Off by
default. Pass ``True`` to emit ``{output}.scf.jsonl``,
a path-like to write there explicitly, or set the
``VIBEQC_STRUCTURED_LOG=path`` env var (which wins when
``structured_log`` is left at ``False``). The format is
stable: events are append-only, fields are never renamed
or removed. See :mod:`vibeqc.structured_log` for the full
event catalog.
crash_dump
Write a snapshot to ``{output}.dump`` (TOML) plus binary
attachments (``.dump.density.npy``, ``.dump.fock.npy``,
``.dump.mo.npy``) when the SCF fails ungracefully — raised
exception (NaN, linear dependence, memory error). Default
``True``: post-mortem reproducibility costs nothing on
success and saves a re-run on failure. Pass ``False`` (or
set ``VIBEQC_NO_CRASH_DUMP=1`` in the environment) to
disable. The dump is written alongside ``{output}.out`` in
the standard ``output.*`` family — re-attach the ``.dump``
+ ``.dump.density.npy`` to a bug report and the maintainer
can reconstruct the failing state via
:func:`vibeqc.load_dump`. See :mod:`vibeqc.crash_dump` for
the dump format. The exception is always re-raised after
the dump is written; ``crash_dump=True`` does *not* swallow
failures.
hessian
When True, compute harmonic vibrational frequencies via
finite-difference Hessian. Default False. Cost: ~6N SCF evals.
Results printed to .out and embedded in QVF for vibe-view.
Returns
-------
The SCF result object (RHFResult / UHFResult / RKSResult / UKSResult).
"""
output_stem = Path(os.fspath(output))
out_path = output_stem.with_suffix(".out")
molden_path = output_stem.with_suffix(".molden")
traj_path = output_stem.with_suffix(".traj")
# Resolve perf_log target. ``True`` → emit to {output}.perf;
# str/Path → use that path verbatim; None → defer to
# VIBEQC_PERFLOG env var inside perf_log() (or no-op if unset).
if perf_log is True:
_perf_target: Optional[Path] = output_stem.with_suffix(".perf")
elif perf_log is False or perf_log is None:
_perf_target = None
else:
_perf_target = Path(os.fspath(perf_log))
# Resolve structured_log target. Default OFF (False) — only emit
# when the caller opts in. Resolution mirrors perf_log:
# True → {output}.scf.jsonl
# str/PathLike → use verbatim
# False / None → defer to VIBEQC_STRUCTURED_LOG env var (or
# leave disabled if the env var is unset)
if structured_log is True:
_structured_target: Optional[Path] = output_stem.with_suffix(".scf.jsonl")
elif structured_log is False or structured_log is None:
_structured_target = None
else:
_structured_target = Path(os.fspath(structured_log))
# Resolve crash_dump target. Default ON: dump on failure unless the
# caller explicitly opts out (or VIBEQC_NO_CRASH_DUMP=1). The
# post-mortem snapshot is cheap on success (zero bytes written),
# and saves a re-run on failure. Resolution:
# True → {output}.dump (default)
# str/PathLike → use verbatim as the stem
# False → disabled
# None → also default-on (treated as True)
_crash_off_env = os.environ.get(
"VIBEQC_NO_CRASH_DUMP",
"",
).strip().lower() in ("1", "true", "yes", "on")
if crash_dump is False or _crash_off_env:
_crash_stem: Optional[Path] = None
elif crash_dump is True or crash_dump is None:
_crash_stem = output_stem
else:
_crash_stem = Path(os.fspath(crash_dump))
# Default-ON progress: when the caller didn't pass a value,
# check the env-var opt-out (VIBEQC_LIVE_LOGGING=0) and otherwise
# turn it on. This lets every backgrounded ``python input.py >
# log 2>&1 &`` show progress without the user having to learn
# a new kwarg — answering the "is my job stuck or actually
# running?" question by default. Explicit progress= (True/False/
# logger) bypasses the env var entirely.
if progress is None:
env = os.environ.get("VIBEQC_LIVE_LOGGING", "").strip().lower()
if env in ("0", "false", "no", "off"):
progress = False
else:
progress = True
# Verbose level resolution (v0.5.3). When the caller leaves
# ``verbose=None`` we read VIBEQC_VERBOSE; otherwise the
# explicit argument wins. Falls back to the package default
# (level 4 — banner + stages + per-iter SCF) when neither
# is set. The env var is a parsing best-effort: a junk value
# silently falls back to the default rather than raising,
# since a typo in $VIBEQC_VERBOSE shouldn't kill someone's
# overnight run.
if verbose is None:
env_verbose = os.environ.get("VIBEQC_VERBOSE", "").strip()
if env_verbose:
try:
verbose_level = int(env_verbose)
except ValueError:
verbose_level = None # type: ignore[assignment]
else:
verbose_level = max(0, verbose_level)
else:
verbose_level = None # type: ignore[assignment]
else:
verbose_level = max(0, int(verbose))
plog = resolve_progress(
progress,
verbose=verbose_level,
use_logging=use_logging,
)
# Composite 3c keyword resolution (e.g. ``method="hf-3c"``). For
# non-composite ``method`` values this is a pass-through. Composite
# values get their (basis, functional, dispersion) tuple inferred
# from the :mod:`vibeqc.composites` registry; raises
# :class:`CompositeUnavailable` for recipes whose prerequisites
# have not yet landed.
method, basis, functional, dispersion, composite_recipe = _apply_composite(
method,
basis=basis,
functional=functional,
dispersion=dispersion,
molecule=molecule,
)
# Semiempirical methods don't use a Gaussian basis set — accept
# any basis value (including None / empty).
_se_methods = {
"dftb0",
"scc_dftb",
"pm6",
"gfn2_xtb",
"om1",
"om2",
"om3",
"msindo",
"ccm",
}
if method in _se_methods or method in _MLIP_METHODS:
if not basis:
basis = "" # placeholder; semiempirical / MLIP code ignores it
elif not basis:
raise ValueError(
"run_job: basis is required. Pass basis='def2-svp' (or "
"another supported basis), or use a composite keyword "
"(method='hf-3c', …) which carries its own basis. "
"Composite catalogue: "
f"{[r.name for r in __import__('vibeqc').list_composites()]}"
)
resolved_method = _select_method(method, molecule, functional)
# Early CCM gate (fail fast, clean "unavailable" — not a silent gap): the
# periodic Cyclic Cluster Model is single-point only through run_job. CCM
# gradients (finite-difference) + geometry optimisation live in
# msindo_ccm.ccm_gradient_fd / ccm_optimize; analytic CCM gradients and the
# run_job optimiser hook are not wired up.
if resolved_method == "ccm" and optimize:
raise NotImplementedError(
"method='ccm' geometry optimisation is not available through "
"run_job. Use vibeqc.semiempirical.methods.msindo_ccm.ccm_optimize "
"(finite-difference, fixed Wigner-Seitz) for relaxed CCM geometries."
)
# Early MLIP gate (fail fast, before any output / SCF machinery): an
# academic-only (ASL) MACE model needs explicit acknowledgment, and the
# model must cover every element in the system — else a config error
# would otherwise surface as a crashed SCF. Both checks run again inside
# MACEModel (defense in depth).
if resolved_method in _MLIP_METHODS:
from vibeqc.mlip import MLIPOptions, resolve_model
from vibeqc.mlip.mace import element_coverage_error, enforce_academic_license
_mlopts = mlip_options or MLIPOptions()
_minfo = resolve_model(_mlopts.model)
enforce_academic_license(_minfo, _mlopts)
_ecov = element_coverage_error(_minfo, molecule)
if _ecov is not None:
raise ValueError(_ecov)
# Composite recipes may request D4 dispersion; the D4 path is
# wired through compute_d4 in the post-SCF block below. A D4
# request short-circuits the D3-BJ resolution.
use_d4 = isinstance(dispersion, str) and dispersion.strip().lower() == "d4"
# Build the declarative OutputPlan once. It drives BOTH the
# dry-run pre-flight (below) and the real run's manifest
# lifecycle (the OutputWriter constructed before the SCF). One
# source of truth for "what files will this job produce".
_cube_req_plan = parse_write_cube_kwarg(write_cube)
_output_plan = OutputPlan.from_run_job_kwargs(
output=output_stem,
method=resolved_method,
basis=basis,
functional=functional,
optimize=optimize,
write_molden_file=write_molden_file,
write_xyz=write_xyz_file,
citations=citations,
write_population=write_population_file,
write_cube_density=_cube_req_plan.density,
cube_mo_labels=tuple(str(lbl) for lbl in _cube_req_plan.mo_labels),
perf_log=perf_log,
structured_log=structured_log,
crash_dump=crash_dump,
output_qvf=output_qvf,
)
# Dry-run short-circuit (Phase O3). When ``dry_run=True`` or the
# ``VIBEQC_DRY_RUN`` env var is set, write a one-shot
# ``{output}.system`` with ``[outputs].status = "dry_run"``,
# print the declared-artefacts summary to stdout, and return
# ``None`` without running the SCF. This is the pre-flight path
# that ``vq submit`` uses to learn which files a job will
# produce. Run before any heavyweight setup (basis-set
# construction, memory estimate, perf tracker) so the
# short-circuit is genuinely cheap.
if dry_run or is_dry_run_requested():
# Make sure the parent directory exists so we don't fail on a
# freshly-typed `-o /tmp/new-subdir/foo` path.
output_stem.parent.mkdir(parents=True, exist_ok=True)
dry_run_manifest(
_output_plan,
record_hostname=record_hostname,
)
return None
# Resolve dispersion up front so a bad spec fails before we touch
# the filesystem. d3_params is None iff dispersion is disabled.
# A D4 request (composite recipes r²SCAN-3c / ωB97X-3c) short-
# circuits the D3-BJ path — D4 is applied in the post-SCF block.
if use_d4:
d3_params = None
else:
d3_params = _resolve_dispersion(dispersion, functional)
# Pin the thread count if the caller asked for one; otherwise leave
# the current state alone. We always query at the end so the output
# reports what was actually used (which may differ from what the
# user asked for — e.g. on a single-core build or OpenMP-disabled
# environment).
if num_threads is not None:
set_num_threads(int(num_threads))
threads_in_use = get_num_threads()
# Ensure the parent directory exists so users can give names like
# "runs/water" without pre-creating "runs/".
out_path.parent.mkdir(parents=True, exist_ok=True)
# Construct the OutputWriter — the per-job coordinator that owns
# ``{output}.system`` for the rest of the run. This writes the
# manifest immediately with the ``[plan]`` section + ``[outputs]
# .status = "running"`` *before* the SCF starts, so a ``vq``
# daemon polling the workspace sees the declared output set and a
# liveness signal from the first moment. ``finish()`` /
# ``crash()`` at the exit paths flip the status; the end-of-job
# record-sweep fills ``[[outputs.files]]`` with the artefacts
# that actually landed. Replaces the bare end-of-job
# ``write_system_manifest`` call (pre-v1.0 dispatch-overhaul).
_output_writer = OutputWriter(
_output_plan,
record_hostname=record_hostname,
)
t_job_start = time.perf_counter()
# ``buffering=1`` requests line-buffered I/O so a `tail -f` against
# the .out file shows new lines as the SCF / property pipeline emits
# them. We also call ``f.flush()`` explicitly after major blocks for
# the same reason — line buffering only kicks in on newlines.
#
# The perf-log context manager wraps the whole body so PerfScope
# entries from anywhere downstream — periodic SCF stages, gradient
# calls, anything that calls ``with PerfScope("..."): ...`` —
# accumulate into the same tracker. When ``_perf_target`` is None
# (and ``$VIBEQC_PERFLOG`` isn't set), the context manager is a
# cheap no-op: PerfScope.__enter__/__exit__ become a single
# ContextVar lookup with no I/O.
hessian_result = None
with (
_perf_log_ctx(_perf_target),
_structured_log_ctx(_structured_target) as _slog,
crash_dump_context(_crash_stem) as _crash_target,
open(out_path, "w", encoding="utf-8", buffering=1) as f,
PerfScope("run_job.total"),
):
# Structured log: banner + job_start records. Always safe to
# emit — _slog.emit is a no-op when the log is disabled.
_libs = library_versions()
_fp = run_fingerprint(
method=resolved_method,
basis=basis,
functional=functional,
molecule=molecule,
)
_slog.emit(
"banner",
vibeqc_version=VIBEQC_VERSION,
libint=_libs.get("libint", "unknown"),
libxc=_libs.get("libxc", "unknown"),
spglib=_libs.get("spglib", "unknown"),
run_fingerprint=_fp,
)
_slog.emit(
"job_start",
method=resolved_method,
basis=basis,
functional=functional,
optimize=bool(optimize),
threads=int(threads_in_use),
n_atoms=int(len(list(molecule.atoms))),
charge=int(molecule.charge),
multiplicity=int(molecule.multiplicity),
n_electrons=int(molecule.n_electrons()),
output_stem=str(output_stem),
)
f.write(banner() + "\n\n")
# Post-SCF methods resolve their SCF to rhf/uhf; the *header* should
# still name the correlation method (RHF + MP2/CCSD), so display the
# original keyword for those (but keep "auto" → resolved everywhere
# else).
_header_method = method if method in _POSTSCF_CITE_METHODS else resolved_method
# resolved_method is the mean-field SCF a post-SCF method ran on
# (rhf/uhf) — name it as the reference in the header.
_scf_ref = resolved_method if resolved_method in ("rhf", "uhf") else "rhf"
f.write(
_job_header(_header_method, basis, functional, scf_reference=_scf_ref)
+ "\n"
)
f.write(_geom_summary(molecule) + "\n")
f.write(f" Threads: {threads_in_use} (OpenMP shared-memory parallelism)\n\n")
f.flush()
plog.banner(
f"run_job {_job_header(_header_method, basis, functional, scf_reference=_scf_ref).strip()}"
)
plog.info(f"Threads: {threads_in_use} (OpenMP shared-memory parallelism)")
plog.info(f"Output file: {out_path}")
t_opt = 0.0
_traj_frames = []
_traj_energies = []
if optimize:
# Increment 3 — analytic +U gradient via the
# variational F = F_HF + S V_AO S Fock contribution.
# The ASE _WavefunctionCalculator path is plumbed below.
# For dft_plus_u, pre-populate the relevant method-options
# struct's dft_plus_u_sites / dft_plus_u_ao_groups fields
# once at the initial geometry — the AO-group indices are
# geometry-invariant (depend only on shell layout per atom
# Z + basis name), so the same translation is valid at
# every optimization step.
if dft_plus_u and resolved_method in (
"rhf",
"uhf",
"rks",
"uks",
):
from .dft_plus_u import _apply_dft_plus_u_to_options
# Materialise a defaulted options struct if the user
# didn't pass one, so the C++ binding sees a real
# object with the dft_plus_u_sites field populated.
if resolved_method == "rhf":
if rhf_options is None:
rhf_options = RHFOptions()
_opts_for_dft_plus_u = rhf_options
elif resolved_method == "uhf":
if uhf_options is None:
uhf_options = UHFOptions()
_opts_for_dft_plus_u = uhf_options
elif resolved_method == "rks":
if rks_options is None:
rks_options = RKSOptions()
_opts_for_dft_plus_u = rks_options
else: # uks
if uks_options is None:
uks_options = UKSOptions()
_opts_for_dft_plus_u = uks_options
_basis_for_dft_plus_u = BasisSet(molecule, basis)
_apply_dft_plus_u_to_options(
_opts_for_dft_plus_u,
_basis_for_dft_plus_u,
dft_plus_u,
)
_opt_backend = _resolve_optimizer_backend(optimizer_backend)
if _opt_backend == "ase":
f.write(f" Geometry optimization (ASE/BFGS) -> {traj_path.name}\n")
f.write(f" fmax = {fmax} eV/A, max_steps = {max_opt_steps}\n\n")
f.flush()
with (
plog.stage(
"geometry_optimization",
detail=f"BFGS, fmax={fmax} eV/A, max_steps={max_opt_steps}",
),
PerfScope("geometry_optimization"),
):
t0 = time.perf_counter()
molecule = _optimize_geometry(
molecule,
basis,
functional=functional,
trajectory_path=traj_path,
fmax=fmax,
max_steps=max_opt_steps,
dispersion_params=d3_params,
method=resolved_method,
selected_ci_options=selected_ci_options,
dmrg_options=dmrg_options,
v2rdm_options=v2rdm_options,
transcorrelated_options=transcorrelated_options,
casci_options=casci_options,
caspt2_options=caspt2_options,
casscf_options=casscf_options,
active_space=active_space,
cas_reference=cas_reference,
rhf_options=rhf_options,
uhf_options=uhf_options,
rks_options=rks_options,
uks_options=uks_options,
mlip_options=mlip_options,
)
t_opt = time.perf_counter() - t0
f.write(" Optimized geometry\n")
f.write(_geom_summary(molecule) + "\n\n")
f.flush()
# Read trajectory frames for QVF archive.
_traj_frames = []
_traj_energies = []
try:
from ase.io.trajectory import Trajectory
from ase.units import Bohr, Hartree
from ._vibeqc_core import Atom
traj = Trajectory(str(traj_path))
for atoms in traj:
pos_bohr = atoms.positions / Bohr
frame_mol = Molecule(
[
Atom(int(z), list(xyz))
for z, xyz in zip(atoms.numbers, pos_bohr)
],
molecule.charge,
molecule.multiplicity,
)
_traj_frames.append(frame_mol)
try:
e_eV = atoms.get_potential_energy()
if e_eV is not None and e_eV == e_eV:
_traj_energies.append(float(e_eV) / Hartree)
except Exception as _traj_e_exc:
# ASE calculator may not expose energies
# per frame; compatibility shim — never
# crash on this.
warn_output_failure(
_traj_e_exc,
traj_path,
role="trajectory_frame_energy",
category=OutputFailureKind.compatibility_fallback,
)
except Exception as _traj_read_exc:
# The traj file was written by ASE just above but
# we can't read it back — log it (the QVF archive
# will lose its trajectory section but the SCF
# itself is fine).
warn_output_failure(
_traj_read_exc,
traj_path,
role="trajectory_read_for_qvf",
category=OutputFailureKind.optional_artifact,
)
_traj_frames = []
_traj_energies = []
else:
# Native scipy L-BFGS-B backend.
from .molecular_optimize import optimize_molecule as _native_opt
# Convert fmax from eV/Å to Ha/bohr. 1 eV/Å ≈ 0.045 Ha/bohr.
_conv_grad = fmax * 0.045
f.write(
f" Geometry optimization (native L-BFGS-B)\n"
f" gtol = {_conv_grad:.2e} Ha/bohr, max_steps = {max_opt_steps}\n\n"
)
f.flush()
with (
plog.stage(
"geometry_optimization",
detail=f"L-BFGS-B, gtol={_conv_grad:.1e} Ha/bohr",
),
PerfScope("geometry_optimization"),
):
t0 = time.perf_counter()
_opt_result = _native_opt(
molecule,
basis,
method=resolved_method,
functional=functional,
rhf_options=rhf_options,
uhf_options=uhf_options,
rks_options=rks_options,
uks_options=uks_options,
selected_ci_options=selected_ci_options,
dmrg_options=dmrg_options,
v2rdm_options=v2rdm_options,
transcorrelated_options=transcorrelated_options,
casci_options=casci_options,
caspt2_options=caspt2_options,
casscf_options=casscf_options,
active_space=active_space,
cas_reference=cas_reference,
max_iter=max_opt_steps,
conv_tol_grad=_conv_grad,
dispersion_params=d3_params,
solvent=solvent,
record_trajectory=bool(output_qvf),
progress=False,
)
t_opt = time.perf_counter() - t0
molecule = _opt_result.system
_traj_frames = _opt_result.trajectory_frames or None
_traj_energies = _opt_result.trajectory_energies or None
f.write(" Optimized geometry\n")
f.write(_geom_summary(molecule) + "\n\n")
f.flush()
# Semiempirical methods don't use a Gaussian basis; skip
# BasisSet construction and memory estimation.
if resolved_method in _se_methods or resolved_method in _MLIP_METHODS:
basis_obj = None
estimate = None
else:
with PerfScope("basis_set_construction"):
basis_obj = BasisSet(molecule, basis)
# Memory pre-flight
if resolved_method in ("rhf", "uhf", "rks", "uks"):
est_opts = {
"rhf": rhf_options,
"uhf": uhf_options,
"rks": rks_options,
"uks": uks_options,
}.get(resolved_method)
else:
est_opts = None
estimate = estimate_memory(
molecule,
basis_obj,
method=resolved_method,
options=est_opts,
)
if estimate is not None:
f.write(
" "
+ format_memory_report(
estimate,
override_requested=memory_override,
).replace("\n", "\n ")
+ "\n\n"
)
f.flush()
_slog.emit(
"memory_estimate",
total_gb=float(estimate.total_gb),
raw_total_bytes=int(estimate.raw_total_bytes),
headroom_factor=float(estimate.headroom_factor),
by_category={k: int(v) for k, v in estimate.by_category.items()},
)
check_memory(estimate, allow_exceed=memory_override)
if resolved_method in _MLIP_METHODS:
plog.info(
f"Evaluating {resolved_method.upper()} pre-trained MLIP "
"(single forward pass; no SCF)."
)
else:
plog.info(
f"Starting molecular SCF ({resolved_method.upper()}"
+ (f" / {functional}" if functional else "")
+ "). Per-iteration progress runs in compiled C++ and is "
"not streamed live; the trace will be written when the SCF "
"returns."
)
# Memory snapshot before SCF: lets the perf log show RSS
# growth caused by the integral / Fock build, separate from
# the basis-set / pre-flight overhead.
from .perf import active_tracker as _at
_tracker = _at()
if _tracker is not None:
_tracker.snapshot_memory("start_of_scf")
t0 = time.perf_counter()
# Crash-dump scope: if the C++ SCF raises (NaN, linear
# dependence, memory error, …), we want a snapshot of the
# last known state on disk before the exception bubbles up
# and the user has nothing to debug from. The state we can
# capture from outside the C++ call is limited (no live
# density / Fock) — we record the geometry, options, and
# phase. Any deeper introspection needs a future C++ hook.
try:
with PerfScope(f"scf.{resolved_method}"):
result = _run_single_point(
resolved_method,
molecule,
basis_obj,
functional=functional,
rhf_options=rhf_options,
uhf_options=uhf_options,
rks_options=rks_options,
uks_options=uks_options,
selected_ci_options=selected_ci_options,
dmrg_options=dmrg_options,
v2rdm_options=v2rdm_options,
transcorrelated_options=transcorrelated_options,
casci_options=casci_options,
caspt2_options=caspt2_options,
casscf_options=casscf_options,
active_space=active_space,
cas_reference=cas_reference,
mlip_options=mlip_options,
ccm_options=ccm_options,
solvent=solvent,
dft_plus_u=dft_plus_u,
nddo=nddo,
)
except BaseException as _scf_exc:
t_scf = time.perf_counter() - t0
f.write(f"\n SCF FAILED: {type(_scf_exc).__name__}: {_scf_exc}\n")
f.flush()
_slog.emit(
"scf_failed",
exception_type=type(_scf_exc).__name__,
exception=str(_scf_exc),
wall_s=float(t_scf),
)
if _crash_target is not None:
_opts_for_dump = (
{
"rhf": rhf_options,
"uhf": uhf_options,
"rks": rks_options,
"uks": uks_options,
}.get(resolved_method)
if resolved_method in ("rhf", "uhf", "rks", "uks")
else None
)
dump_on_failure(
_crash_target,
_scf_exc,
{"phase": f"scf.{resolved_method}", "n_iters_completed": 0},
options=_opts_for_dump,
molecule=molecule,
)
f.write(
f" Crash dump written to "
f"{_crash_target.with_suffix('.dump').name}\n"
)
f.flush()
# Flip the manifest status to "crashed" before the
# exception propagates — a ``vq`` daemon polling
# ``{output}.system`` then sees the job aborted rather
# than mistaking a stale "running" for a live job.
try:
_output_writer.crash(wall_seconds=t_scf)
except Exception as _crash_exc:
# Manifest bookkeeping must never mask the real SCF
# exception we're about to re-raise.
warn_output_failure(
_crash_exc,
_output_writer.manifest_path,
role="manifest_crash_status",
category=OutputFailureKind.manifest_recording,
)
# Re-raise: crash_dump=True does NOT swallow the failure;
# it just makes the failure debuggable.
raise
t_scf = time.perf_counter() - t0
if _tracker is not None:
_tracker.snapshot_memory("end_of_scf")
# Also record the per-iteration trace so the perf log
# carries the same info as the .out's SCF trace, in
# tabular form. The C++ result.scf_trace is the canonical
# source for these numbers.
for step in getattr(result, "scf_trace", []) or []:
_tracker.add_scf_iter(
iter=int(step.iter),
energy=float(step.energy),
dE=float(step.delta_e) if step.iter > 1 else None,
grad=float(step.grad_norm),
diis=int(step.diis_subspace),
)
# Structured log: per-iter rows sourced from the C++ trace,
# which is the canonical record of what the SCF did. Periodic
# SCFs (Python-driven) emit ``scf_iter`` live via
# ProgressLogger.iteration() — see vibeqc.progress; the
# molecular C++ path runs as a black box so we replay its
# trace here. The matching ``scf_converged`` record is emitted
# below via plog.converged() so periodic and molecular paths
# share the funnel.
for step in getattr(result, "scf_trace", []) or []:
_slog.emit(
"scf_iter",
iter=int(step.iter),
energy=float(step.energy),
dE=(float(step.delta_e) if step.iter > 1 else None),
grad_norm=float(step.grad_norm),
diis_subspace=int(step.diis_subspace),
)
# Non-convergence path: write a crash dump too, since "ran
# to max_iter" is a real failure mode. We don't raise — the
# SCF returned a result and the user can inspect it — but we
# leave the .dump alongside the .out so the diagnostic recipe
# is the same as the exception path.
if not bool(getattr(result, "converged", False)) and _crash_target is not None:
dump_state = {
"phase": f"scf.{resolved_method} (max_iter)",
"scf_trace": list(getattr(result, "scf_trace", []) or []),
"n_iters_completed": int(getattr(result, "n_iter", 0)),
}
for k_state, k_attr in (
("density", "density"),
("density_alpha", "density_alpha"),
("density_beta", "density_beta"),
("fock", "fock"),
("fock_alpha", "fock_alpha"),
("fock_beta", "fock_beta"),
("mo_coeffs", "mo_coeffs"),
("mo_coeffs_alpha", "mo_coeffs_alpha"),
("mo_coeffs_beta", "mo_coeffs_beta"),
):
v = getattr(result, k_attr, None)
if v is not None:
dump_state[k_state] = v
_opts_for_dump = (
{
"rhf": rhf_options,
"uhf": uhf_options,
"rks": rks_options,
"uks": uks_options,
}.get(resolved_method)
if resolved_method in ("rhf", "uhf", "rks", "uks")
else None
)
dump_on_failure(
_crash_target,
None,
dump_state,
phase=f"scf.{resolved_method} (max_iter)",
options=_opts_for_dump,
molecule=molecule,
)
f.write(
f"\n SCF did not converge — crash dump written to "
f"{_crash_target.with_suffix('.dump').name}\n"
)
f.flush()
# Per-analysis success flags from the .out properties block, so the
# citation assembler below can gate best-effort analyses (Hirshfeld)
# on whether they actually computed + surfaced for this job. Stays
# empty on the solver-trace / MLIP branches (no properties block),
# which correctly suppresses the Hirshfeld citation there.
_prop_status: dict[str, bool] = {}
# Check if we got a SolverResult (non-mean-field method)
if isinstance(result, SolverResult):
f.write("\n " + "-" * 60 + "\n")
f.write(" Non-mean-field solver result\n")
f.write(" " + "-" * 60 + "\n")
f.write(_format_solver_trace(result) + "\n")
elif getattr(result, "model_info", None) is not None:
# MLIP (method="mace"): a single pre-trained forward pass — there
# is NO SCF here, no Fock matrix, no DIIS. Emitting the empty SCF
# iteration table (||[F,DS]|| / DIIS columns, "converged in 1
# iterations") would misrepresent what MACE does, so print the
# single-shot energy instead, then the model provenance block
# (model + license + energy-scale caveat) so the .out is self-
# documenting about the external model behind these numbers.
f.write("\n " + "-" * 60 + "\n")
f.write(" Single-point energy — pre-trained MLIP forward pass (no SCF)\n")
f.write(f" E = {float(result.energy):.10f} Ha\n")
# _format_mlip_provenance opens with its own separator rule.
f.write(_format_mlip_provenance(result.model_info) + "\n")
else:
f.write(
format_scf_trace(
result,
molecule=molecule,
basis=basis_obj,
include_banner=False,
property_status=_prop_status,
)
+ "\n"
)
# MSINDO COSMO (run_job(method="msindo", solvent=...)) — surface the
# solvation energy + gas reference alongside the in-solvent total.
_esolv = getattr(result, "e_solv", None)
if _esolv is not None:
_Hk = 627.509474063
_eg = getattr(result, "e_gas", None)
f.write(
"\n ## Implicit solvation (COSMO)\n "
+ "-" * 52
+ "\n"
+ (
f" Gas-phase total = {_eg:16.10f} Ha\n"
if _eg is not None
else ""
)
+ f" Solvation energy = {_esolv:16.10f} Ha"
f" ({_esolv * _Hk:9.2f} kcal/mol)\n\n"
)
_slog.emit(
"e_solv", value=float(_esolv), value_kcal=float(_esolv * _Hk)
)
# MSINDO reports an atomization/binding energy (E − Σ atomic
# reference, ATENG) directly — surface it, the semiempirical
# analogue of the mean-field atomization block.
_be = getattr(result, "binding_energy", None)
if _be is not None:
_Hk = 627.509474063
f.write(
"\n ## Atomization / binding energy\n " + "-" * 52 + "\n"
f" Binding energy (E − Σ ATENG) = {_be:16.10f} Ha"
f" ({_be * _Hk:9.2f} kcal/mol)\n\n"
)
_slog.emit(
"binding_energy", value=float(_be), value_kcal=float(_be * _Hk)
)
f.flush()
if resolved_method in _MLIP_METHODS:
plog.info(
"MLIP energy evaluated (single forward pass, no SCF); "
f"E = {float(getattr(result, 'energy', 0.0)):.10f} Ha"
)
else:
plog.converged(
n_iter=int(getattr(result, "n_iter", 0)),
energy=float(getattr(result, "energy", 0.0)),
converged=bool(getattr(result, "converged", False)),
)
# ---- SCF diagnostics: "possibly conducting state" warning -----
# If the converged HOMO-LUMO gap is below 1e-4 Ha (~3 meV) the
# Aufbau-filled ground state is unreliable — the user should
# enable Fermi-Dirac smearing.
_CONDUCTING_GAP_THRESHOLD_HA = 1.0e-4
has_mo_energies = hasattr(result, "mo_energies")
if has_mo_energies and bool(getattr(result, "converged", False)):
eps = result.mo_energies
# Determine occupation count for this method.
n_elec = sum(a.Z for a in molecule.atoms) - getattr(molecule, "charge", 0)
if method in ("rhf", "rks"):
n_occ = n_elec // 2
else:
n_occ = None
if n_occ is not None and n_occ > 0 and n_occ < len(eps):
gap = float(eps[n_occ] - eps[n_occ - 1])
if gap < _CONDUCTING_GAP_THRESHOLD_HA:
_ev_factor = 27.211386245988
msg = (
f" WARNING: POSSIBLY CONDUCTING STATE — "
f"HOMO-LUMO gap = {gap:.6f} Ha ({gap * _ev_factor:.4f} eV). "
f"Consider Fermi-Dirac smearing (smearing_options=...)."
)
f.write("\n" + msg + "\n\n")
f.flush()
import warnings as _warnings
_warnings.warn(msg)
# Post-SCF Coupled-Cluster (CCSD / CCSD(T)). Closed-shell DF
# kernel anchored against the in-repo spin-orbital SGWB-1991
# reference (tests/test_ccsd_anchor.py); dense O(N^6) pilot,
# small molecules only (see docs/handover_ccsd.md).
if method in ("ccsd", "ccsd(t)") and bool(getattr(result, "converged", False)):
from .cc import CCSDOptions, chemical_core_orbital_count
from .cc import run_ccsd as _run_ccsd_py
cc_opts = CCSDOptions()
cc_opts.compute_triples = method == "ccsd(t)"
# Default frozen core: one chemical-core count per atom
# (noble-gas cores; H2O -> 1, CO2 -> 3).
cc_opts.n_frozen_core = chemical_core_orbital_count(molecule)
cc_opts.resolve_aux_basis(basis)
with (
plog.stage(
"ccsd",
detail="DF-CCSD" + ("(T)" if cc_opts.compute_triples else ""),
),
PerfScope("ccsd"),
):
cc_result = _run_ccsd_py(molecule, basis_obj, result, cc_opts)
_slog.emit(
"ccsd_converged",
converged=bool(cc_result.converged),
n_iter=int(cc_result.n_iter),
e_ccsd_correlation=float(cc_result.e_ccsd_correlation),
e_ccsd=float(cc_result.e_ccsd),
e_t=float(cc_result.e_t) if cc_opts.compute_triples else None,
e_total=float(cc_result.e_total),
t1_norm=float(cc_result.t1_norm),
t2_norm=float(cc_result.t2_norm),
)
# Write CCSD iteration table
triples_label = " CCSD(T)" if cc_opts.compute_triples else " CCSD"
f.write("\n Coupled-Cluster" + triples_label + "\n " + "-" * 78 + "\n")
if cc_opts.compute_triples:
hdr = " {:<5s} {:>16s} {:>12s} {:>12s} {:>12s} {:>6s}".format(
"Iter", "E(CCSD)", "dE", "|R1|", "|R2|", "DIIS"
)
else:
hdr = " {:<5s} {:>16s} {:>12s} {:>12s} {:>12s} {:>6s}".format(
"Iter", "E(CCSD)", "dE", "|R1|", "|R2|", "DIIS"
)
f.write(hdr + "\n")
f.write(" " + "-" * 78 + "\n")
for step in cc_result.cc_trace:
f.write(
" {:<5d} {:>16.10f} {:>12.6e} {:>12.6e} {:>12.6e} {:>6d}\n".format(
int(step.iter),
float(step.energy),
float(step.delta_e) if step.iter > 1 else 0.0,
float(step.r1_norm),
float(step.r2_norm),
int(step.diis_subspace),
)
)
f.write(" " + "-" * 78 + "\n")
if cc_result.converged:
f.write(f" CCSD converged in {cc_result.n_iter} iterations\n")
else:
f.write(" CCSD DID NOT CONVERGE\n")
f.write(
f" E(CCSD correlation) = {cc_result.e_ccsd_correlation:16.10f} Ha\n"
f" E(CCSD) = {cc_result.e_ccsd:16.10f} Ha\n"
)
if cc_opts.compute_triples:
f.write(
f" E(T) correction = {cc_result.e_t:16.10f} Ha\n"
f" E(CCSD(T)) = {cc_result.e_ccsd_t:16.10f} Ha\n"
)
f.write(
f" T1 norm = {cc_result.t1_norm:16.10f}\n"
f" T2 norm = {cc_result.t2_norm:16.10f}\n"
)
f.write("\n")
f.flush()
# Expose the CC result without mutating the pybind11 SCF
# struct (def_readonly fields, no __dict__).
result = _CCSDAugmented(result, cc_result)
# Post-SCF Møller–Plesset MP2 (+ spin-component-scaled SCS / SOS).
# RHF-reference only (closed-shell guarded early). The native C++ MP2
# carries the c_os / c_ss spin scaling; the factors are taken from the
# single source of truth in vibeqc.correlation so the run_job variants
# and the general/MSINDO kernels never drift. The unscaled e_os / e_ss
# are always reported so a user can re-derive any variant from one run.
if method in ("mp2", "scs-mp2", "sos-mp2") and bool(
getattr(result, "converged", False)
):
from .correlation import _MP2_SCALES
_os_s, _ss_s = _MP2_SCALES[method]
# Closed shell → restricted MP2; open shell → native UMP2 (UHF
# reference). Both carry the c_os/c_ss spin-component scaling and
# report e_hf / e_correlation / e_total; the spin channels differ
# (RMP2: e_os/e_ss; UMP2: e_ab opposite-spin, e_aa+e_bb same-spin).
_open = molecule.multiplicity > 1
if _open:
from ._vibeqc_core import UMP2Options
from ._vibeqc_core import run_ump2 as _run_ump2
_mp2_opts = UMP2Options()
else:
from ._vibeqc_core import MP2Options
from ._vibeqc_core import run_mp2 as _run_mp2
_mp2_opts = MP2Options()
_mp2_opts.c_os = _os_s
_mp2_opts.c_ss = _ss_s
_detail = ("U" if _open else "") + method.upper()
with plog.stage("mp2", detail=_detail), PerfScope("mp2"):
if _open:
mp2_result = _run_ump2(molecule, basis_obj, result, _mp2_opts)
else:
mp2_result = _run_mp2(molecule, basis_obj, result, _mp2_opts)
# Uniform opposite-/same-spin view across R/U (Grimme channels:
# opposite = αβ, same = αα+ββ).
_e_os = float(mp2_result.e_ab if _open else mp2_result.e_os)
_e_ss = float(
(mp2_result.e_aa + mp2_result.e_bb) if _open else mp2_result.e_ss
)
_slog.emit(
"mp2_done",
variant=method,
open_shell=_open,
c_os=float(_os_s),
c_ss=float(_ss_s),
e_os=_e_os,
e_ss=_e_ss,
e_correlation=float(mp2_result.e_correlation),
e_total=float(mp2_result.e_total),
)
_ref = "UHF" if _open else "RHF"
f.write("\n Møller–Plesset MP2 (" + _detail + ")\n " + "-" * 78 + "\n")
f.write(
f" E({_ref} reference) = {mp2_result.e_hf:16.10f} Ha\n"
f" E(OS, opposite-spin) = {_e_os:16.10f} Ha"
f" (c_os = {_os_s:.4f})\n"
f" E(SS, same-spin) = {_e_ss:16.10f} Ha"
f" (c_ss = {_ss_s:.4f})\n"
f" E(MP2 correlation) = {mp2_result.e_correlation:16.10f} Ha\n"
)
_total_label = f"E({method.upper()} total)"
f.write(f" {_total_label:<22s} = {mp2_result.e_total:16.10f} Ha\n")
f.write("\n")
f.flush()
# Expose the MP2 result for programmatic access. The C++ SCF
# result has no __dict__, so we wrap it in a transparent proxy
# (mirrors the dispersion path): result.mp2 carries the
# MP2Result, result.energy_total is the MP2 total, and every
# SCF attribute still forwards through.
result = _MP2Augmented(result, mp2_result)
# Post-SCF DLPNO-MP2 (vibeqc.dlpno.mp2): Foster-Boys-localised
# occupieds, PAO domains with semicanonical virtual bases, PNO
# compression with the semicanonical truncation correction, and the
# coupled LMP2 residual iteration (Pinski et al., JCP 143, 034108
# (2015)). Closed-shell RHF reference only for now. The RI fitting
# basis resolves per the correlation ("ri") aux family of the
# orbital basis; pass DLPNOMP2Options via dlpno_options to control
# thresholds / frozen core.
if method == "dlpno-mp2" and bool(getattr(result, "converged", False)):
if molecule.multiplicity > 1:
raise ValueError(
"method='dlpno-mp2' currently supports closed-shell "
"(multiplicity 1) references only; open-shell DLPNO is "
"a later milestone (see HANDOVER_DLPNO.md)."
)
from .density_fitting import (
DensityFitting as _PyDF,
default_aux_basis_for as _aux_for,
)
from .dlpno.mp2 import DLPNOMP2Options, run_dlpno_mp2
_dlpno_opts = dlpno_options if dlpno_options is not None else DLPNOMP2Options()
_aux_name = _aux_for(basis_obj.name, kind="ri")
_aux_obj = BasisSet(molecule, _aux_name)
with plog.stage("dlpno-mp2", detail=_aux_name), PerfScope("dlpno_mp2"):
_df = _PyDF(basis_obj, _aux_obj, aux_basis_name=_aux_name)
dlpno_result = run_dlpno_mp2(
molecule, basis_obj, result, _df, _dlpno_opts
)
_n_kept = dlpno_result.n_pairs
_n_scr = dlpno_result.n_pairs_screened
_avg_pno = (
sum(dlpno_result.pno_per_pair.values()) / _n_kept
if _n_kept
else 0.0
)
_slog.emit(
"dlpno_mp2_done",
aux_basis=_aux_name,
converged=bool(dlpno_result.converged),
n_iter=int(dlpno_result.n_iter),
n_frozen=int(dlpno_result.n_frozen),
n_pairs=int(_n_kept),
n_pairs_screened=int(_n_scr),
avg_pno_per_pair=float(_avg_pno),
e_corr_iterated=float(dlpno_result.e_corr_iterated),
e_pno_correction=float(dlpno_result.e_pno_correction),
e_distant=float(dlpno_result.e_distant),
e_correlation=float(dlpno_result.e_corr),
e_total=float(dlpno_result.e_total),
)
f.write("\n DLPNO-MP2 (Pinski 2015; RI: " + _aux_name + ")\n " + "-" * 78 + "\n")
f.write(
f" E(RHF reference) = {dlpno_result.e_hf:16.10f} Ha\n"
f" pairs kept / screened = {_n_kept:d} / {_n_scr:d}"
f" (frozen core: {dlpno_result.n_frozen:d})\n"
f" avg PNOs per pair = {_avg_pno:14.1f}\n"
f" E(iterated pairs) = {dlpno_result.e_corr_iterated:16.10f} Ha\n"
f" E(PNO truncation corr) = {dlpno_result.e_pno_correction:16.10f} Ha\n"
f" E(distant-pair est.) = {dlpno_result.e_distant:16.10f} Ha\n"
f" E(DLPNO-MP2 corr) = {dlpno_result.e_corr:16.10f} Ha\n"
f" E(DLPNO-MP2 total) = {dlpno_result.e_total:16.10f} Ha\n"
)
if not dlpno_result.converged:
f.write(" WARNING: coupled LMP2 iteration did not converge\n")
f.write("\n")
f.flush()
result = _DLPNOMP2Augmented(result, dlpno_result)
# Post-SCF DLPNO-CCSD / CCSD(T) correctness pilot
# (vibeqc.dlpno.ccsd): the Riplinger-2013 PNO ansatz iterated
# through the FCI-anchored spin-orbital engine. Exact physics,
# O(N^6) pilot cost — hard-capped at max_nbf basis functions
# until the reduced-scaling engine (M3c) lands.
if method in ("dlpno-ccsd", "dlpno-ccsd(t)") and bool(
getattr(result, "converged", False)
):
if molecule.multiplicity > 1:
raise ValueError(
f"method={method!r} currently supports closed-shell "
"(multiplicity 1) references only; open-shell DLPNO is "
"a later milestone (see HANDOVER_DLPNO.md)."
)
from .density_fitting import (
DensityFitting as _PyDF,
default_aux_basis_for as _aux_for,
)
from .dlpno.ccsd import (
DLPNOCCSDPilotOptions,
run_dlpno_ccsd_pilot,
)
_cc_opts = (
dlpno_ccsd_options
if dlpno_ccsd_options is not None
else DLPNOCCSDPilotOptions()
)
_cc_opts.compute_triples = method == "dlpno-ccsd(t)"
_aux_name = _aux_for(basis_obj.name, kind="ri")
_aux_obj = BasisSet(molecule, _aux_name)
with plog.stage("dlpno-ccsd", detail=_aux_name), PerfScope("dlpno_ccsd"):
_df = _PyDF(basis_obj, _aux_obj, aux_basis_name=_aux_name)
cc_result = run_dlpno_ccsd_pilot(
molecule, basis_obj, result, _df, _cc_opts
)
_avg_pno = (
sum(cc_result.pno_per_pair.values()) / cc_result.n_pairs
if cc_result.n_pairs
else 0.0
)
_slog.emit(
"dlpno_ccsd_done",
variant=method,
aux_basis=_aux_name,
converged=bool(cc_result.converged),
n_iter=int(cc_result.n_iter),
n_frozen=int(cc_result.n_frozen),
n_pairs=int(cc_result.n_pairs),
avg_pno_per_pair=float(_avg_pno),
t1_norm=float(cc_result.t1_norm),
e_ccsd_correlation=float(cc_result.e_corr),
e_t=float(cc_result.e_t),
e_total=float(cc_result.e_total),
)
f.write(
"\n DLPNO-" + ("CCSD(T)" if _cc_opts.compute_triples else "CCSD")
+ " pilot (Riplinger 2013; RI: " + _aux_name + ")\n " + "-" * 78 + "\n"
)
f.write(
f" E(RHF reference) = {cc_result.e_hf:16.10f} Ha\n"
f" pairs / avg PNOs = {cc_result.n_pairs:d} / {_avg_pno:.1f}"
f" (frozen core: {cc_result.n_frozen:d})\n"
f" E(CCSD correlation) = {cc_result.e_corr:16.10f} Ha\n"
)
if _cc_opts.compute_triples:
f.write(f" E((T) correction) = {cc_result.e_t:16.10f} Ha\n")
f.write(
f" E(total) = {cc_result.e_total:16.10f} Ha\n"
" note: O(N^6) correctness pilot (max_nbf-capped) — the\n"
" reduced-scaling DLPNO engine is in progress (M3c).\n"
)
if not cc_result.converged:
f.write(" WARNING: CCSD pilot iteration did not converge\n")
f.write("\n")
f.flush()
result = _DLPNOCCSDAugmented(result, cc_result)
# Post-SCF OVGF / GF2 Green's-function quasiparticle IPs / EAs. Closed
# shell uses the spatial diagonal second-order self-energy; open shell
# (UHF reference) uses the spin-resolved spin-orbital self-energy
# (vibeqc.propagator) — the electron-propagator analogue of UMP2.
_ovgf_ok = method == "ovgf" and bool(getattr(result, "converged", False))
if _ovgf_ok and molecule.multiplicity != 1:
import numpy as _np
from ._vibeqc_core import compute_eri as _compute_eri
from .propagator import (
unrestricted_quasiparticle_energies as _u_qp_energies,
)
EV = 27.211386245988
Ca = _np.asarray(result.mo_coeffs_alpha)
Cb = _np.asarray(result.mo_coeffs_beta)
ea_mo = _np.asarray(result.mo_energies_alpha)
eb_mo = _np.asarray(result.mo_energies_beta)
nbf = len(ea_mo)
# The spin-orbital tensor is (2·n_bf)⁴ — guard tighter than the
# closed-shell n_bf⁴ path.
if 2 * nbf > 80:
raise NotImplementedError(
f"open-shell method='ovgf' builds the full spin-orbital MO "
f"two-electron tensor (2·n_bf={2 * nbf}); it is limited to "
f"2·n_bf <= 80 (n_bf <= 40). Use a smaller basis."
)
n_elec = molecule.n_electrons()
na = (n_elec + molecule.multiplicity - 1) // 2
nb = n_elec - na
# α/β frontier orbitals: spin-HOMO (IP) + spin-LUMO (EA) per channel.
orbs = []
for _sp, _no in (("alpha", na), ("beta", nb)):
if _no >= 1:
orbs.append((_sp, _no - 1))
if _no < nbf:
orbs.append((_sp, _no))
with plog.stage("ovgf", detail="open-shell GF2"), PerfScope("ovgf"):
eri_ao = _np.asarray(_compute_eri(basis_obj))
res = _u_qp_energies(
eri_ao, Ca, Cb, ea_mo, eb_mo, na, nb, orbs, iterate=True
)
def _occ(sp, ix):
return ix < (na if sp == "alpha" else nb)
ips = [-q.eps_qp * EV for sp, q in res if _occ(sp, q.orbital)]
eas = [-q.eps_qp * EV for sp, q in res if not _occ(sp, q.orbital)]
first_ip = min(ips) if ips else None
ea_val = max(eas) if eas else None
_slog.emit(
"ovgf_done",
open_shell=True,
orbitals=[[sp, q.orbital] for sp, q in res],
eps_scf=[float(q.eps_scf) for sp, q in res],
eps_qp=[float(q.eps_qp) for sp, q in res],
pole_strength=[float(q.pole_strength) for sp, q in res],
ip_first_ev=(None if first_ip is None else float(first_ip)),
ea_ev=(None if ea_val is None else float(ea_val)),
)
f.write(
"\n OVGF / GF2 quasiparticle energies (open-shell, "
"spin-resolved second-order self-energy)\n " + "-" * 78 + "\n"
)
f.write(
" {:>5s} {:>5s} {:>4s} {:>13s} {:>13s} {:>8s}\n".format(
"spin", "MO", "occ", "Koopmans(eV)", "OVGF (eV)", "pole Z"
)
)
for sp, q in res:
f.write(
" {:>5s} {:>5d} {:>4s} {:>13.4f} {:>13.4f} {:>8.4f}\n".format(
sp,
q.orbital,
"occ" if _occ(sp, q.orbital) else "vir",
q.eps_scf * EV,
q.eps_qp * EV,
q.pole_strength,
)
)
f.write(" " + "-" * 78 + "\n")
if first_ip is not None:
f.write(f" OVGF first ionization potential = {first_ip:.4f} eV\n")
if ea_val is not None:
f.write(
f" OVGF electron affinity (lowest virtual) = "
f"{ea_val:.4f} eV"
f" (small-basis EA — qualitative only)\n"
)
f.write("\n")
f.flush()
result = _OVGFAugmented(result, res)
elif _ovgf_ok:
import numpy as _np
from ._vibeqc_core import compute_eri as _compute_eri
from .propagator import quasiparticle_energies as _qp_energies
C = _np.asarray(result.mo_coeffs)
eps_mo = _np.asarray(result.mo_energies)
nbf = len(eps_mo)
# The diagonal self-energy needs the full MO 2e tensor (n_bf⁴); guard
# large systems rather than OOM on the transform.
if nbf > 100:
raise NotImplementedError(
f"method='ovgf' builds the full MO two-electron tensor "
f"(n_bf={nbf}); it is currently limited to small/medium "
f"systems (n_bf <= 100). Use a smaller basis, or the "
f"vibeqc.propagator API with on-demand integral blocks."
)
nocc = molecule.n_electrons() // 2
with plog.stage("ovgf", detail="GF2 self-energy"), PerfScope("ovgf"):
eri_ao = _np.asarray(_compute_eri(basis_obj))
eri_mo = _np.einsum(
"pqrs,pi,qj,rk,sl->ijkl", eri_ao, C, C, C, C, optimize=True
)
# Outer valence: up to 3 highest occupied + LUMO.
lo = max(0, nocc - 3)
orbs = (
list(range(lo, nocc + 1)) if nocc < nbf else list(range(lo, nocc))
)
qp = _qp_energies(eps_mo, nocc, eri_mo, orbs, iterate=True)
EV = 27.211386245988
_homo_q = next(q for q in qp if q.orbital == nocc - 1)
_lumo_q = next((q for q in qp if q.orbital == nocc), None)
_slog.emit(
"ovgf_done",
orbitals=orbs,
eps_scf=[float(q.eps_scf) for q in qp],
eps_qp=[float(q.eps_qp) for q in qp],
pole_strength=[float(q.pole_strength) for q in qp],
ip_homo_ev=float(-_homo_q.eps_qp * EV),
ea_lumo_ev=(None if _lumo_q is None else float(-_lumo_q.eps_qp * EV)),
)
f.write(
"\n OVGF / GF2 quasiparticle energies (diagonal "
"second-order self-energy)\n " + "-" * 78 + "\n"
)
f.write(
" {:>5s} {:>6s} {:>13s} {:>13s} {:>8s}\n".format(
"MO", "label", "Koopmans(eV)", "OVGF (eV)", "pole Z"
)
)
for q in qp:
lbl = (
"HOMO"
if q.orbital == nocc - 1
else "LUMO"
if q.orbital == nocc
else ""
)
f.write(
" {:>5d} {:>6s} {:>13.4f} {:>13.4f} {:>8.4f}\n".format(
q.orbital, lbl, q.eps_scf * EV, q.eps_qp * EV, q.pole_strength
)
)
f.write(" " + "-" * 78 + "\n")
f.write(
f" OVGF ionization potential (HOMO) = {-_homo_q.eps_qp * EV:.4f} eV\n"
)
if _lumo_q is not None:
# EA = -eps_qp(LUMO). The diagonal GF2 self-energy gives only
# qualitative EAs in a small/non-augmented basis (non-variational
# virtuals; no diffuse functions for a bound anion) — flagged so
# users don't read a positive value as a stable anion.
f.write(
f" OVGF electron affinity (LUMO) = "
f"{-_lumo_q.eps_qp * EV:.4f} eV"
f" (small-basis EA — qualitative only)\n"
)
if 2 * nbf <= 60:
# Renormalized GF2 (full third-order self-energy + geometric
# screening) — trims the bare-GF2 overcorrection back toward
# experiment. Small systems only (the third-order contractions
# scale steeply). See vibeqc.propagator (renormalized second-
# order GF; lineage Cederbaum 1975 / von Niessen 1984).
from .propagator import renormalized_quasiparticle_energies as _ren_qp
_ren = _ren_qp(
eri_ao,
C,
C,
eps_mo,
eps_mo,
nocc,
nocc,
[("alpha", nocc - 1)],
iterate=True,
)[0][1]
f.write(
f" GF2(renorm) ionization potential (HOMO) = "
f"{-_ren.eps_qp * EV:.4f} eV"
f" (renormalized; pole Z={_ren.pole_strength:.3f})\n"
)
f.write("\n")
f.flush()
result = _OVGFAugmented(result, qp)
# Atomization energy (Σ E_atom − E_mol) at the molecule's mean-field
# level. Free-atom ground-state references are computed at the same
# level (cached per element). Mean-field methods only (a post-SCF
# method's atomization would need atomic post-SCF references too).
if (
atomization
and method in ("rhf", "uhf", "rks", "uks")
and bool(getattr(result, "converged", False))
):
try:
from .atomization import atomization_energy as _atomization
_atm = _atomization(
molecule, float(result.energy), method, basis, functional=functional
)
_Hk = 627.509474063
_slog.emit(
"atomization",
method=method,
functional=functional,
e_molecule=_atm.e_molecule,
e_atoms_sum=_atm.e_atoms_sum,
atomization=_atm.atomization,
atomization_per_atom=_atm.atomization_per_atom,
atomic_energies={
str(z): e for z, e in _atm.atomic_energies.items()
},
)
_flabel = f"/{functional}" if functional else ""
f.write("\n ## Atomization energy\n " + "-" * 52 + "\n")
f.write(
f" Free-atom references ({method.upper()}{_flabel}/{basis}):\n"
)
for _z in sorted(_atm.atomic_energies):
f.write(
f" Z = {_z:<3d} E = {_atm.atomic_energies[_z]:16.10f} Ha\n"
)
f.write(
f" Σ E(atoms) = {_atm.e_atoms_sum:16.10f} Ha\n"
f" E(molecule) = {_atm.e_molecule:16.10f} Ha\n"
f" Atomization energy = {_atm.atomization:16.10f} Ha"
f" ({_atm.atomization_kcal:9.2f} kcal/mol)\n"
f" per atom (n={_atm.n_atoms}) = "
f"{_atm.atomization_per_atom * _Hk:9.2f} kcal/mol\n\n"
)
f.flush()
except Exception as _atm_exc:
f.write(
f"\n (warning: atomization not available: "
f"{type(_atm_exc).__name__}: {_atm_exc})\n\n"
)
f.flush()
# Post-SCF dispersion (D3-BJ) as an additive correction.
# Captured in a local so the composite-total sum below is
# independent of whether ``result`` got wrapped in
# _DispersionAugmented — a SolverResult is never wrapped (it has
# no .e_dispersion attribute), and reading the term back off the
# result then silently dropped D3-BJ from the composite total
# (audit F1.2).
e_d3bj = 0.0
if d3_params is not None:
disp = compute_d3bj(molecule, d3_params)
e_d3bj = float(disp.energy)
e_scf = float(getattr(result, "energy", 0.0))
e_total = e_scf + e_d3bj
f.write(
"\n Dispersion correction (D3-BJ)\n"
" " + "-" * 52 + "\n"
f" {'s6':>10s} {d3_params.s6:14.6f}\n"
f" {'s8':>10s} {d3_params.s8:14.6f}\n"
f" {'a1':>10s} {d3_params.a1:14.6f}\n"
f" {'a2':>10s} {d3_params.a2:14.6f}\n"
f" {'E_disp':>10s} {disp.energy:14.8f} Ha"
f" ({disp.energy * 627.5094740631:+.4f} kcal/mol)\n"
f" {'E_SCF':>10s} {e_scf:14.8f} Ha\n"
f" {'E_total':>10s} {e_total:14.8f} Ha\n"
)
# Wrap the SCF result so callers can access both components
# without breaking existing accesses to mo_coeffs, fock, etc.
if not isinstance(result, SolverResult):
result = _DispersionAugmented(result, e_d3bj, d3_params)
# ---- Composite 3c post-SCF corrections (D4 + gCP + SRB) -------
# Only runs when a composite recipe is active. Each piece is
# additive and logged separately; the composite total is the
# SCF energy + every correction.
e_d4 = 0.0
e_gcp = 0.0
e_srb = 0.0
if use_d4:
if not dftd4_available():
raise ImportError(
"composite recipe requests D4 dispersion but the "
"optional 'dftd4' package is not installed.\n "
"Install via `pip install -e '.[dispersion]'` or "
"`pip install dftd4`."
)
# dftd4 ships per-composite damping keyed on the composite
# name (e.g. 'r2scan-3c', 'wb97x-3c') — distinct from the
# parent-functional damping. Prefer the composite name.
d4_func = (
composite_recipe.name
if composite_recipe is not None
else (functional or "hf")
)
d4_result = compute_d4(
molecule,
d4_func,
charge=float(molecule.charge),
)
e_d4 = float(d4_result.energy)
f.write(
"\n Dispersion correction (D4)\n"
" " + "-" * 52 + "\n"
f" {'functional':>10s} {d4_result.functional!s:>14s}\n"
f" {'E_disp':>10s} {e_d4:14.8f} Ha"
f" ({e_d4 * 627.5094740631:+.4f} kcal/mol)\n"
)
if composite_recipe is not None and composite_recipe.gcp_basis:
# Damped composites (pbeh-3c / hse-3c / r2scan-3c) carry a
# GCPDamping; pass it through as (dmp_scal, dmp_exp). Undamped
# recipes (hf-3c, b3lyp-3c) leave it None. b97-3c has
# gcp_basis=None (SRB-only) so this block is skipped for it.
_gcp_damping = (
(
composite_recipe.gcp_damping.dmp_scal,
composite_recipe.gcp_damping.dmp_exp,
)
if composite_recipe.gcp_damping is not None
else None
)
try:
gcp_result = compute_gcp(
molecule,
composite_recipe.gcp_basis,
damping=_gcp_damping,
)
e_gcp = float(gcp_result.energy)
f.write(
"\n Geometric counterpoise correction (gCP)\n"
" " + "-" * 52 + "\n"
f" {'basis':>10s} {gcp_result.basis_name!s:>14s}\n"
f" {'E_gCP':>10s} {e_gcp:14.8f} Ha"
f" ({e_gcp * 627.5094740631:+.4f} kcal/mol)\n"
)
except GCPDataMissing as exc:
# gCP data table not bundled for this basis — log and
# continue with E_gCP = 0 so the composite total is
# explicitly marked incomplete rather than silently
# wrong. Same posture as the dispersion builtin-stub.
f.write(
"\n Geometric counterpoise correction (gCP)\n"
" " + "-" * 52 + "\n"
f" SKIPPED — {exc}\n"
)
if composite_recipe is not None and composite_recipe.sr_mod is not None:
srb = composite_recipe.sr_mod
atomic_numbers = [int(a.Z) for a in molecule.atoms]
positions = [list(a.xyz) for a in molecule.atoms]
e_srb = float(srb.evaluate(atomic_numbers, positions))
f.write(
f"\n {srb.name}\n"
" " + "-" * 52 + "\n"
f" {'E_SRB':>10s} {e_srb:14.8f} Ha"
f" ({e_srb * 627.5094740631:+.4f} kcal/mol)\n"
)
if composite_recipe is not None:
composite_total = (
float(getattr(result, "energy", 0.0)) + e_d3bj + e_d4 + e_gcp + e_srb
)
f.write(
f"\n Composite total ({composite_recipe.name})\n"
" " + "-" * 52 + "\n"
f" {'E_SCF':>10s} "
f"{float(getattr(result, 'energy', 0.0)):14.8f} Ha\n"
f" {'E_disp':>10s} "
f"{e_d3bj + e_d4:14.8f} Ha\n"
f" {'E_gCP':>10s} {e_gcp:14.8f} Ha\n"
f" {'E_SRB':>10s} {e_srb:14.8f} Ha\n"
f" {'E_total':>10s} {composite_total:14.8f} Ha\n"
)
has_mos = hasattr(result, "mo_energies") or hasattr(result, "mo_energies_alpha")
if write_molden_file and has_mos:
with (
plog.stage("write_molden", detail=str(molden_path.name)),
PerfScope("write_molden"),
):
write_molden(
molden_path,
molecule,
basis_obj,
result,
title=str(output_stem.name),
)
f.write(f"\n Molecular orbitals written to {molden_path.name}\n")
f.flush()
# Population dump (Phase O6). Always-on for molecular runs by
# default — cheap to compute (the SCF density is in memory),
# cheap to write, and downstream tooling shouldn't have to
# screen-scrape the .out for charges + bond orders + dipole.
# The matching block stays in .out for human reading; the
# .txt + .json siblings are the machine-readable form.
if write_population_file and has_mos:
pop_txt = output_stem.parent / (output_stem.name + ".population.txt")
try:
with (
plog.stage("write_population", detail=str(pop_txt.name)),
PerfScope("write_population"),
):
write_population(
output_stem,
result,
basis_obj,
molecule,
)
f.write(
f" Population dump written to "
f"{pop_txt.name} + "
f"{output_stem.name}.population.json\n"
)
f.flush()
except Exception as _pop_exc:
f.write(
f" (warning: population dump failed: "
f"{type(_pop_exc).__name__}: {_pop_exc})\n"
)
f.flush()
warn_writer_failure(
_pop_exc,
pop_txt,
role="population_summary",
category=OutputFailureKind.optional_artifact,
writer=_output_writer,
)
# Volumetric cubes (Phase O6). Opt-in via write_cube=: True /
# "density" / "homo" / "lumo" / list-of-those-or-int-indices.
# Each cube is wrapped in its own try/except so a single
# grid-evaluation failure on one MO doesn't block the others.
_cube_req = parse_write_cube_kwarg(write_cube)
if _cube_req and has_mos:
if _cube_req.density:
try:
with (
plog.stage("write_cube_density"),
PerfScope("write_cube_density"),
):
cube_path = write_cube_density_for_run_job(
output_stem,
result,
basis_obj,
molecule,
spacing=cube_spacing,
padding=cube_padding,
)
f.write(f" Density cube written to {cube_path.name}\n")
f.flush()
except Exception as _cube_exc:
f.write(
f" (warning: density cube write failed: "
f"{type(_cube_exc).__name__}: {_cube_exc})\n"
)
f.flush()
warn_writer_failure(
_cube_exc,
output_stem.with_suffix(".density.cube"),
role="density_cube",
category=OutputFailureKind.optional_artifact,
writer=_output_writer,
)
if _cube_req.mo_labels:
try:
_mo_targets = requested_mo_indices(
list(_cube_req.mo_labels),
result,
molecule,
)
except Exception as _exc:
f.write(
f" (warning: MO label resolution failed: "
f"{type(_exc).__name__}: {_exc})\n"
)
warn_writer_failure(
_exc,
output_stem.with_suffix(".cube"),
role="mo_label_resolution",
category=OutputFailureKind.optional_artifact,
writer=_output_writer,
)
_mo_targets = []
for _idx, _name in _mo_targets:
try:
with (
plog.stage(f"write_cube_mo_{_name}"),
PerfScope(f"write_cube_mo_{_name}"),
):
_mo_path = write_cube_mo_for_run_job(
output_stem,
result,
basis_obj,
molecule,
_idx,
_name,
spacing=cube_spacing,
padding=cube_padding,
)
f.write(f" MO cube written to {_mo_path.name}\n")
f.flush()
except Exception as _cube_exc:
f.write(
f" (warning: MO {_name} cube write failed: "
f"{type(_cube_exc).__name__}: {_cube_exc})\n"
)
f.flush()
warn_writer_failure(
_cube_exc,
output_stem.with_suffix(f".{_name}.cube"),
role=f"mo_cube_{_name}",
category=OutputFailureKind.optional_artifact,
writer=_output_writer,
)
# Citations (Phase O5b). Assemble the reference list for this
# job (software + libint + libxc-if-DFT + basis-set +
# functional + DIIS + dispersion-if-used + ECP-if-used) and
# emit {stem}.bibtex + .references siblings. The matching
# "## References" block is appended to the .out a few lines
# below so the text log carries the same references. Failures
# are non-fatal: a routing miss or a writer crash leaves a
# warning in the .out and the SCF result is unaffected.
cite_block_text: str | None = None
_refs = None
if citations:
try:
_cite_db = load_default_database()
_dispersion_key = None
_dispersion_params_key = None
if d3_params is not None:
# The database routes "d3bj" → Grimme 2010 + 2011.
_dispersion_key = "d3bj"
elif use_d4:
# "d4" → Caldeweyher 2019 (method paper), plus —
# for parametrizations fit outside that paper —
# the damping-parameter fit paper via
# routes.dispersion_params["d4:<key>"]. The key
# mirrors the damping lookup in the post-SCF D4
# block above: composite name when a 3c recipe is
# active, else the SCF functional ("hf" for pure
# Hartree-Fock + D4).
_dispersion_key = "d4"
_dispersion_params_key = normalize_d4_key(
composite_recipe.name
if composite_recipe is not None
else (functional or "hf")
)
# Detect direct SCF usage for citation routing.
_uses_direct = _detect_direct_scf(
resolved_method,
rhf_options,
uhf_options,
rks_options,
uks_options,
basis,
molecule,
)
# SCF accelerator picked up from opts.scf_accelerator —
# fires the matching DIIS / EDIIS / ADIIS / KDIIS
# citation instead of always crediting plain DIIS.
_scf_accel = _detect_scf_accelerator(
resolved_method,
rhf_options,
uhf_options,
rks_options,
uks_options,
)
# ECP detection — libecpint citation only fires when
# at least one options struct carries an ecp_centers
# list (manual ECPCenter recipe or auto_ecp_centers).
_uses_ecp = _detect_uses_ecp(
rhf_options,
uhf_options,
rks_options,
uks_options,
)
# CPCM solvation citation fires when run_job was
# invoked with a non-None ``solvent``; vibe-qc on main
# only ships CPCM-style models, so the variant key
# defaults to "cpcm".
_uses_cpcm = solvent is not None
# For composite-3c jobs (hf-3c / pbeh-3c / …) route
# citations by the *composite keyword*, not the
# resolved mean-field method — the composite carries
# its own defining paper (+ gCP / D3) in
# routes.methods. ``composite_recipe`` is None for
# non-composite jobs, in which case the resolved
# method name is the right routing key.
# Post-SCF correlation methods run an RHF/UHF SCF first
# (resolved_method = "rhf"/"uhf"), but their citation key is
# the *original* method — routes.methods.{ccsd,mp2,scs-mp2,…}
# carry the correlation papers (Møller–Plesset, Grimme SCS,
# Jung SOS, Purvis–Bartlett …). Routing on resolved_method
# would silently drop those references.
_cite_method = (
composite_recipe.name
if composite_recipe is not None
else method
if method in _POSTSCF_CITE_METHODS
else resolved_method
)
# MLIP engines (method="mace") evaluate no Gaussian
# integrals and run no SCF — suppress the always-on
# libint + DIIS routes so the references reflect what
# actually ran. The MACE / e3nn / PyTorch citations come
# in via routes.methods[<mlip>] instead.
_is_mlip = resolved_method in _MLIP_METHODS
# INDO-family engines (MSINDO molecular + periodic CCM) use
# analytic Slater (STO) integrals, not libint Gaussians — so the
# always-on libint integral citation must not fire (they do use
# Pulay DIIS, so uses_scf stays on).
_is_indo = resolved_method in ("msindo", "ccm")
# Per-model MLIP foundation-model citation: the model
# actually run (result.model_info) decides which foundation
# paper to cite (MPA-0 -> Batatia 2024; OFF23 -> Kovács
# 2023). The MACE *method* paper fires via the static
# routes.methods.mace route.
_mlip_extra: list[str] = []
if _is_mlip:
_mi = getattr(result, "model_info", None)
if _mi is not None and getattr(_mi, "citation", ""):
_mlip_extra.append(_mi.citation)
# UNO-CAS starting reference (Pulay-Hamilton 1988): cite when
# an open-shell determinant-solver job resolved to the UHF
# natural-orbital reference (the open-shell default; see the
# cas_reference resolution in _run_single_point).
if resolved_method in (
"selected_ci",
"dmrg",
"v2rdm",
"transcorrelated_ci",
"casci",
"mrci",
"casscf",
"nevpt2",
"caspt2",
) and (
cas_reference or ("uno" if molecule.multiplicity > 1 else "rhf")
) == "uno":
_mlip_extra.append("pulay_hamilton_uno_1988")
# Multi-state CASPT2: the effective-Hamiltonian formalism
# (Finley 1998) fires for both modes; the extended (XMS)
# rotation adds Granovsky 2011 + Shiozaki 2011.
if (
resolved_method == "caspt2"
and caspt2_options is not None
and caspt2_options.multistate is not None
):
_mlip_extra.append("finley_ms_caspt2_1998")
if caspt2_options.multistate == "xms":
_mlip_extra.append("granovsky_xmcqdpt2_2011")
_mlip_extra.append("shiozaki_xms_caspt2_2011")
# Selected-CI CASSCF backend: the CI inside the macro-
# iteration is the CIPSI selection algorithm (the static
# routes only fire for method="selected_ci").
if (
resolved_method == "casscf"
and casscf_options is not None
and casscf_options.ci_solver == "selected_ci"
):
_mlip_extra.append("huron_malrieu_cipsi_1973")
_mlip_extra.append("holmes_tubman_umrigar_shci_2016")
# The Epstein-Nesbet PT2 stage on the selected
# wavefunction cites the semistochastic-HCI paper
# (the routes.methods key fires only for a literal
# selected_ci_pt2 method; this hook is the live
# path for the CASSCF composition).
if getattr(casscf_options, "pt2", None) is not None:
_mlip_extra.append("sharma_semistochastic_hci_2017")
# Integral/exchange acceleration (RI-J Coulomb fitting /
# RIJCOSX chain-of-spheres exchange) from the density_fit /
# cosx flags on the resolved options struct.
_accel = _detect_acceleration(
resolved_method,
rhf_options,
uhf_options,
rks_options,
uks_options,
)
# Second-order convergers (SOSCF / TRAH) cite their defining
# papers when armed by a positive threshold.
_uses_soscf, _uses_trah = _detect_soscf_trah(
resolved_method,
rhf_options,
uhf_options,
rks_options,
uks_options,
)
# Explicit non-AUTO SCF initial guess (SAD / SAP / extended
# Hückel) carries a defining-paper route.
_scf_guess = _detect_scf_guess(
resolved_method,
rhf_options,
uhf_options,
rks_options,
uks_options,
)
# Analytic-gradient + vibrational-analysis drivers. The
# Hessian block (further below) runs a finite-difference
# Hessian *of the analytic atomic gradient*, but only when the
# SCF converged — so the Hessian and gradient driver
# citations gate on convergence. Geometry optimisation also
# evaluates the analytic gradient at every step. Neither
# applies to MLIP engines, whose forces come from the model
# (cited via routes.methods[<mlip>]), not Pulay/Hellmann-
# Feynman theory.
_uses_hessian = (
bool(hessian)
and bool(getattr(result, "converged", False))
and not _is_mlip
)
_uses_gradient = (bool(optimize) or _uses_hessian) and not _is_mlip
# Population analysis (Mulliken / Löwdin charges + Mayer bond
# orders) is computed and surfaced together whenever the
# population dump runs (Phase O6, above) — cite each analysis
# the user actually sees in the .population.* siblings + .out.
# Hirshfeld is best-effort on top (Becke-Lebedev grid + SAD
# promolecule, either of which can fail independently): cite it
# only when it actually computed and reached the .out Atomic-
# charges table for *this* job. `_prop_status["hirshfeld"]` is
# the success flag format_scf_trace recorded above — gating on
# it (rather than the route merely existing) keeps us from
# over-claiming on a grid/promolecule build failure (§ 8).
_props: list[str] = []
if write_population_file and has_mos:
_props = ["mulliken", "lowdin_charges", "mayer_bond_order"]
if _prop_status.get("hirshfeld"):
_props.append("hirshfeld")
_refs = _cite_db.assemble(
method=_cite_method,
basis=basis,
functional=functional,
dispersion=_dispersion_key,
dispersion_params=_dispersion_params_key,
scf_accelerator=_scf_accel,
uses_ase=optimize, # BFGS path uses ASE
uses_ecp=_uses_ecp,
uses_cpcm=_uses_cpcm,
# MSINDO COSMO uses its GEPOL cavity (+ Silla 1991); the
# HF/DFT Lebedev path keeps the default "cpcm" bundle.
solvent_variant=(
"cosmo_gepol"
if (_uses_cpcm and resolved_method == "msindo")
else None
),
direct_scf=_uses_direct,
dft_plus_u=bool(dft_plus_u),
uses_integrals=not (_is_mlip or _is_indo),
uses_scf=not _is_mlip,
acceleration=_accel,
uses_soscf=_uses_soscf,
uses_trah=_uses_trah,
scf_guess=_scf_guess,
uses_gradient=_uses_gradient,
uses_hessian=_uses_hessian,
properties=_props,
extra_entries=_mlip_extra,
)
# Feed the full assembled provenance into the .system
# manifest's [citations] section (CLAUDE.md §8.3/§8.5),
# before writing the user-facing siblings so a brittle
# .bibtex write can't deprive .system of the record.
_output_writer.set_citations(_citation_manifest_rows(_refs))
write_bibtex(output_stem, _refs)
write_references(output_stem, _refs)
cite_block_text = format_references_block(_refs)
_bib_name = output_stem.with_suffix(".bibtex").name
_ref_name = output_stem.with_suffix(".references").name
f.write(f" Citations written to {_bib_name} + {_ref_name}\n")
f.flush()
except Exception as _cite_exc:
f.write(
f" (warning: citation emission failed: "
f"{type(_cite_exc).__name__}: {_cite_exc})\n"
)
f.flush()
warn_writer_failure(
_cite_exc,
output_stem.with_suffix(".bibtex"),
role="citations",
category=OutputFailureKind.optional_artifact,
writer=_output_writer,
)
# Final geometry as a plain XYZ sibling (Phase O3). ``molecule``
# at this point is the geometry the SCF actually ran on — the
# optimised one when ``optimize=True``, otherwise the input.
# Energy in Hartree is appended to the comment line so ASE /
# Open Babel can recover the SCF result from the .xyz alone.
if write_xyz_file:
xyz_path = output_stem.with_suffix(".xyz")
try:
energy_ha = float(getattr(result, "energy", float("nan")))
except (TypeError, ValueError):
energy_ha = float("nan")
try:
with (
plog.stage("write_xyz", detail=str(xyz_path.name)),
PerfScope("write_xyz"),
):
write_xyz(
output_stem,
molecule,
energy_ha=(None if energy_ha != energy_ha else energy_ha),
)
f.write(f" Final geometry written to {xyz_path.name}\n")
f.flush()
except Exception as _xyz_exc:
# Geometry writer is best-effort — never let a `.xyz`
# failure tank a finished SCF. The user can always
# re-run with ``write_xyz_file=False``.
f.write(
f" (warning: .xyz writer failed: "
f"{type(_xyz_exc).__name__}: {_xyz_exc})\n"
)
f.flush()
warn_writer_failure(
_xyz_exc,
xyz_path,
role="final_xyz_geometry",
category=OutputFailureKind.optional_artifact,
writer=_output_writer,
)
# Structured log: properties event. Best-effort — the helpers
# raise on basis-set shapes vibe-qc's Python side can't
# parse; a failure here must never tank the run, so we wrap
# each section. The event only fires when at least one
# property was successfully computed.
if _slog.enabled and has_mos:
_props_payload: dict[str, Any] = {}
try:
from .properties import (
dipole_moment as _dipole,
)
from .properties import (
loewdin_charges as _low,
)
from .properties import (
mulliken_charges as _mul,
)
_props_payload["mulliken"] = [
float(x) for x in _mul(result, basis_obj, molecule)
]
_props_payload["loewdin"] = [
float(x) for x in _low(result, basis_obj, molecule)
]
_dip = _dipole(result, basis_obj, molecule)
_props_payload["dipole"] = {
"x": float(_dip.x),
"y": float(_dip.y),
"z": float(_dip.z),
"total": float(_dip.total),
"total_debye": float(_dip.total_debye),
}
except Exception as _props_exc:
# Properties are best-effort; the trace already
# carries a degraded properties block, the structured
# log just skips the event. Surface a warning so the
# user knows the structured-log `properties` event was
# dropped.
warn_output_failure(
_props_exc,
output_stem.with_suffix(".out"),
role="structured_log_properties",
category=OutputFailureKind.compatibility_fallback,
)
if _props_payload:
_slog.emit("properties", **_props_payload)
if optimize:
if _opt_backend == "ase" and traj_path.is_file():
f.write(f" Optimization trajectory written to {traj_path.name}\n")
f.flush()
t_total = time.perf_counter() - t_job_start
n_iter = getattr(result, "n_iter", 0)
iter_avg = (t_scf / n_iter) if n_iter > 0 else float("nan")
f.write("\n Timings (wall clock, seconds)\n " + "-" * 52 + "\n")
if optimize:
f.write(f" {'Geometry optimization':<32s} {t_opt:12.3f}\n")
if resolved_method in _MLIP_METHODS:
# MLIP: a single forward pass, not an SCF — honest phase label,
# no per-iteration average.
f.write(f" {'MLIP energy evaluation':<32s} {t_scf:12.3f}\n")
else:
f.write(f" {'SCF total':<32s} {t_scf:12.3f}\n")
f.write(
f" {'SCF avg. per iteration':<32s} {iter_avg:12.3f} ({n_iter} iters)\n"
)
f.write(f" {'Job total':<32s} {t_total:12.3f}\n")
f.write(
f" Used {threads_in_use} OpenMP thread"
f"{'s' if threads_in_use != 1 else ''}.\n"
)
f.flush()
plog.info(f"Job total {t_total:.2f}s — output written to {out_path}")
_slog.emit(
"job_end",
total_wall_s=float(t_total),
scf_wall_s=float(t_scf),
opt_wall_s=float(t_opt),
n_iter=int(getattr(result, "n_iter", 0)),
converged=bool(getattr(result, "converged", False)),
energy=float(getattr(result, "energy", 0.0)),
out_path=str(out_path),
)
# "## References" block (Phase O5b) — embed the assembled
# citation list in the .out so the text log is self-contained.
if cite_block_text:
f.write("\n" + cite_block_text + "\n")
f.flush()
# --- Harmonic vibrational analysis (finite-difference Hessian) ---
if hessian:
_scf_converged = bool(getattr(result, "converged", False))
if not _scf_converged:
f.write(
"\n ## Vibrational Frequencies\n"
" " + "-" * 52 + "\n"
" SKIPPED -- SCF did not converge.\n"
)
f.flush()
else:
with (
plog.stage("hessian_fd", detail="finite-difference Hessian"),
PerfScope("hessian_fd"),
):
try:
from .hessian import (
HessianFDOptions,
compute_hessian_fd,
ir_intensities,
)
_hess_scf_opts = {
"rhf": rhf_options,
"uhf": uhf_options,
"rks": rks_options,
"uks": uks_options,
}.get(resolved_method)
_hess_opts = HessianFDOptions(
include_dipole_derivatives=True,
frozen_indices=hessian_frozen_indices,
)
hessian_result = compute_hessian_fd(
molecule,
basis,
method=resolved_method.upper(),
scf_options=_hess_scf_opts,
hessian_options=_hess_opts,
)
f.write(
f"\n ## Vibrational Frequencies\n"
f" " + "-" * 52 + "\n"
f" Finite-difference Hessian"
f" (step = {_hess_opts.step_bohr:.3f} bohr,"
f" {hessian_result.n_displacements} displacements)\n"
f" Imaginary modes: {hessian_result.imaginary_count}\n"
f" Linear molecule: {hessian_result.is_linear}\n\n"
)
_n_skip = 5 if hessian_result.is_linear else 6
_freqs = hessian_result.frequencies_cm1
_ir = None
try:
_ir = ir_intensities(hessian_result)
except Exception as _ir_exc:
f.write(
f" (warning: IR intensities not available: "
f"{type(_ir_exc).__name__}: {_ir_exc})\n"
)
_header = " {:<6s} {:>10s}" + (
" {:>12s}" if _ir is not None else ""
)
if _ir is not None:
f.write(
_header.format(
"Mode", "Freq/cm\u207b\u00b9", "IR/(km/mol)"
)
+ "\n"
)
f.write(" " + "-" * 52 + "\n")
for k in range(_n_skip, len(_freqs)):
_label = f"{k - _n_skip + 1}"
_freq = _freqs[k]
_iri = _ir[k]
if _freq < 0:
_freq_str = f"{abs(_freq):.1f}i"
else:
_freq_str = f"{_freq:.1f}"
f.write(
f" {_label:<6s} {_freq_str:>10s} {_iri:>12.2f}\n"
)
else:
f.write(
_header.format("Mode", "Freq/cm\u207b\u00b9") + "\n"
)
f.write(" " + "-" * 52 + "\n")
for k in range(_n_skip, len(_freqs)):
_label = f"{k - _n_skip + 1}"
_freq = _freqs[k]
if _freq < 0:
_freq_str = f"{abs(_freq):.1f}i"
else:
_freq_str = f"{_freq:.1f}"
f.write(f" {_label:<6s} {_freq_str:>10s}\n")
f.write("\n")
f.flush()
# Thermochemistry (RRHO ideal gas) from the harmonic
# frequencies — ZPE + thermal U/H/S/G at T, p. Reuses
# the general vibeqc.thermo engine; method-agnostic
# (any reference that produced the Hessian).
try:
from .thermo import (
ThermoOptions as _ThermoOptions,
)
from .thermo import (
compute_thermochemistry as _compute_thermo,
)
_topts = thermo_options or _ThermoOptions()
_thermo = _compute_thermo(
molecule, hessian_result, options=_topts
)
_Hced = 627.509474063 # Hartree -> kcal/mol
_slog.emit(
"thermochemistry",
temperature=float(_topts.temperature),
pressure=float(_topts.pressure),
symmetry_number=int(_topts.symmetry_number),
rotor_type=_thermo.rotor_type,
zpe=float(_thermo.zpe),
u_thermal=float(_thermo.u_thermal),
h_thermal=float(_thermo.h_thermal),
g_thermal=float(_thermo.g_thermal),
s_total=float(_thermo.s_total),
e_scf=float(getattr(result, "energy", float("nan"))),
)
_e0 = float(getattr(result, "energy", float("nan")))
f.write(
" ## Thermochemistry (RRHO ideal gas)\n"
" " + "-" * 52 + "\n"
f" T = {_topts.temperature:.2f} K"
f" p = {_topts.pressure:.0f} Pa"
f" σ_rot = {_topts.symmetry_number}"
f" rotor = {_thermo.rotor_type}\n"
)
if _thermo.n_imaginary_modes_excluded:
f.write(
f" (excluded {_thermo.n_imaginary_modes_excluded}"
f" imaginary mode(s) from the partition function)\n"
)
f.write(
f" Zero-point energy = {_thermo.zpe:16.10f} Ha"
f" ({_thermo.zpe * _Hced:9.3f} kcal/mol)\n"
f" Thermal corr. to U = {_thermo.u_thermal:16.10f} Ha\n"
f" Thermal corr. to H = {_thermo.h_thermal:16.10f} Ha\n"
f" Thermal corr. to G = {_thermo.g_thermal:16.10f} Ha\n"
f" Total entropy S = {_thermo.s_total:16.10f} Ha/K"
f" ({_thermo.s_total * _Hced * 1000:8.3f} cal/mol/K)\n"
)
if _e0 == _e0: # not NaN
f.write(
f" E(elec) + ZPE = {_e0 + _thermo.zpe:16.10f} Ha\n"
f" H = E(elec) + H_corr = {_e0 + _thermo.h_thermal:16.10f} Ha\n"
f" G = E(elec) + G_corr = {_e0 + _thermo.g_thermal:16.10f} Ha\n"
)
f.write("\n")
f.flush()
except Exception as _thermo_exc:
f.write(
f" (warning: thermochemistry not available: "
f"{type(_thermo_exc).__name__}: {_thermo_exc})\n\n"
)
f.flush()
except Exception as _hess_exc:
f.write(
f"\n ## Vibrational Frequencies\n"
f" " + "-" * 52 + "\n"
f" FAILED: {type(_hess_exc).__name__}: {_hess_exc}\n"
)
f.flush()
hessian_result = None
# --- QVF visualisation archive (v1) ----------------------------------
if output_qvf:
try:
from vibeqc.output.formats.qvf import (
scf_history_from_result as _scf_history_from_result,
)
from vibeqc.output.formats.qvf import write_qvf as _write_qvf
_qvf_path_for_warn = output_stem.with_suffix(".qvf")
_qvf_scf_history = _scf_history_from_result(result)
_bibtex = None
if _refs is not None:
try:
from vibeqc.output.citations.bibtex import format_bibtex
_bibtex = format_bibtex(_refs)
except Exception as _bib_exc:
warn_output_failure(
_bib_exc,
_qvf_path_for_warn,
role="qvf_bibtex_prep",
category=OutputFailureKind.compatibility_fallback,
)
_pop = None
if write_population_file and has_mos:
try:
from vibeqc.output.formats.population import (
compute_population_summary,
)
_pop = compute_population_summary(
result,
basis_obj,
molecule,
)
except Exception as _qpop_exc:
warn_output_failure(
_qpop_exc,
_qvf_path_for_warn,
role="qvf_population_prep",
category=OutputFailureKind.compatibility_fallback,
)
_qvf_vol_data = None
if has_mos and _cube_req.density:
try:
from vibeqc.output.formats.qvf import qvf_density_data
_qvf_vol_data = qvf_density_data(
result,
basis_obj,
molecule,
spacing=cube_spacing,
padding=cube_padding,
)
except Exception as _qvol_exc:
warn_output_failure(
_qvol_exc,
_qvf_path_for_warn,
role="qvf_density_prep",
category=OutputFailureKind.compatibility_fallback,
)
_qvf_mo_list = None
if has_mos and _cube_req.mo_labels:
try:
from vibeqc.output.formats.qvf import qvf_mo_data
_indices = requested_mo_indices(
_cube_req.mo_labels, result, molecule
)
_qvf_mo_list = qvf_mo_data(
result,
basis_obj,
molecule,
[int(i) for i, _ in _indices],
spacing=cube_spacing,
padding=cube_padding,
)
except Exception as _qmo_exc:
warn_output_failure(
_qmo_exc,
_qvf_path_for_warn,
role="qvf_mo_prep",
category=OutputFailureKind.compatibility_fallback,
)
# Package the wavefunction for the QVF archive so vibe-view
# can resample any MO on demand (wavefunction.gto section).
# Mirrors the periodic runner; gated on write_molden_file so
# the .molden sidecar and the embedded basis+MO stay in sync.
_qvf_wf = None
if write_molden_file and has_mos:
try:
from vibeqc.output.formats.qvf import qvf_wf_data
_qvf_wf = qvf_wf_data(result, basis_obj, molecule)
except Exception as _qwf_exc:
warn_output_failure(
_qwf_exc,
_qvf_path_for_warn,
role="qvf_wavefunction_prep",
category=OutputFailureKind.compatibility_fallback,
)
_qvf_path = _write_qvf(
output_stem,
_output_plan,
molecule=molecule,
result=result,
method=resolved_method,
basis=basis,
functional=functional,
population_summary=_pop,
bibtex_content=_bibtex,
wall_seconds=t_total,
trajectory_frames=_traj_frames if _traj_frames else None,
trajectory_energies=_traj_energies if _traj_energies else None,
volume_data=_qvf_vol_data,
mo_data=_qvf_mo_list,
wf_data=_qvf_wf,
hessian_result=hessian_result,
scf_history_data=_qvf_scf_history,
)
try:
_output_writer.record(_qvf_path, wall_time_s=t_total)
except Exception as _qvf_rec_exc:
warn_writer_failure(
_qvf_rec_exc,
_qvf_path,
role="qvf_manifest_record",
category=OutputFailureKind.manifest_recording,
writer=_output_writer,
)
except Exception as _qvf_exc:
warn_writer_failure(
_qvf_exc,
output_stem.with_suffix(".qvf"),
role="qvf_archive",
category=OutputFailureKind.optional_artifact,
writer=_output_writer,
)
# Finalise the {output}.system manifest (pre-v1.0 dispatch-
# overhaul — replaces the bare end-of-job write_system_manifest
# call). The OutputWriter, constructed before the SCF, already
# wrote the [vibeqc] / [host] / [cpu] / ... / [plan] sections +
# [outputs].status = "running". Here we (1) record every declared
# artefact that actually landed on disk into [[outputs.files]]
# — with bytes + sha256 + wall-time — so vq's fetch / status can
# see exactly what to copy back, and (2) flip the status to
# "complete" with the final wall-time. A non-converged SCF still
# counts as "complete": the job ran to completion; the .out + the
# result object carry the non-convergence.
for _pf in _output_plan.files:
if _pf.path.is_file():
try:
_output_writer.record(_pf.path)
except Exception as _rec_exc:
# record() stats + hashes the file; a brittle write
# at wrap-up must never tank a finished job. We do
# surface it though — a manifest with stale rows
# confuses ``vq fetch``.
warn_writer_failure(
_rec_exc,
_pf.path,
role=f"manifest_record_{_pf.role}",
category=OutputFailureKind.manifest_recording,
writer=_output_writer,
)
_output_writer.finish(wall_seconds=t_total)
return result
__all__ = ["run_job"]