Source code for vibeqc.properties

"""Post-SCF molecular properties: atomic charges, bond orders, dipole.

The standard "sanity-check" output you expect from every QC program,
built on top of a converged SCF result.

Public API
----------

.. autofunction:: mulliken_charges
.. autofunction:: loewdin_charges
.. autofunction:: mayer_bond_orders
.. autofunction:: dipole_moment
.. autofunction:: center_of_mass
.. autofunction:: natural_orbitals
.. autofunction:: idempotency_deviation

All functions accept the ``result`` object returned by
:func:`vibeqc.run_rhf`, :func:`vibeqc.run_uhf`, :func:`vibeqc.run_rks`,
or :func:`vibeqc.run_uks`; the matching :class:`Molecule` and
:class:`BasisSet` used to run the SCF; and for dipole moments an
optional origin.

Implementation notes
--------------------

Mulliken and Löwdin population analyses are AO-basis-dependent in well-
known ways — Mulliken in particular is very sensitive to diffuse
functions. The output is useful for trend-watching across a series
(e.g. charge transfer along a reaction coordinate) but atomic charges
beyond the leading digit should never be taken too seriously. Mayer
bond orders are rotation-invariant and less basis-sensitive.
"""

from __future__ import annotations

import warnings
from dataclasses import dataclass
from typing import TYPE_CHECKING, Iterable, Optional, Sequence

import numpy as np

from ._vibeqc_core import (
    BasisSet,
    GridOptions,
    Molecule,
    build_grid,
    compute_dipole,
    compute_overlap,
    evaluate_ao,
    sad_density,
)

if TYPE_CHECKING:  # pragma: no cover — import cycle avoidance
    from ._vibeqc_core import RHFResult  # noqa: F401


__all__ = [
    "DipoleMoment",
    "HirshfeldResult",
    "NaturalOrbitals",
    "mulliken_charges",
    "loewdin_charges",
    "hirshfeld_charges",
    "mayer_bond_orders",
    "dipole_moment",
    "center_of_mass",
    "natural_orbitals",
    "idempotency_deviation",
]


# Standard atomic masses (u), index 0 unused.  Dalton approximate values —
# good to ~1e−3 u — plenty for center-of-mass computation. We ship a
# subset sufficient for H–Kr; for heavier elements callers pass an
# explicit origin.
_ATOMIC_MASSES: tuple[float, ...] = (
    0.0,
    1.008,   4.003,                                              # H  He
    6.94,    9.012,  10.81,  12.011, 14.007, 15.999, 18.998, 20.180,  # Li–Ne
    22.990, 24.305, 26.982, 28.085, 30.974, 32.06,  35.45,  39.948,   # Na–Ar
    39.098, 40.078, 44.956, 47.867, 50.942, 51.996, 54.938, 55.845,   # K–Fe
    58.933, 58.693, 63.546, 65.38,  69.723, 72.630, 74.922, 78.971,   # Co–Se
    79.904, 83.798,                                                    # Br Kr
)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _real_if_hermitian(P: np.ndarray, *, what: str = "density matrix") -> np.ndarray:
    """Return the real part of a complex-but-Hermitian density matrix.

    Periodic SCF results carry a *complex-typed* density that is Hermitian
    with a machine-noise imaginary part. The molecular-property code forms
    **real observables** — ``tr(P·O)`` (dipole, Mayer bond order), per-atom
    ``(P·S)`` population sums, the density on a real grid (Hirshfeld) — for
    which the imaginary part of a Hermitian ``P`` contracted with a real
    operator cancels exactly. Returning the real part keeps those
    observables real and stops a periodic run from emitting a
    ``ComplexWarning`` on every property (the silent ``float(...)`` /
    ``float64`` casts that warning came from also discarded the imaginary
    part — this does it once, intentionally, after a Hermiticity check).

    A *non-negligible* imaginary part means a genuinely non-Hermitian
    density (a bug), surfaced with an actionable warning rather than
    discarded silently. Real-typed input is returned unchanged (molecular
    RHF/RKS/UHF/UKS path — zero behaviour change).
    """
    P = np.asarray(P)
    if not np.iscomplexobj(P):
        return P
    if P.size:
        max_im = float(np.abs(P.imag).max())
        max_re = float(np.abs(P.real).max())
        if max_im > 1e-8 * max(max_re, 1.0):
            warnings.warn(
                f"{what} has a non-negligible imaginary part "
                f"(max|Im|={max_im:.2e} vs max|Re|={max_re:.2e}); molecular-"
                "property values are derived from its real part and may be "
                "unreliable. This usually indicates a non-Hermitian density "
                "matrix (a bug) rather than a well-converged periodic SCF.",
                RuntimeWarning,
                stacklevel=3,
            )
    return np.ascontiguousarray(P.real)


def _total_density(result) -> np.ndarray:
    """Return the total (closed-shell + open-shell) density matrix.

    RHF/RKS store the already-combined density on ``.density``; UHF/UKS
    expose ``density_alpha`` and ``density_beta`` separately. Complex
    periodic densities are reduced to their (Hermitian) real part via
    :func:`_real_if_hermitian` so downstream observables stay real.
    """
    if hasattr(result, "density_alpha"):
        P = np.asarray(result.density_alpha) + np.asarray(result.density_beta)
    else:
        P = np.asarray(result.density)
    return _real_if_hermitian(P)


def _shell_to_atom(basis: BasisSet) -> np.ndarray:
    """1-D int array, length ``nbasis``, mapping each AO to the 0-based
    atom index it lives on. Computed from ``basis.shells()`` (which is
    public C++ API exposed for basis-set I/O)."""
    shells = basis.shells()
    per_ao: list[int] = []
    for shell in shells:
        # 2L+1 spherical AOs per shell (vibe-qc forces set_pure(true)
        # at basis construction; Cartesian shells would have a
        # different multiplicity, but that is not our configuration).
        n = 2 * shell.l + 1
        per_ao.extend([int(shell.atom_index)] * n)
    return np.asarray(per_ao, dtype=np.int64)


def _per_atom_sum(ao_values: np.ndarray, ao_to_atom: np.ndarray,
                  n_atoms: int) -> np.ndarray:
    """Sum an nbasis-length array into per-atom totals."""
    out = np.zeros(n_atoms, dtype=np.float64)
    for a, v in zip(ao_to_atom, ao_values):
        out[a] += v
    return out


[docs] def center_of_mass(molecule: Molecule) -> np.ndarray: """Center of mass (bohr). Atomic masses from a built-in table up to Z = 36; callers with heavier elements should specify the origin to dipole_moment directly.""" atoms = list(molecule.atoms) if not atoms: return np.zeros(3) total_mass = 0.0 com = np.zeros(3) for atom in atoms: z = int(atom.Z) if z < len(_ATOMIC_MASSES): m = _ATOMIC_MASSES[z] else: # Crude fall-back: 2 u per nucleon ≈ A, and A ≈ 2 Z on average # for light elements — good enough not to throw. Heavy-element # users should pass an explicit origin. m = 2.0 * z pos = np.array([atom.xyz[0], atom.xyz[1], atom.xyz[2]]) com += m * pos total_mass += m if total_mass == 0.0: return np.zeros(3) return com / total_mass
# --------------------------------------------------------------------------- # Mulliken population analysis # ---------------------------------------------------------------------------
[docs] def mulliken_charges(result, basis: BasisSet, molecule: Molecule) -> np.ndarray: """Mulliken atomic partial charges q_A = Z_A − Σ_{μ ∈ A} (P·S)_μμ. Returns a 1-D array of length ``n_atoms``. Charges sum to the total molecular charge (``molecule.charge``) to machine precision. The overall partition is AO-basis-dependent; Mulliken is sensitive to diffuse functions and should be used for trend analysis rather than for quantitative charge assignment. """ P = _total_density(result) S = np.asarray(compute_overlap(basis)) PS_diag = np.einsum("ij,ji->i", P, S) # diag(P · S) ao_to_atom = _shell_to_atom(basis) n_atoms = len(molecule.atoms) electron_pop = _per_atom_sum(PS_diag, ao_to_atom, n_atoms) Z = np.array([atom.Z for atom in molecule.atoms], dtype=np.float64) return Z - electron_pop
# --------------------------------------------------------------------------- # Löwdin population analysis # ---------------------------------------------------------------------------
[docs] def loewdin_charges(result, basis: BasisSet, molecule: Molecule) -> np.ndarray: """Löwdin (symmetric orthogonalisation) atomic partial charges: q_A = Z_A − Σ_{μ ∈ A} (S^{1/2} · P · S^{1/2})_μμ. Compared to Mulliken this partitions the off-diagonal overlap equally between the two AOs involved — less basis-sensitive in practice. """ P = _total_density(result) S = np.asarray(compute_overlap(basis)) # S = U · diag(s) · U.T → S^{1/2} = U · diag(√s) · U.T w, V = np.linalg.eigh(S) if np.min(w) < 1e-10: # Defensive: raise the way run_rhf's symmetric-orthogonalisation # does. In practice vibe-qc's basis sets never trigger this on # molecules; periodic near-linear-dependence surfaces earlier. raise ValueError( f"loewdin_charges: overlap matrix is near-singular " f"(min eigenvalue {np.min(w):.2e})" ) S_half = V @ np.diag(np.sqrt(w)) @ V.T SPS = S_half @ P @ S_half diag = np.diag(SPS) ao_to_atom = _shell_to_atom(basis) n_atoms = len(molecule.atoms) electron_pop = _per_atom_sum(diag, ao_to_atom, n_atoms) Z = np.array([atom.Z for atom in molecule.atoms], dtype=np.float64) return Z - electron_pop
# --------------------------------------------------------------------------- # Hirshfeld population analysis # ---------------------------------------------------------------------------
[docs] @dataclass class HirshfeldResult: """Output of :func:`hirshfeld_charges`. Attributes ---------- charges : np.ndarray ``(n_atoms,)`` array of Hirshfeld atomic partial charges in electrons. Sums to ``molecule.charge`` to grid precision. Sign convention matches :func:`mulliken_charges` / :func:`loewdin_charges` (positive = electron-deficient). electron_population : np.ndarray ``(n_atoms,)`` array of ∫ w_A(r) ρ(r) dV — the Hirshfeld- partitioned electron count on each atom. ``Z_A − this`` is ``charges[A]``. promolecule_norm : float ∫ ρ_pro dV evaluated on the grid; should ≈ n_electrons. Diagnostic: when this deviates by > 1e-3 from the integer electron count, the integration grid is too coarse or the SAD promolecule didn't converge for some atom (very rare). molecule_norm : float ∫ ρ_mol dV evaluated on the same grid; should also ≈ n_electrons. Comparing the two norms tells you whether grid error is in the molecular density or the promolecule. n_grid_points : int Total Becke-Lebedev-Treutler grid point count used. Scales with ``GridOptions.n_radial × angular order × n_atoms``. """ charges: np.ndarray electron_population: np.ndarray promolecule_norm: float molecule_norm: float n_grid_points: int def __repr__(self) -> str: return ( f"HirshfeldResult(charges=<{len(self.charges)} atoms>, " f"Σq={self.charges.sum():+.6f}, " f"n_e(mol)={self.molecule_norm:.4f}, " f"n_e(pro)={self.promolecule_norm:.4f}, " f"n_grid={self.n_grid_points})" )
[docs] def hirshfeld_charges( result, basis: BasisSet, molecule: Molecule, *, grid_options: Optional[GridOptions] = None, rho_floor: float = 1.0e-30, max_block_elems: int = 50_000_000, ) -> HirshfeldResult: """Classical Hirshfeld atomic partial charges from a converged SCF. Hirshfeld, F. L. *Theor. Chim. Acta* **44**, 129 (1977). q_A = Z_A − ∫ w_A(r) ρ(r) d³r, w_A(r) = ρ_A^free(r) / Σ_B ρ_B^free(r). Unlike :func:`mulliken_charges` and :func:`loewdin_charges`, Hirshfeld is a *real-space* partition rather than a basis-space partition — far less sensitive to diffuse functions, and the standard input charge for charge-dependent dispersion methods like the D4 refinement scheduled for vibe-qc v0.10.0 D2b. The promolecular reference {ρ_A^free} is obtained for free from :func:`vibeqc.sad_density` — it returns the SAD initial-guess density matrix, which is block-diagonal by atom (each atomic SCF runs in vacuum, contributes to its own AO range, off- diagonal blocks are zero). Restricting the AO sum to atom A's basis-function range gives ρ_A^free evaluated at the molecular geometry. No per-atom SCF, no ionic-fragment branching. Parameters ---------- result SCF result from ``vibeqc.run_rhf`` / ``run_rks`` / ``run_uhf`` / ``run_uks``. The total density matrix is extracted via :func:`_total_density` (handles both the closed-shell ``.density`` and the open-shell ``.density_alpha`` + ``.density_beta`` schemas). basis The same :class:`BasisSet` used to run the SCF. molecule The same :class:`Molecule` used to run the SCF. grid_options Optional :class:`GridOptions` for the Becke-Lebedev- Treutler integration grid. Defaults to ``GridOptions()`` (vibe-qc's DFT-default level). Hirshfeld weights are smooth so the default grid is plenty for sub-millielectron charge accuracy; tighten only for explicit grid-convergence studies. rho_floor Promolecule density floor to keep w_A = ρ_A / ρ_pro well-defined in vacuum regions far from any atom. Default ``1e-30`` is well below any grid point that contributes meaningfully to the integral. max_block_elems Memory cap for the grid sweep. The AO matrix χ_μ(r_g) is ``(n_block, n_bf)``; the grid is processed in blocks sized so ``n_block × n_bf`` never exceeds this many elements (~``8 × max_block_elems`` bytes of float64). Default ``5×10⁷`` ≈ 400 MB per block. Small molecules fit in a single block (no behaviour change); large systems are swept block-by-block so the function never materialises a multi-gigabyte AO matrix. The result is independent of the block size to floating-point round-off. Returns ------- HirshfeldResult Rich dataclass; ``HirshfeldResult.charges`` is the ``(n_atoms,)`` array if you want bare-array semantics matching :func:`mulliken_charges`. The other fields carry per-atom integrated electron populations and grid- normalisation sanity numbers. Notes ----- Numerical-quality knobs: * ``∫ ρ_mol − n_electrons`` should be < 5×10⁻⁵ on the default grid for typical first-row systems. * ``Σ q_A − molecule.charge`` should be < 1×10⁻⁶ e (exact identity from the Hirshfeld weight normalisation; only grid error introduces residual). Classical Hirshfeld charges in vibe-qc come out ~0.04-0.06 e *more negative* on heavy atoms than ORCA-reported values because the promolecule is constructed in the molecular basis (SAD-derived) rather than from tabulated Slater-type atomic densities. Documentable trade-off — switch sources via a future ``promolecule="slater"`` kwarg if byte-equal ORCA parity matters. The iterative Hirshfeld variant (Bultinck et al., *J. Chem. Phys.* **126**, 144111 (2007)) — which is less promolecule- sensitive — is a clean follow-on; same API, different inner loop. Tracked as a v0.10.0 D2b-i candidate. """ grid = build_grid(molecule, grid_options if grid_options is not None else GridOptions()) points = np.asarray(grid.points, dtype=np.float64) weights = np.asarray(grid.weights, dtype=np.float64) n_grid = points.shape[0] n_bf = basis.nbasis n_atoms = len(molecule.atoms) P_mol = _total_density(result) # Promolecular density matrix is the SAD guess (block-diagonal # by atom — see docstring above). P_pro = np.asarray(sad_density(molecule, basis), dtype=np.float64) ao_to_atom = _shell_to_atom(basis) # Precompute per-atom AO masks + the diagonal P_pro blocks once. atom_masks = [(ao_to_atom == A) for A in range(n_atoms)] atom_P_blocks = [ (P_pro[np.ix_(m, m)] if m.any() else None) for m in atom_masks ] # Process the grid in blocks so the (n_block, n_bf) AO matrix # never exceeds ``max_block_elems`` float64 entries. Small # molecules collapse to a single block (identical to the old # one-shot path); large systems are swept without ever holding # a multi-GB χ matrix in memory. block = max(1, int(max_block_elems // max(n_bf, 1))) electron_pop = np.zeros(n_atoms, dtype=np.float64) promolecule_norm = 0.0 molecule_norm = 0.0 for start in range(0, n_grid, block): stop = min(start + block, n_grid) pts = points[start:stop] w = weights[start:stop] # χ_μ(r_g) for this block: (n_block, n_bf). chi = np.asarray(evaluate_ao(basis, pts), dtype=np.float64) # ρ_mol(r_g) = Σ_μν P_μν χ_μ χ_ν — (P·χᵀ)·χ row-wise. rho_mol = np.einsum("gm,gm->g", chi @ P_mol, chi) # ρ_A^free(r_g) per atom — AO sum restricted to A's range # thanks to P_pro's block-diagonal structure. rho_atoms = np.zeros((n_atoms, stop - start), dtype=np.float64) for A in range(n_atoms): mask = atom_masks[A] P_AA = atom_P_blocks[A] if P_AA is None: # ghost atom — row stays 0 continue chi_A = chi[:, mask] rho_atoms[A] = np.einsum("gm,gm->g", chi_A @ P_AA, chi_A) rho_pro = rho_atoms.sum(axis=0) # Hirshfeld weights with a ρ_pro floor for far-out points # that would otherwise be ~0/~0. safe_pro = np.maximum(rho_pro, rho_floor) weights_atoms = rho_atoms / safe_pro electron_pop += weights_atoms @ (w * rho_mol) promolecule_norm += float((w * rho_pro).sum()) molecule_norm += float((w * rho_mol).sum()) Z = np.array([atom.Z for atom in molecule.atoms], dtype=np.float64) charges = Z - electron_pop return HirshfeldResult( charges=charges, electron_population=electron_pop, promolecule_norm=promolecule_norm, molecule_norm=molecule_norm, n_grid_points=n_grid, )
# --------------------------------------------------------------------------- # Mayer bond orders # ---------------------------------------------------------------------------
[docs] def mayer_bond_orders(result, basis: BasisSet, molecule: Molecule) -> np.ndarray: """Mayer bond-order matrix, shape (n_atoms, n_atoms). For closed-shell systems: B_AB = Σ_{μ ∈ A} Σ_{ν ∈ B} (P·S)_μν · (P·S)_νμ For UHF / UKS we replace (P·S)(P·S) with the spin-resolved definition 2·[(Pα·S)(Pα·S) + (Pβ·S)(Pβ·S)] which reduces to the closed-shell formula when Pα = Pβ = P/2. Diagonal entries are the "free valence" of each atom. The off- diagonals are the chemically-meaningful bond orders. """ S = np.asarray(compute_overlap(basis)) ao_to_atom = _shell_to_atom(basis) n_atoms = len(molecule.atoms) if hasattr(result, "density_alpha"): # Real part for complex (periodic, Hermitian) densities — the Mayer # bond order is a real observable; see _real_if_hermitian. Pa = _real_if_hermitian(result.density_alpha, what="alpha density") Pb = _real_if_hermitian(result.density_beta, what="beta density") PS_a = Pa @ S PS_b = Pb @ S # Element-wise Mayer: M_μν = 2·[(PS_a)_μν (PS_a)_νμ # + (PS_b)_μν (PS_b)_νμ] M = 2.0 * (PS_a * PS_a.T + PS_b * PS_b.T) else: P = _real_if_hermitian(result.density) PS = P @ S M = PS * PS.T # broadcasting element-wise; equivalent to # M_μν = (PS)_μν · (PS)_νμ since the matrix is # real. bond_orders = np.zeros((n_atoms, n_atoms), dtype=np.float64) for mu in range(M.shape[0]): a = ao_to_atom[mu] for nu in range(M.shape[1]): b = ao_to_atom[nu] if a != b: bond_orders[a, b] += M[mu, nu] # Symmetrize numerically — analytical B_AB = B_BA for real AOs. bond_orders = 0.5 * (bond_orders + bond_orders.T) return bond_orders
def prominent_bonds( bond_orders: np.ndarray, molecule: Molecule, *, threshold: float = 0.10, ) -> list[tuple[int, int, float]]: """Return ``[(i, j, B_ij)]`` pairs with ``i < j`` and ``B_ij >= threshold`` — convenience for formatting a compact bond-order table in the log output.""" n = bond_orders.shape[0] out: list[tuple[int, int, float]] = [] for i in range(n): for j in range(i + 1, n): if bond_orders[i, j] >= threshold: out.append((i, j, float(bond_orders[i, j]))) # Sort by descending bond order so the covalent bonds float to the top. out.sort(key=lambda t: -t[2]) return out # --------------------------------------------------------------------------- # Dipole moment # --------------------------------------------------------------------------- _BOHR_TO_DEBYE = 2.541746473 # 1 e·bohr = 2.541746473 Debye
[docs] @dataclass class DipoleMoment: """Dipole moment components in atomic units (e·bohr), plus Debye.""" x: float y: float z: float origin: tuple[float, float, float] @property def total(self) -> float: r"""\|mu\| in atomic units (e\*bohr).""" return float(np.sqrt(self.x ** 2 + self.y ** 2 + self.z ** 2)) @property def total_debye(self) -> float: return self.total * _BOHR_TO_DEBYE
[docs] def components_debye(self) -> tuple[float, float, float]: return (self.x * _BOHR_TO_DEBYE, self.y * _BOHR_TO_DEBYE, self.z * _BOHR_TO_DEBYE)
[docs] def dipole_moment( result, basis: BasisSet, molecule: Molecule, *, origin: Optional[Sequence[float]] = None, ) -> DipoleMoment: """Electric dipole moment of a converged SCF calculation. ``origin`` (bohr) defaults to the molecular center of mass, which makes the dipole origin-independent for neutral systems (the convention every standard QC code uses). For charged systems the dipole depends on origin; pass an explicit vector if you need a particular reference. """ if origin is None: origin_vec = center_of_mass(molecule) else: origin_vec = np.asarray(origin, dtype=np.float64) if origin_vec.shape != (3,): raise ValueError("dipole_moment: origin must be a 3-vector (bohr)") dip = compute_dipole(basis, [float(x) for x in origin_vec]) Mx = np.asarray(dip.x) My = np.asarray(dip.y) Mz = np.asarray(dip.z) P = _total_density(result) # Electronic contribution (electrons are negative): -tr(P · M_c). mu_e_x = -np.einsum("ij,ji->", P, Mx) mu_e_y = -np.einsum("ij,ji->", P, My) mu_e_z = -np.einsum("ij,ji->", P, Mz) # Nuclear contribution (with origin shift): Σ_A Z_A (R_A − O). mu_n_x = 0.0 mu_n_y = 0.0 mu_n_z = 0.0 for atom in molecule.atoms: z = float(atom.Z) mu_n_x += z * (atom.xyz[0] - origin_vec[0]) mu_n_y += z * (atom.xyz[1] - origin_vec[1]) mu_n_z += z * (atom.xyz[2] - origin_vec[2]) return DipoleMoment( x=float(mu_e_x + mu_n_x), y=float(mu_e_y + mu_n_y), z=float(mu_e_z + mu_n_z), origin=(float(origin_vec[0]), float(origin_vec[1]), float(origin_vec[2])), )
# --------------------------------------------------------------------------- # Natural orbitals # --------------------------------------------------------------------------- @dataclass class NaturalOrbitals: """Natural orbitals + occupations, sorted by descending occupation. Attributes ---------- occupations ``(n_bf,)`` real array. For ``kind="rhf"`` and ``kind="uhf-total"`` occupations are in ``[0, 2]``; for ``kind="uhf-alpha"`` / ``kind="uhf-beta"`` they are in ``[0, 1]``; for ``kind="uhf-spin"`` they are in ``[-1, 1]``. coefficients ``(n_bf, n_bf)`` real matrix. Each column is one NO expressed in the AO basis, S-normalized: ``C^T S C = I``. Columns are ordered by descending occupation so ``coefficients[:, :n_occ]`` is the natural-occupation analogue of "occupied MOs". kind One of ``"rhf"``, ``"uhf-total"``, ``"uhf-alpha"``, ``"uhf-beta"``, ``"uhf-spin"`` — describes which density matrix was diagonalized so the user knows how to interpret occupations. """ occupations: np.ndarray coefficients: np.ndarray kind: str @property def n_orbitals(self) -> int: return int(self.occupations.size) @property def n_electrons(self) -> float: """Sum of occupations — equals the underlying electron count (or ``N_α − N_β`` for ``kind="uhf-spin"``) up to FP noise.""" return float(self.occupations.sum()) def _diagonalise_density(D: np.ndarray, S: np.ndarray) -> tuple[np.ndarray, np.ndarray]: """Solve ``D S c_i = n_i c_i`` via the Löwdin route. Returns ``(occupations, coefficients)`` sorted by *descending* occupation with the AO-basis NOs S-normalized (``C^T S C = I``). Numerically: form ``D̃ = S^{1/2} D S^{1/2}`` (real symmetric), diagonalize, then transform eigenvectors back via ``C = S^{-1/2} U``. This avoids the non-symmetric eigenproblem ``D · S`` (which has real eigenvalues but complex-arithmetic eigenvectors).""" s_eig, U_S = np.linalg.eigh(S) if np.min(s_eig) < 1e-10: raise ValueError( f"natural_orbitals: overlap is near-singular " f"(min eigenvalue {np.min(s_eig):.2e})" ) sqrt_s = np.sqrt(s_eig) inv_sqrt_s = 1.0 / sqrt_s S_half = U_S @ np.diag(sqrt_s) @ U_S.T S_inv_half = U_S @ np.diag(inv_sqrt_s) @ U_S.T D_tilde = S_half @ D @ S_half # Force-symmetrize — D̃ is exactly symmetric in exact arithmetic. D_tilde = 0.5 * (D_tilde + D_tilde.T) n, U = np.linalg.eigh(D_tilde) # eigh returns ascending; flip to descending occupation. order = np.argsort(-n) n = n[order] U = U[:, order] C = S_inv_half @ U return n, C def natural_orbitals(result, basis: BasisSet, *, kind: str = "auto") -> NaturalOrbitals: """Diagonalize the SCF one-particle density matrix to get natural orbitals + occupations. For a single-determinant RHF/RKS the occupations come out exactly integer (2.0 for occupied, 0.0 for virtual) and the NOs span the same occupied/virtual subspaces as the canonical MOs (different rotations within those subspaces only). For UHF/UKS the *total* natural orbitals (default) carry fractional occupations whose deviation from {0, 2} measures spin contamination / multireference character; the *spin* natural orbitals (``kind="uhf-spin"``) carry the unpaired-electron distribution whose largest-eigenvalue magnitudes localise the open-shell character. Parameters ---------- result Output of :func:`vibeqc.run_rhf`, :func:`vibeqc.run_rks`, :func:`vibeqc.run_uhf`, or :func:`vibeqc.run_uks`. basis The same :class:`BasisSet` used to run the SCF (the AO overlap is recomputed from it). kind ``"auto"`` (default) chooses ``"rhf"`` for closed-shell results and ``"uhf-total"`` for open-shell. Other accepted values: * ``"rhf"`` — for an RHF/RKS result, diagonalize ``D`` directly (occupations 0..2). * ``"uhf-total"`` — for a UHF/UKS result, diagonalize ``D_α + D_β`` (occupations 0..2, sum = N_e). * ``"uhf-alpha"`` / ``"uhf-beta"`` — diagonalize ``D_α`` or ``D_β`` alone (occupations 0..1, sum = N_α or N_β). * ``"uhf-spin"`` — diagonalize the spin density ``D_α − D_β`` (occupations −1..1, sum = N_α − N_β). Eigenvectors with the largest |occupation| pick out where the unpaired spins live. """ is_open_shell = hasattr(result, "density_alpha") if kind == "auto": kind = "uhf-total" if is_open_shell else "rhf" if kind == "rhf": if is_open_shell: raise ValueError( "natural_orbitals: kind='rhf' requires a closed-shell " "(RHF/RKS) result; got an open-shell result. Use " "kind='uhf-total' instead." ) D = np.asarray(result.density) elif kind == "uhf-total": if not is_open_shell: raise ValueError( "natural_orbitals: kind='uhf-total' requires an " "open-shell (UHF/UKS) result." ) D = (np.asarray(result.density_alpha) + np.asarray(result.density_beta)) elif kind == "uhf-alpha": if not is_open_shell: raise ValueError( "natural_orbitals: kind='uhf-alpha' requires an " "open-shell result.") D = np.asarray(result.density_alpha) elif kind == "uhf-beta": if not is_open_shell: raise ValueError( "natural_orbitals: kind='uhf-beta' requires an " "open-shell result.") D = np.asarray(result.density_beta) elif kind == "uhf-spin": if not is_open_shell: raise ValueError( "natural_orbitals: kind='uhf-spin' requires an " "open-shell result.") D = (np.asarray(result.density_alpha) - np.asarray(result.density_beta)) else: raise ValueError( f"natural_orbitals: unknown kind={kind!r}. Valid: 'auto', " f"'rhf', 'uhf-total', 'uhf-alpha', 'uhf-beta', 'uhf-spin'." ) S = np.asarray(compute_overlap(basis)) occupations, coefficients = _diagonalise_density(D, S) return NaturalOrbitals( occupations=occupations, coefficients=coefficients, kind=kind, ) def idempotency_deviation(no: NaturalOrbitals) -> float: """Scalar measure of how far the density matrix is from a single Slater determinant. Larger values flag multireference character or spin contamination. For ``kind="rhf"`` / ``kind="uhf-total"`` (occupations in [0, 2]): Δ = Σ_i n_i (2 − n_i) / 2 A pure single-determinant Hartree–Fock state gives ``Δ = 0`` (every NO is exactly 0 or 2). For UHF the value is the standard "non-idempotency" diagnostic — small for well-behaved closed-shell systems and growing with spin contamination. For ``kind="uhf-alpha"``, ``"uhf-beta"`` (occupations in [0, 1]): Δ = Σ_i n_i (1 − n_i) For ``kind="uhf-spin"`` (occupations in [−1, 1]): Δ = Σ_i (1 − n_i²) / 2 (Yamaguchi-style estimate of the number of unpaired electrons; the proper "N_unpaired" via the Head-Gordon definition uses 2·(D_α D_β S) eigenvalues, which this function does not compute.) """ n = np.asarray(no.occupations, dtype=float) if no.kind in ("rhf", "uhf-total"): return 0.5 * float(np.sum(n * (2.0 - n))) if no.kind in ("uhf-alpha", "uhf-beta"): return float(np.sum(n * (1.0 - n))) if no.kind == "uhf-spin": return 0.5 * float(np.sum(1.0 - n * n)) raise ValueError(f"idempotency_deviation: unknown NO kind {no.kind!r}")