"""Post-SCF molecular properties: atomic charges, bond orders, dipole.
The standard "sanity-check" output you expect from every QC program,
built on top of a converged SCF result.
Public API
----------
.. autofunction:: mulliken_charges
.. autofunction:: loewdin_charges
.. autofunction:: mayer_bond_orders
.. autofunction:: dipole_moment
.. autofunction:: center_of_mass
.. autofunction:: natural_orbitals
.. autofunction:: idempotency_deviation
All functions accept the ``result`` object returned by
:func:`vibeqc.run_rhf`, :func:`vibeqc.run_uhf`, :func:`vibeqc.run_rks`,
or :func:`vibeqc.run_uks`; the matching :class:`Molecule` and
:class:`BasisSet` used to run the SCF; and for dipole moments an
optional origin.
Implementation notes
--------------------
Mulliken and Löwdin population analyses are AO-basis-dependent in well-
known ways — Mulliken in particular is very sensitive to diffuse
functions. The output is useful for trend-watching across a series
(e.g. charge transfer along a reaction coordinate) but atomic charges
beyond the leading digit should never be taken too seriously. Mayer
bond orders are rotation-invariant and less basis-sensitive.
"""
from __future__ import annotations
import warnings
from dataclasses import dataclass
from typing import TYPE_CHECKING, Iterable, Optional, Sequence
import numpy as np
from ._vibeqc_core import (
BasisSet,
GridOptions,
Molecule,
build_grid,
compute_dipole,
compute_overlap,
evaluate_ao,
sad_density,
)
if TYPE_CHECKING: # pragma: no cover — import cycle avoidance
from ._vibeqc_core import RHFResult # noqa: F401
__all__ = [
"DipoleMoment",
"HirshfeldResult",
"NaturalOrbitals",
"mulliken_charges",
"loewdin_charges",
"hirshfeld_charges",
"mayer_bond_orders",
"dipole_moment",
"center_of_mass",
"natural_orbitals",
"idempotency_deviation",
]
# Standard atomic masses (u), index 0 unused. Dalton approximate values —
# good to ~1e−3 u — plenty for center-of-mass computation. We ship a
# subset sufficient for H–Kr; for heavier elements callers pass an
# explicit origin.
_ATOMIC_MASSES: tuple[float, ...] = (
0.0,
1.008, 4.003, # H He
6.94, 9.012, 10.81, 12.011, 14.007, 15.999, 18.998, 20.180, # Li–Ne
22.990, 24.305, 26.982, 28.085, 30.974, 32.06, 35.45, 39.948, # Na–Ar
39.098, 40.078, 44.956, 47.867, 50.942, 51.996, 54.938, 55.845, # K–Fe
58.933, 58.693, 63.546, 65.38, 69.723, 72.630, 74.922, 78.971, # Co–Se
79.904, 83.798, # Br Kr
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _real_if_hermitian(P: np.ndarray, *, what: str = "density matrix") -> np.ndarray:
"""Return the real part of a complex-but-Hermitian density matrix.
Periodic SCF results carry a *complex-typed* density that is Hermitian
with a machine-noise imaginary part. The molecular-property code forms
**real observables** — ``tr(P·O)`` (dipole, Mayer bond order), per-atom
``(P·S)`` population sums, the density on a real grid (Hirshfeld) — for
which the imaginary part of a Hermitian ``P`` contracted with a real
operator cancels exactly. Returning the real part keeps those
observables real and stops a periodic run from emitting a
``ComplexWarning`` on every property (the silent ``float(...)`` /
``float64`` casts that warning came from also discarded the imaginary
part — this does it once, intentionally, after a Hermiticity check).
A *non-negligible* imaginary part means a genuinely non-Hermitian
density (a bug), surfaced with an actionable warning rather than
discarded silently. Real-typed input is returned unchanged (molecular
RHF/RKS/UHF/UKS path — zero behaviour change).
"""
P = np.asarray(P)
if not np.iscomplexobj(P):
return P
if P.size:
max_im = float(np.abs(P.imag).max())
max_re = float(np.abs(P.real).max())
if max_im > 1e-8 * max(max_re, 1.0):
warnings.warn(
f"{what} has a non-negligible imaginary part "
f"(max|Im|={max_im:.2e} vs max|Re|={max_re:.2e}); molecular-"
"property values are derived from its real part and may be "
"unreliable. This usually indicates a non-Hermitian density "
"matrix (a bug) rather than a well-converged periodic SCF.",
RuntimeWarning,
stacklevel=3,
)
return np.ascontiguousarray(P.real)
def _total_density(result) -> np.ndarray:
"""Return the total (closed-shell + open-shell) density matrix.
RHF/RKS store the already-combined density on ``.density``; UHF/UKS
expose ``density_alpha`` and ``density_beta`` separately. Complex
periodic densities are reduced to their (Hermitian) real part via
:func:`_real_if_hermitian` so downstream observables stay real.
"""
if hasattr(result, "density_alpha"):
P = np.asarray(result.density_alpha) + np.asarray(result.density_beta)
else:
P = np.asarray(result.density)
return _real_if_hermitian(P)
def _shell_to_atom(basis: BasisSet) -> np.ndarray:
"""1-D int array, length ``nbasis``, mapping each AO to the 0-based
atom index it lives on. Computed from ``basis.shells()`` (which is
public C++ API exposed for basis-set I/O)."""
shells = basis.shells()
per_ao: list[int] = []
for shell in shells:
# 2L+1 spherical AOs per shell (vibe-qc forces set_pure(true)
# at basis construction; Cartesian shells would have a
# different multiplicity, but that is not our configuration).
n = 2 * shell.l + 1
per_ao.extend([int(shell.atom_index)] * n)
return np.asarray(per_ao, dtype=np.int64)
def _per_atom_sum(ao_values: np.ndarray, ao_to_atom: np.ndarray,
n_atoms: int) -> np.ndarray:
"""Sum an nbasis-length array into per-atom totals."""
out = np.zeros(n_atoms, dtype=np.float64)
for a, v in zip(ao_to_atom, ao_values):
out[a] += v
return out
[docs]
def center_of_mass(molecule: Molecule) -> np.ndarray:
"""Center of mass (bohr). Atomic masses from a built-in table up to
Z = 36; callers with heavier elements should specify the origin to
dipole_moment directly."""
atoms = list(molecule.atoms)
if not atoms:
return np.zeros(3)
total_mass = 0.0
com = np.zeros(3)
for atom in atoms:
z = int(atom.Z)
if z < len(_ATOMIC_MASSES):
m = _ATOMIC_MASSES[z]
else:
# Crude fall-back: 2 u per nucleon ≈ A, and A ≈ 2 Z on average
# for light elements — good enough not to throw. Heavy-element
# users should pass an explicit origin.
m = 2.0 * z
pos = np.array([atom.xyz[0], atom.xyz[1], atom.xyz[2]])
com += m * pos
total_mass += m
if total_mass == 0.0:
return np.zeros(3)
return com / total_mass
# ---------------------------------------------------------------------------
# Mulliken population analysis
# ---------------------------------------------------------------------------
[docs]
def mulliken_charges(result, basis: BasisSet, molecule: Molecule) -> np.ndarray:
"""Mulliken atomic partial charges q_A = Z_A − Σ_{μ ∈ A} (P·S)_μμ.
Returns a 1-D array of length ``n_atoms``. Charges sum to the total
molecular charge (``molecule.charge``) to machine precision. The
overall partition is AO-basis-dependent; Mulliken is sensitive to
diffuse functions and should be used for trend analysis rather than
for quantitative charge assignment.
"""
P = _total_density(result)
S = np.asarray(compute_overlap(basis))
PS_diag = np.einsum("ij,ji->i", P, S) # diag(P · S)
ao_to_atom = _shell_to_atom(basis)
n_atoms = len(molecule.atoms)
electron_pop = _per_atom_sum(PS_diag, ao_to_atom, n_atoms)
Z = np.array([atom.Z for atom in molecule.atoms], dtype=np.float64)
return Z - electron_pop
# ---------------------------------------------------------------------------
# Löwdin population analysis
# ---------------------------------------------------------------------------
[docs]
def loewdin_charges(result, basis: BasisSet, molecule: Molecule) -> np.ndarray:
"""Löwdin (symmetric orthogonalisation) atomic partial charges:
q_A = Z_A − Σ_{μ ∈ A} (S^{1/2} · P · S^{1/2})_μμ.
Compared to Mulliken this partitions the off-diagonal overlap
equally between the two AOs involved — less basis-sensitive in
practice.
"""
P = _total_density(result)
S = np.asarray(compute_overlap(basis))
# S = U · diag(s) · U.T → S^{1/2} = U · diag(√s) · U.T
w, V = np.linalg.eigh(S)
if np.min(w) < 1e-10:
# Defensive: raise the way run_rhf's symmetric-orthogonalisation
# does. In practice vibe-qc's basis sets never trigger this on
# molecules; periodic near-linear-dependence surfaces earlier.
raise ValueError(
f"loewdin_charges: overlap matrix is near-singular "
f"(min eigenvalue {np.min(w):.2e})"
)
S_half = V @ np.diag(np.sqrt(w)) @ V.T
SPS = S_half @ P @ S_half
diag = np.diag(SPS)
ao_to_atom = _shell_to_atom(basis)
n_atoms = len(molecule.atoms)
electron_pop = _per_atom_sum(diag, ao_to_atom, n_atoms)
Z = np.array([atom.Z for atom in molecule.atoms], dtype=np.float64)
return Z - electron_pop
# ---------------------------------------------------------------------------
# Hirshfeld population analysis
# ---------------------------------------------------------------------------
[docs]
@dataclass
class HirshfeldResult:
"""Output of :func:`hirshfeld_charges`.
Attributes
----------
charges : np.ndarray
``(n_atoms,)`` array of Hirshfeld atomic partial charges in
electrons. Sums to ``molecule.charge`` to grid precision.
Sign convention matches :func:`mulliken_charges` /
:func:`loewdin_charges` (positive = electron-deficient).
electron_population : np.ndarray
``(n_atoms,)`` array of ∫ w_A(r) ρ(r) dV — the Hirshfeld-
partitioned electron count on each atom. ``Z_A − this`` is
``charges[A]``.
promolecule_norm : float
∫ ρ_pro dV evaluated on the grid; should ≈ n_electrons.
Diagnostic: when this deviates by > 1e-3 from the integer
electron count, the integration grid is too coarse or the
SAD promolecule didn't converge for some atom (very rare).
molecule_norm : float
∫ ρ_mol dV evaluated on the same grid; should also
≈ n_electrons. Comparing the two norms tells you whether
grid error is in the molecular density or the promolecule.
n_grid_points : int
Total Becke-Lebedev-Treutler grid point count used. Scales
with ``GridOptions.n_radial × angular order × n_atoms``.
"""
charges: np.ndarray
electron_population: np.ndarray
promolecule_norm: float
molecule_norm: float
n_grid_points: int
def __repr__(self) -> str:
return (
f"HirshfeldResult(charges=<{len(self.charges)} atoms>, "
f"Σq={self.charges.sum():+.6f}, "
f"n_e(mol)={self.molecule_norm:.4f}, "
f"n_e(pro)={self.promolecule_norm:.4f}, "
f"n_grid={self.n_grid_points})"
)
[docs]
def hirshfeld_charges(
result,
basis: BasisSet,
molecule: Molecule,
*,
grid_options: Optional[GridOptions] = None,
rho_floor: float = 1.0e-30,
max_block_elems: int = 50_000_000,
) -> HirshfeldResult:
"""Classical Hirshfeld atomic partial charges from a converged SCF.
Hirshfeld, F. L. *Theor. Chim. Acta* **44**, 129 (1977).
q_A = Z_A − ∫ w_A(r) ρ(r) d³r,
w_A(r) = ρ_A^free(r) / Σ_B ρ_B^free(r).
Unlike :func:`mulliken_charges` and :func:`loewdin_charges`,
Hirshfeld is a *real-space* partition rather than a basis-space
partition — far less sensitive to diffuse functions, and the
standard input charge for charge-dependent dispersion methods
like the D4 refinement scheduled for vibe-qc v0.10.0 D2b.
The promolecular reference {ρ_A^free} is obtained for free from
:func:`vibeqc.sad_density` — it returns the SAD initial-guess
density matrix, which is block-diagonal by atom (each atomic
SCF runs in vacuum, contributes to its own AO range, off-
diagonal blocks are zero). Restricting the AO sum to atom A's
basis-function range gives ρ_A^free evaluated at the molecular
geometry. No per-atom SCF, no ionic-fragment branching.
Parameters
----------
result
SCF result from ``vibeqc.run_rhf`` / ``run_rks`` /
``run_uhf`` / ``run_uks``. The total density matrix is
extracted via :func:`_total_density` (handles both the
closed-shell ``.density`` and the open-shell
``.density_alpha`` + ``.density_beta`` schemas).
basis
The same :class:`BasisSet` used to run the SCF.
molecule
The same :class:`Molecule` used to run the SCF.
grid_options
Optional :class:`GridOptions` for the Becke-Lebedev-
Treutler integration grid. Defaults to ``GridOptions()``
(vibe-qc's DFT-default level). Hirshfeld weights are
smooth so the default grid is plenty for sub-millielectron
charge accuracy; tighten only for explicit
grid-convergence studies.
rho_floor
Promolecule density floor to keep w_A = ρ_A / ρ_pro
well-defined in vacuum regions far from any atom. Default
``1e-30`` is well below any grid point that contributes
meaningfully to the integral.
max_block_elems
Memory cap for the grid sweep. The AO matrix χ_μ(r_g) is
``(n_block, n_bf)``; the grid is processed in blocks sized
so ``n_block × n_bf`` never exceeds this many elements
(~``8 × max_block_elems`` bytes of float64). Default
``5×10⁷`` ≈ 400 MB per block. Small molecules fit in a
single block (no behaviour change); large systems are
swept block-by-block so the function never materialises a
multi-gigabyte AO matrix. The result is independent of
the block size to floating-point round-off.
Returns
-------
HirshfeldResult
Rich dataclass; ``HirshfeldResult.charges`` is the
``(n_atoms,)`` array if you want bare-array semantics
matching :func:`mulliken_charges`. The other fields carry
per-atom integrated electron populations and grid-
normalisation sanity numbers.
Notes
-----
Numerical-quality knobs:
* ``∫ ρ_mol − n_electrons`` should be < 5×10⁻⁵ on the default
grid for typical first-row systems.
* ``Σ q_A − molecule.charge`` should be < 1×10⁻⁶ e (exact
identity from the Hirshfeld weight normalisation; only grid
error introduces residual).
Classical Hirshfeld charges in vibe-qc come out ~0.04-0.06 e
*more negative* on heavy atoms than ORCA-reported values
because the promolecule is constructed in the molecular basis
(SAD-derived) rather than from tabulated Slater-type atomic
densities. Documentable trade-off — switch sources via a
future ``promolecule="slater"`` kwarg if byte-equal ORCA
parity matters.
The iterative Hirshfeld variant (Bultinck et al., *J. Chem.
Phys.* **126**, 144111 (2007)) — which is less promolecule-
sensitive — is a clean follow-on; same API, different inner
loop. Tracked as a v0.10.0 D2b-i candidate.
"""
grid = build_grid(molecule, grid_options if grid_options is not None
else GridOptions())
points = np.asarray(grid.points, dtype=np.float64)
weights = np.asarray(grid.weights, dtype=np.float64)
n_grid = points.shape[0]
n_bf = basis.nbasis
n_atoms = len(molecule.atoms)
P_mol = _total_density(result)
# Promolecular density matrix is the SAD guess (block-diagonal
# by atom — see docstring above).
P_pro = np.asarray(sad_density(molecule, basis), dtype=np.float64)
ao_to_atom = _shell_to_atom(basis)
# Precompute per-atom AO masks + the diagonal P_pro blocks once.
atom_masks = [(ao_to_atom == A) for A in range(n_atoms)]
atom_P_blocks = [
(P_pro[np.ix_(m, m)] if m.any() else None) for m in atom_masks
]
# Process the grid in blocks so the (n_block, n_bf) AO matrix
# never exceeds ``max_block_elems`` float64 entries. Small
# molecules collapse to a single block (identical to the old
# one-shot path); large systems are swept without ever holding
# a multi-GB χ matrix in memory.
block = max(1, int(max_block_elems // max(n_bf, 1)))
electron_pop = np.zeros(n_atoms, dtype=np.float64)
promolecule_norm = 0.0
molecule_norm = 0.0
for start in range(0, n_grid, block):
stop = min(start + block, n_grid)
pts = points[start:stop]
w = weights[start:stop]
# χ_μ(r_g) for this block: (n_block, n_bf).
chi = np.asarray(evaluate_ao(basis, pts), dtype=np.float64)
# ρ_mol(r_g) = Σ_μν P_μν χ_μ χ_ν — (P·χᵀ)·χ row-wise.
rho_mol = np.einsum("gm,gm->g", chi @ P_mol, chi)
# ρ_A^free(r_g) per atom — AO sum restricted to A's range
# thanks to P_pro's block-diagonal structure.
rho_atoms = np.zeros((n_atoms, stop - start), dtype=np.float64)
for A in range(n_atoms):
mask = atom_masks[A]
P_AA = atom_P_blocks[A]
if P_AA is None: # ghost atom — row stays 0
continue
chi_A = chi[:, mask]
rho_atoms[A] = np.einsum("gm,gm->g", chi_A @ P_AA, chi_A)
rho_pro = rho_atoms.sum(axis=0)
# Hirshfeld weights with a ρ_pro floor for far-out points
# that would otherwise be ~0/~0.
safe_pro = np.maximum(rho_pro, rho_floor)
weights_atoms = rho_atoms / safe_pro
electron_pop += weights_atoms @ (w * rho_mol)
promolecule_norm += float((w * rho_pro).sum())
molecule_norm += float((w * rho_mol).sum())
Z = np.array([atom.Z for atom in molecule.atoms], dtype=np.float64)
charges = Z - electron_pop
return HirshfeldResult(
charges=charges,
electron_population=electron_pop,
promolecule_norm=promolecule_norm,
molecule_norm=molecule_norm,
n_grid_points=n_grid,
)
# ---------------------------------------------------------------------------
# Mayer bond orders
# ---------------------------------------------------------------------------
[docs]
def mayer_bond_orders(result, basis: BasisSet,
molecule: Molecule) -> np.ndarray:
"""Mayer bond-order matrix, shape (n_atoms, n_atoms).
For closed-shell systems:
B_AB = Σ_{μ ∈ A} Σ_{ν ∈ B} (P·S)_μν · (P·S)_νμ
For UHF / UKS we replace (P·S)(P·S) with the spin-resolved
definition 2·[(Pα·S)(Pα·S) + (Pβ·S)(Pβ·S)] which reduces to the
closed-shell formula when Pα = Pβ = P/2.
Diagonal entries are the "free valence" of each atom. The off-
diagonals are the chemically-meaningful bond orders.
"""
S = np.asarray(compute_overlap(basis))
ao_to_atom = _shell_to_atom(basis)
n_atoms = len(molecule.atoms)
if hasattr(result, "density_alpha"):
# Real part for complex (periodic, Hermitian) densities — the Mayer
# bond order is a real observable; see _real_if_hermitian.
Pa = _real_if_hermitian(result.density_alpha, what="alpha density")
Pb = _real_if_hermitian(result.density_beta, what="beta density")
PS_a = Pa @ S
PS_b = Pb @ S
# Element-wise Mayer: M_μν = 2·[(PS_a)_μν (PS_a)_νμ
# + (PS_b)_μν (PS_b)_νμ]
M = 2.0 * (PS_a * PS_a.T + PS_b * PS_b.T)
else:
P = _real_if_hermitian(result.density)
PS = P @ S
M = PS * PS.T # broadcasting element-wise; equivalent to
# M_μν = (PS)_μν · (PS)_νμ since the matrix is
# real.
bond_orders = np.zeros((n_atoms, n_atoms), dtype=np.float64)
for mu in range(M.shape[0]):
a = ao_to_atom[mu]
for nu in range(M.shape[1]):
b = ao_to_atom[nu]
if a != b:
bond_orders[a, b] += M[mu, nu]
# Symmetrize numerically — analytical B_AB = B_BA for real AOs.
bond_orders = 0.5 * (bond_orders + bond_orders.T)
return bond_orders
def prominent_bonds(
bond_orders: np.ndarray,
molecule: Molecule,
*,
threshold: float = 0.10,
) -> list[tuple[int, int, float]]:
"""Return ``[(i, j, B_ij)]`` pairs with ``i < j`` and
``B_ij >= threshold`` — convenience for formatting a compact
bond-order table in the log output."""
n = bond_orders.shape[0]
out: list[tuple[int, int, float]] = []
for i in range(n):
for j in range(i + 1, n):
if bond_orders[i, j] >= threshold:
out.append((i, j, float(bond_orders[i, j])))
# Sort by descending bond order so the covalent bonds float to the top.
out.sort(key=lambda t: -t[2])
return out
# ---------------------------------------------------------------------------
# Dipole moment
# ---------------------------------------------------------------------------
_BOHR_TO_DEBYE = 2.541746473 # 1 e·bohr = 2.541746473 Debye
[docs]
@dataclass
class DipoleMoment:
"""Dipole moment components in atomic units (e·bohr), plus Debye."""
x: float
y: float
z: float
origin: tuple[float, float, float]
@property
def total(self) -> float:
r"""\|mu\| in atomic units (e\*bohr)."""
return float(np.sqrt(self.x ** 2 + self.y ** 2 + self.z ** 2))
@property
def total_debye(self) -> float:
return self.total * _BOHR_TO_DEBYE
[docs]
def components_debye(self) -> tuple[float, float, float]:
return (self.x * _BOHR_TO_DEBYE,
self.y * _BOHR_TO_DEBYE,
self.z * _BOHR_TO_DEBYE)
[docs]
def dipole_moment(
result,
basis: BasisSet,
molecule: Molecule,
*,
origin: Optional[Sequence[float]] = None,
) -> DipoleMoment:
"""Electric dipole moment of a converged SCF calculation.
``origin`` (bohr) defaults to the molecular center of mass, which
makes the dipole origin-independent for neutral systems (the
convention every standard QC code uses). For charged systems the
dipole depends on origin; pass an explicit vector if you need a
particular reference.
"""
if origin is None:
origin_vec = center_of_mass(molecule)
else:
origin_vec = np.asarray(origin, dtype=np.float64)
if origin_vec.shape != (3,):
raise ValueError("dipole_moment: origin must be a 3-vector (bohr)")
dip = compute_dipole(basis, [float(x) for x in origin_vec])
Mx = np.asarray(dip.x)
My = np.asarray(dip.y)
Mz = np.asarray(dip.z)
P = _total_density(result)
# Electronic contribution (electrons are negative): -tr(P · M_c).
mu_e_x = -np.einsum("ij,ji->", P, Mx)
mu_e_y = -np.einsum("ij,ji->", P, My)
mu_e_z = -np.einsum("ij,ji->", P, Mz)
# Nuclear contribution (with origin shift): Σ_A Z_A (R_A − O).
mu_n_x = 0.0
mu_n_y = 0.0
mu_n_z = 0.0
for atom in molecule.atoms:
z = float(atom.Z)
mu_n_x += z * (atom.xyz[0] - origin_vec[0])
mu_n_y += z * (atom.xyz[1] - origin_vec[1])
mu_n_z += z * (atom.xyz[2] - origin_vec[2])
return DipoleMoment(
x=float(mu_e_x + mu_n_x),
y=float(mu_e_y + mu_n_y),
z=float(mu_e_z + mu_n_z),
origin=(float(origin_vec[0]),
float(origin_vec[1]),
float(origin_vec[2])),
)
# ---------------------------------------------------------------------------
# Natural orbitals
# ---------------------------------------------------------------------------
@dataclass
class NaturalOrbitals:
"""Natural orbitals + occupations, sorted by descending occupation.
Attributes
----------
occupations
``(n_bf,)`` real array. For ``kind="rhf"`` and
``kind="uhf-total"`` occupations are in ``[0, 2]``; for
``kind="uhf-alpha"`` / ``kind="uhf-beta"`` they are in
``[0, 1]``; for ``kind="uhf-spin"`` they are in ``[-1, 1]``.
coefficients
``(n_bf, n_bf)`` real matrix. Each column is one NO expressed in
the AO basis, S-normalized: ``C^T S C = I``. Columns are
ordered by descending occupation so ``coefficients[:, :n_occ]``
is the natural-occupation analogue of "occupied MOs".
kind
One of ``"rhf"``, ``"uhf-total"``, ``"uhf-alpha"``,
``"uhf-beta"``, ``"uhf-spin"`` — describes which density matrix
was diagonalized so the user knows how to interpret occupations.
"""
occupations: np.ndarray
coefficients: np.ndarray
kind: str
@property
def n_orbitals(self) -> int:
return int(self.occupations.size)
@property
def n_electrons(self) -> float:
"""Sum of occupations — equals the underlying electron count
(or ``N_α − N_β`` for ``kind="uhf-spin"``) up to FP noise."""
return float(self.occupations.sum())
def _diagonalise_density(D: np.ndarray, S: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
"""Solve ``D S c_i = n_i c_i`` via the Löwdin route. Returns
``(occupations, coefficients)`` sorted by *descending* occupation
with the AO-basis NOs S-normalized (``C^T S C = I``).
Numerically: form ``D̃ = S^{1/2} D S^{1/2}`` (real symmetric),
diagonalize, then transform eigenvectors back via
``C = S^{-1/2} U``. This avoids the non-symmetric eigenproblem
``D · S`` (which has real eigenvalues but complex-arithmetic
eigenvectors)."""
s_eig, U_S = np.linalg.eigh(S)
if np.min(s_eig) < 1e-10:
raise ValueError(
f"natural_orbitals: overlap is near-singular "
f"(min eigenvalue {np.min(s_eig):.2e})"
)
sqrt_s = np.sqrt(s_eig)
inv_sqrt_s = 1.0 / sqrt_s
S_half = U_S @ np.diag(sqrt_s) @ U_S.T
S_inv_half = U_S @ np.diag(inv_sqrt_s) @ U_S.T
D_tilde = S_half @ D @ S_half
# Force-symmetrize — D̃ is exactly symmetric in exact arithmetic.
D_tilde = 0.5 * (D_tilde + D_tilde.T)
n, U = np.linalg.eigh(D_tilde)
# eigh returns ascending; flip to descending occupation.
order = np.argsort(-n)
n = n[order]
U = U[:, order]
C = S_inv_half @ U
return n, C
def natural_orbitals(result, basis: BasisSet, *,
kind: str = "auto") -> NaturalOrbitals:
"""Diagonalize the SCF one-particle density matrix to get natural
orbitals + occupations.
For a single-determinant RHF/RKS the occupations come out exactly
integer (2.0 for occupied, 0.0 for virtual) and the NOs span the
same occupied/virtual subspaces as the canonical MOs (different
rotations within those subspaces only). For UHF/UKS the *total*
natural orbitals (default) carry fractional occupations whose
deviation from {0, 2} measures spin contamination /
multireference character; the *spin* natural orbitals
(``kind="uhf-spin"``) carry the unpaired-electron distribution
whose largest-eigenvalue magnitudes localise the open-shell
character.
Parameters
----------
result
Output of :func:`vibeqc.run_rhf`, :func:`vibeqc.run_rks`,
:func:`vibeqc.run_uhf`, or :func:`vibeqc.run_uks`.
basis
The same :class:`BasisSet` used to run the SCF (the AO overlap
is recomputed from it).
kind
``"auto"`` (default) chooses ``"rhf"`` for closed-shell results
and ``"uhf-total"`` for open-shell. Other accepted values:
* ``"rhf"`` — for an RHF/RKS result, diagonalize ``D`` directly
(occupations 0..2).
* ``"uhf-total"`` — for a UHF/UKS result, diagonalize
``D_α + D_β`` (occupations 0..2, sum = N_e).
* ``"uhf-alpha"`` / ``"uhf-beta"`` — diagonalize ``D_α`` or
``D_β`` alone (occupations 0..1, sum = N_α or N_β).
* ``"uhf-spin"`` — diagonalize the spin density ``D_α − D_β``
(occupations −1..1, sum = N_α − N_β). Eigenvectors with the
largest |occupation| pick out where the unpaired spins live.
"""
is_open_shell = hasattr(result, "density_alpha")
if kind == "auto":
kind = "uhf-total" if is_open_shell else "rhf"
if kind == "rhf":
if is_open_shell:
raise ValueError(
"natural_orbitals: kind='rhf' requires a closed-shell "
"(RHF/RKS) result; got an open-shell result. Use "
"kind='uhf-total' instead."
)
D = np.asarray(result.density)
elif kind == "uhf-total":
if not is_open_shell:
raise ValueError(
"natural_orbitals: kind='uhf-total' requires an "
"open-shell (UHF/UKS) result."
)
D = (np.asarray(result.density_alpha)
+ np.asarray(result.density_beta))
elif kind == "uhf-alpha":
if not is_open_shell:
raise ValueError(
"natural_orbitals: kind='uhf-alpha' requires an "
"open-shell result.")
D = np.asarray(result.density_alpha)
elif kind == "uhf-beta":
if not is_open_shell:
raise ValueError(
"natural_orbitals: kind='uhf-beta' requires an "
"open-shell result.")
D = np.asarray(result.density_beta)
elif kind == "uhf-spin":
if not is_open_shell:
raise ValueError(
"natural_orbitals: kind='uhf-spin' requires an "
"open-shell result.")
D = (np.asarray(result.density_alpha)
- np.asarray(result.density_beta))
else:
raise ValueError(
f"natural_orbitals: unknown kind={kind!r}. Valid: 'auto', "
f"'rhf', 'uhf-total', 'uhf-alpha', 'uhf-beta', 'uhf-spin'."
)
S = np.asarray(compute_overlap(basis))
occupations, coefficients = _diagonalise_density(D, S)
return NaturalOrbitals(
occupations=occupations,
coefficients=coefficients,
kind=kind,
)
def idempotency_deviation(no: NaturalOrbitals) -> float:
"""Scalar measure of how far the density matrix is from a single
Slater determinant. Larger values flag multireference character or
spin contamination.
For ``kind="rhf"`` / ``kind="uhf-total"`` (occupations in [0, 2]):
Δ = Σ_i n_i (2 − n_i) / 2
A pure single-determinant Hartree–Fock state gives ``Δ = 0`` (every
NO is exactly 0 or 2). For UHF the value is the standard
"non-idempotency" diagnostic — small for well-behaved closed-shell
systems and growing with spin contamination.
For ``kind="uhf-alpha"``, ``"uhf-beta"`` (occupations in [0, 1]):
Δ = Σ_i n_i (1 − n_i)
For ``kind="uhf-spin"`` (occupations in [−1, 1]):
Δ = Σ_i (1 − n_i²) / 2 (Yamaguchi-style estimate of the
number of unpaired electrons; the
proper "N_unpaired" via the
Head-Gordon definition uses
2·(D_α D_β S) eigenvalues, which
this function does not compute.)
"""
n = np.asarray(no.occupations, dtype=float)
if no.kind in ("rhf", "uhf-total"):
return 0.5 * float(np.sum(n * (2.0 - n)))
if no.kind in ("uhf-alpha", "uhf-beta"):
return float(np.sum(n * (1.0 - n)))
if no.kind == "uhf-spin":
return 0.5 * float(np.sum(1.0 - n * n))
raise ValueError(f"idempotency_deviation: unknown NO kind {no.kind!r}")