Source code for vibeqc.properties

"""Post-SCF molecular properties: atomic charges, bond orders, dipole.

The standard "sanity-check" output you expect from every QC program,
built on top of a converged SCF result.

Public API
----------

.. autofunction:: mulliken_charges
.. autofunction:: loewdin_charges
.. autofunction:: mayer_bond_orders
.. autofunction:: dipole_moment
.. autofunction:: center_of_mass
.. autofunction:: natural_orbitals
.. autofunction:: idempotency_deviation

All functions accept the ``result`` object returned by
:func:`vibeqc.run_rhf`, :func:`vibeqc.run_uhf`, :func:`vibeqc.run_rks`,
or :func:`vibeqc.run_uks`; the matching :class:`Molecule` and
:class:`BasisSet` used to run the SCF; and for dipole moments an
optional origin.

Implementation notes
--------------------

Mulliken and Löwdin population analyses are AO-basis-dependent in well-
known ways — Mulliken in particular is very sensitive to diffuse
functions. The output is useful for trend-watching across a series
(e.g. charge transfer along a reaction coordinate) but atomic charges
beyond the leading digit should never be taken too seriously. Mayer
bond orders are rotation-invariant and less basis-sensitive.
"""

from __future__ import annotations

import warnings
from dataclasses import dataclass
from typing import TYPE_CHECKING, Iterable, Optional, Sequence

import numpy as np

from ._vibeqc_core import (
    BasisSet,
    GridOptions,
    Molecule,
    build_grid,
    compute_dipole,
    compute_overlap,
    evaluate_ao,
    sad_density,
)

if TYPE_CHECKING:  # pragma: no cover — import cycle avoidance
    from ._vibeqc_core import RHFResult  # noqa: F401


__all__ = [
    "DipoleMoment",
    "HirshfeldResult",
    "NaturalOrbitals",
    "mulliken_charges",
    "loewdin_charges",
    "hirshfeld_charges",
    "mayer_bond_orders",
    "dipole_moment",
    "center_of_mass",
    "natural_orbitals",
    "idempotency_deviation",
]


# Standard atomic masses (u), index 0 unused.  Dalton approximate values —
# good to ~1e−3 u — plenty for center-of-mass computation. We ship a
# subset sufficient for H–Kr; for heavier elements callers pass an
# explicit origin.
_ATOMIC_MASSES: tuple[float, ...] = (
    0.0,
    1.008,   4.003,                                              # H  He
    6.94,    9.012,  10.81,  12.011, 14.007, 15.999, 18.998, 20.180,  # Li–Ne
    22.990, 24.305, 26.982, 28.085, 30.974, 32.06,  35.45,  39.948,   # Na–Ar
    39.098, 40.078, 44.956, 47.867, 50.942, 51.996, 54.938, 55.845,   # K–Fe
    58.933, 58.693, 63.546, 65.38,  69.723, 72.630, 74.922, 78.971,   # Co–Se
    79.904, 83.798,                                                    # Br Kr
)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _real_if_hermitian(P: np.ndarray, *, what: str = "density matrix") -> np.ndarray:
    """Return the real part of a complex-but-Hermitian density matrix.

    Periodic SCF results carry a *complex-typed* density that is Hermitian
    with a machine-noise imaginary part. The molecular-property code forms
    **real observables** — ``tr(P·O)`` (dipole, Mayer bond order), per-atom
    ``(P·S)`` population sums, the density on a real grid (Hirshfeld) — for
    which the imaginary part of a Hermitian ``P`` contracted with a real
    operator cancels exactly. Returning the real part keeps those
    observables real and stops a periodic run from emitting a
    ``ComplexWarning`` on every property (the silent ``float(...)`` /
    ``float64`` casts that warning came from also discarded the imaginary
    part — this does it once, intentionally, after a Hermiticity check).

    A *non-negligible* imaginary part means a genuinely non-Hermitian
    density (a bug), surfaced with an actionable warning rather than
    discarded silently. Real-typed input is returned unchanged (molecular
    RHF/RKS/UHF/UKS path — zero behaviour change).
    """
    P = np.asarray(P)
    if not np.iscomplexobj(P):
        return P
    if P.size:
        max_im = float(np.abs(P.imag).max())
        max_re = float(np.abs(P.real).max())
        if max_im > 1e-8 * max(max_re, 1.0):
            warnings.warn(
                f"{what} has a non-negligible imaginary part "
                f"(max|Im|={max_im:.2e} vs max|Re|={max_re:.2e}); molecular-"
                "property values are derived from its real part and may be "
                "unreliable. This usually indicates a non-Hermitian density "
                "matrix (a bug) rather than a well-converged periodic SCF.",
                RuntimeWarning,
                stacklevel=3,
            )
    return np.ascontiguousarray(P.real)


def _total_density(result) -> np.ndarray:
    """Return the total (closed-shell + open-shell) density matrix.

    RHF/RKS store the already-combined density on ``.density``; UHF/UKS
    expose ``density_alpha`` and ``density_beta`` separately. Complex
    periodic densities are reduced to their (Hermitian) real part via
    :func:`_real_if_hermitian` so downstream observables stay real.
    """
    if hasattr(result, "density_alpha"):
        P = np.asarray(result.density_alpha) + np.asarray(result.density_beta)
    else:
        P = np.asarray(result.density)
    return _real_if_hermitian(P)


def _shell_to_atom(basis: BasisSet) -> np.ndarray:
    """1-D int array, length ``nbasis``, mapping each AO to the 0-based
    atom index it lives on. Computed from ``basis.shells()`` (which is
    public C++ API exposed for basis-set I/O)."""
    shells = basis.shells()
    per_ao: list[int] = []
    for shell in shells:
        # 2L+1 spherical AOs per shell (vibe-qc forces set_pure(true)
        # at basis construction; Cartesian shells would have a
        # different multiplicity, but that is not our configuration).
        n = 2 * shell.l + 1
        per_ao.extend([int(shell.atom_index)] * n)
    return np.asarray(per_ao, dtype=np.int64)


def _per_atom_sum(ao_values: np.ndarray, ao_to_atom: np.ndarray,
                  n_atoms: int) -> np.ndarray:
    """Sum an nbasis-length array into per-atom totals."""
    out = np.zeros(n_atoms, dtype=np.float64)
    for a, v in zip(ao_to_atom, ao_values):
        out[a] += v
    return out



[docs]
def center_of_mass(molecule: Molecule) -> np.ndarray:
    """Center of mass (bohr). Atomic masses from a built-in table up to
    Z = 36; callers with heavier elements should specify the origin to
    dipole_moment directly."""
    atoms = list(molecule.atoms)
    if not atoms:
        return np.zeros(3)
    total_mass = 0.0
    com = np.zeros(3)
    for atom in atoms:
        z = int(atom.Z)
        if z < len(_ATOMIC_MASSES):
            m = _ATOMIC_MASSES[z]
        else:
            # Crude fall-back: 2 u per nucleon ≈ A, and A ≈ 2 Z on average
            # for light elements — good enough not to throw. Heavy-element
            # users should pass an explicit origin.
            m = 2.0 * z
        pos = np.array([atom.xyz[0], atom.xyz[1], atom.xyz[2]])
        com += m * pos
        total_mass += m
    if total_mass == 0.0:
        return np.zeros(3)
    return com / total_mass



# ---------------------------------------------------------------------------
# Mulliken population analysis
# ---------------------------------------------------------------------------


[docs]
def mulliken_charges(result, basis: BasisSet, molecule: Molecule) -> np.ndarray:
    """Mulliken atomic partial charges q_A = Z_A − Σ_{μ ∈ A} (P·S)_μμ.

    Returns a 1-D array of length ``n_atoms``. Charges sum to the total
    molecular charge (``molecule.charge``) to machine precision. The
    overall partition is AO-basis-dependent; Mulliken is sensitive to
    diffuse functions and should be used for trend analysis rather than
    for quantitative charge assignment.
    """
    P = _total_density(result)
    S = np.asarray(compute_overlap(basis))
    PS_diag = np.einsum("ij,ji->i", P, S)   # diag(P · S)
    ao_to_atom = _shell_to_atom(basis)
    n_atoms = len(molecule.atoms)
    electron_pop = _per_atom_sum(PS_diag, ao_to_atom, n_atoms)
    Z = np.array([atom.Z for atom in molecule.atoms], dtype=np.float64)
    return Z - electron_pop



# ---------------------------------------------------------------------------
# Löwdin population analysis
# ---------------------------------------------------------------------------


[docs]
def loewdin_charges(result, basis: BasisSet, molecule: Molecule) -> np.ndarray:
    """Löwdin (symmetric orthogonalisation) atomic partial charges:

        q_A = Z_A − Σ_{μ ∈ A} (S^{1/2} · P · S^{1/2})_μμ.

    Compared to Mulliken this partitions the off-diagonal overlap
    equally between the two AOs involved — less basis-sensitive in
    practice.
    """
    P = _total_density(result)
    S = np.asarray(compute_overlap(basis))
    # S = U · diag(s) · U.T → S^{1/2} = U · diag(√s) · U.T
    w, V = np.linalg.eigh(S)
    if np.min(w) < 1e-10:
        # Defensive: raise the way run_rhf's symmetric-orthogonalisation
        # does. In practice vibe-qc's basis sets never trigger this on
        # molecules; periodic near-linear-dependence surfaces earlier.
        raise ValueError(
            f"loewdin_charges: overlap matrix is near-singular "
            f"(min eigenvalue {np.min(w):.2e})"
        )
    S_half = V @ np.diag(np.sqrt(w)) @ V.T
    SPS = S_half @ P @ S_half
    diag = np.diag(SPS)
    ao_to_atom = _shell_to_atom(basis)
    n_atoms = len(molecule.atoms)
    electron_pop = _per_atom_sum(diag, ao_to_atom, n_atoms)
    Z = np.array([atom.Z for atom in molecule.atoms], dtype=np.float64)
    return Z - electron_pop



# ---------------------------------------------------------------------------
# Hirshfeld population analysis
# ---------------------------------------------------------------------------


[docs]
@dataclass
class HirshfeldResult:
    """Output of :func:`hirshfeld_charges`.

    Attributes
    ----------
    charges : np.ndarray
        ``(n_atoms,)`` array of Hirshfeld atomic partial charges in
        electrons. Sums to ``molecule.charge`` to grid precision.
        Sign convention matches :func:`mulliken_charges` /
        :func:`loewdin_charges` (positive = electron-deficient).
    electron_population : np.ndarray
        ``(n_atoms,)`` array of ∫ w_A(r) ρ(r) dV — the Hirshfeld-
        partitioned electron count on each atom. ``Z_A − this`` is
        ``charges[A]``.
    promolecule_norm : float
        ∫ ρ_pro dV evaluated on the grid; should ≈ n_electrons.
        Diagnostic: when this deviates by > 1e-3 from the integer
        electron count, the integration grid is too coarse or the
        SAD promolecule didn't converge for some atom (very rare).
    molecule_norm : float
        ∫ ρ_mol dV evaluated on the same grid; should also
        ≈ n_electrons. Comparing the two norms tells you whether
        grid error is in the molecular density or the promolecule.
    n_grid_points : int
        Total Becke-Lebedev-Treutler grid point count used. Scales
        with ``GridOptions.n_radial × angular order × n_atoms``.
    """

    charges: np.ndarray
    electron_population: np.ndarray
    promolecule_norm: float
    molecule_norm: float
    n_grid_points: int

    def __repr__(self) -> str:
        return (
            f"HirshfeldResult(charges=<{len(self.charges)} atoms>, "
            f"Σq={self.charges.sum():+.6f}, "
            f"n_e(mol)={self.molecule_norm:.4f}, "
            f"n_e(pro)={self.promolecule_norm:.4f}, "
            f"n_grid={self.n_grid_points})"
        )




[docs]
def hirshfeld_charges(
    result,
    basis: BasisSet,
    molecule: Molecule,
    *,
    grid_options: Optional[GridOptions] = None,
    rho_floor: float = 1.0e-30,
    max_block_elems: int = 50_000_000,
) -> HirshfeldResult:
    """Classical Hirshfeld atomic partial charges from a converged SCF.

    Hirshfeld, F. L. *Theor. Chim. Acta* **44**, 129 (1977).

        q_A = Z_A − ∫ w_A(r) ρ(r) d³r,
        w_A(r) = ρ_A^free(r) / Σ_B ρ_B^free(r).

    Unlike :func:`mulliken_charges` and :func:`loewdin_charges`,
    Hirshfeld is a *real-space* partition rather than a basis-space
    partition — far less sensitive to diffuse functions, and the
    standard input charge for charge-dependent dispersion methods
    like the D4 refinement scheduled for vibe-qc v0.10.0 D2b.

    The promolecular reference {ρ_A^free} is obtained for free from
    :func:`vibeqc.sad_density` — it returns the SAD initial-guess
    density matrix, which is block-diagonal by atom (each atomic
    SCF runs in vacuum, contributes to its own AO range, off-
    diagonal blocks are zero). Restricting the AO sum to atom A's
    basis-function range gives ρ_A^free evaluated at the molecular
    geometry. No per-atom SCF, no ionic-fragment branching.

    Parameters
    ----------
    result
        SCF result from ``vibeqc.run_rhf`` / ``run_rks`` /
        ``run_uhf`` / ``run_uks``. The total density matrix is
        extracted via :func:`_total_density` (handles both the
        closed-shell ``.density`` and the open-shell
        ``.density_alpha`` + ``.density_beta`` schemas).
    basis
        The same :class:`BasisSet` used to run the SCF.
    molecule
        The same :class:`Molecule` used to run the SCF.
    grid_options
        Optional :class:`GridOptions` for the Becke-Lebedev-
        Treutler integration grid. Defaults to ``GridOptions()``
        (vibe-qc's DFT-default level). Hirshfeld weights are
        smooth so the default grid is plenty for sub-millielectron
        charge accuracy; tighten only for explicit
        grid-convergence studies.
    rho_floor
        Promolecule density floor to keep w_A = ρ_A / ρ_pro
        well-defined in vacuum regions far from any atom. Default
        ``1e-30`` is well below any grid point that contributes
        meaningfully to the integral.
    max_block_elems
        Memory cap for the grid sweep. The AO matrix χ_μ(r_g) is
        ``(n_block, n_bf)``; the grid is processed in blocks sized
        so ``n_block × n_bf`` never exceeds this many elements
        (~``8 × max_block_elems`` bytes of float64). Default
        ``5×10⁷`` ≈ 400 MB per block. Small molecules fit in a
        single block (no behaviour change); large systems are
        swept block-by-block so the function never materialises a
        multi-gigabyte AO matrix. The result is independent of
        the block size to floating-point round-off.

    Returns
    -------
    HirshfeldResult
        Rich dataclass; ``HirshfeldResult.charges`` is the
        ``(n_atoms,)`` array if you want bare-array semantics
        matching :func:`mulliken_charges`. The other fields carry
        per-atom integrated electron populations and grid-
        normalisation sanity numbers.

    Notes
    -----
    Numerical-quality knobs:

    * ``∫ ρ_mol − n_electrons`` should be < 5×10⁻⁵ on the default
      grid for typical first-row systems.
    * ``Σ q_A − molecule.charge`` should be < 1×10⁻⁶ e (exact
      identity from the Hirshfeld weight normalisation; only grid
      error introduces residual).

    Classical Hirshfeld charges in vibe-qc come out ~0.04-0.06 e
    *more negative* on heavy atoms than ORCA-reported values
    because the promolecule is constructed in the molecular basis
    (SAD-derived) rather than from tabulated Slater-type atomic
    densities. Documentable trade-off — switch sources via a
    future ``promolecule="slater"`` kwarg if byte-equal ORCA
    parity matters.

    The iterative Hirshfeld variant (Bultinck et al., *J. Chem.
    Phys.* **126**, 144111 (2007)) — which is less promolecule-
    sensitive — is a clean follow-on; same API, different inner
    loop. Tracked as a v0.10.0 D2b-i candidate.
    """
    grid = build_grid(molecule, grid_options if grid_options is not None
                      else GridOptions())
    points = np.asarray(grid.points, dtype=np.float64)
    weights = np.asarray(grid.weights, dtype=np.float64)
    n_grid = points.shape[0]

    n_bf = basis.nbasis
    n_atoms = len(molecule.atoms)
    P_mol = _total_density(result)
    # Promolecular density matrix is the SAD guess (block-diagonal
    # by atom — see docstring above).
    P_pro = np.asarray(sad_density(molecule, basis), dtype=np.float64)
    ao_to_atom = _shell_to_atom(basis)
    # Precompute per-atom AO masks + the diagonal P_pro blocks once.
    atom_masks = [(ao_to_atom == A) for A in range(n_atoms)]
    atom_P_blocks = [
        (P_pro[np.ix_(m, m)] if m.any() else None) for m in atom_masks
    ]

    # Process the grid in blocks so the (n_block, n_bf) AO matrix
    # never exceeds ``max_block_elems`` float64 entries. Small
    # molecules collapse to a single block (identical to the old
    # one-shot path); large systems are swept without ever holding
    # a multi-GB χ matrix in memory.
    block = max(1, int(max_block_elems // max(n_bf, 1)))

    electron_pop = np.zeros(n_atoms, dtype=np.float64)
    promolecule_norm = 0.0
    molecule_norm = 0.0

    for start in range(0, n_grid, block):
        stop = min(start + block, n_grid)
        pts = points[start:stop]
        w = weights[start:stop]

        # χ_μ(r_g) for this block: (n_block, n_bf).
        chi = np.asarray(evaluate_ao(basis, pts), dtype=np.float64)

        # ρ_mol(r_g) = Σ_μν P_μν χ_μ χ_ν — (P·χᵀ)·χ row-wise.
        rho_mol = np.einsum("gm,gm->g", chi @ P_mol, chi)

        # ρ_A^free(r_g) per atom — AO sum restricted to A's range
        # thanks to P_pro's block-diagonal structure.
        rho_atoms = np.zeros((n_atoms, stop - start), dtype=np.float64)
        for A in range(n_atoms):
            mask = atom_masks[A]
            P_AA = atom_P_blocks[A]
            if P_AA is None:               # ghost atom — row stays 0
                continue
            chi_A = chi[:, mask]
            rho_atoms[A] = np.einsum("gm,gm->g", chi_A @ P_AA, chi_A)

        rho_pro = rho_atoms.sum(axis=0)

        # Hirshfeld weights with a ρ_pro floor for far-out points
        # that would otherwise be ~0/~0.
        safe_pro = np.maximum(rho_pro, rho_floor)
        weights_atoms = rho_atoms / safe_pro

        electron_pop += weights_atoms @ (w * rho_mol)
        promolecule_norm += float((w * rho_pro).sum())
        molecule_norm += float((w * rho_mol).sum())

    Z = np.array([atom.Z for atom in molecule.atoms], dtype=np.float64)
    charges = Z - electron_pop

    return HirshfeldResult(
        charges=charges,
        electron_population=electron_pop,
        promolecule_norm=promolecule_norm,
        molecule_norm=molecule_norm,
        n_grid_points=n_grid,
    )



# ---------------------------------------------------------------------------
# Mayer bond orders
# ---------------------------------------------------------------------------


[docs]
def mayer_bond_orders(result, basis: BasisSet,
                      molecule: Molecule) -> np.ndarray:
    """Mayer bond-order matrix, shape (n_atoms, n_atoms).

    For closed-shell systems:

        B_AB = Σ_{μ ∈ A} Σ_{ν ∈ B} (P·S)_μν · (P·S)_νμ

    For UHF / UKS we replace (P·S)(P·S) with the spin-resolved
    definition 2·[(Pα·S)(Pα·S) + (Pβ·S)(Pβ·S)] which reduces to the
    closed-shell formula when Pα = Pβ = P/2.

    Diagonal entries are the "free valence" of each atom. The off-
    diagonals are the chemically-meaningful bond orders.
    """
    S = np.asarray(compute_overlap(basis))
    ao_to_atom = _shell_to_atom(basis)
    n_atoms = len(molecule.atoms)

    if hasattr(result, "density_alpha"):
        # Real part for complex (periodic, Hermitian) densities — the Mayer
        # bond order is a real observable; see _real_if_hermitian.
        Pa = _real_if_hermitian(result.density_alpha, what="alpha density")
        Pb = _real_if_hermitian(result.density_beta, what="beta density")
        PS_a = Pa @ S
        PS_b = Pb @ S
        # Element-wise Mayer: M_μν = 2·[(PS_a)_μν (PS_a)_νμ
        #                              + (PS_b)_μν (PS_b)_νμ]
        M = 2.0 * (PS_a * PS_a.T + PS_b * PS_b.T)
    else:
        P = _real_if_hermitian(result.density)
        PS = P @ S
        M = PS * PS.T   # broadcasting element-wise; equivalent to
                        # M_μν = (PS)_μν · (PS)_νμ since the matrix is
                        # real.

    bond_orders = np.zeros((n_atoms, n_atoms), dtype=np.float64)
    for mu in range(M.shape[0]):
        a = ao_to_atom[mu]
        for nu in range(M.shape[1]):
            b = ao_to_atom[nu]
            if a != b:
                bond_orders[a, b] += M[mu, nu]
    # Symmetrize numerically — analytical B_AB = B_BA for real AOs.
    bond_orders = 0.5 * (bond_orders + bond_orders.T)
    return bond_orders



def prominent_bonds(
    bond_orders: np.ndarray,
    molecule: Molecule,
    *,
    threshold: float = 0.10,
) -> list[tuple[int, int, float]]:
    """Return ``[(i, j, B_ij)]`` pairs with ``i < j`` and
    ``B_ij >= threshold`` — convenience for formatting a compact
    bond-order table in the log output."""
    n = bond_orders.shape[0]
    out: list[tuple[int, int, float]] = []
    for i in range(n):
        for j in range(i + 1, n):
            if bond_orders[i, j] >= threshold:
                out.append((i, j, float(bond_orders[i, j])))
    # Sort by descending bond order so the covalent bonds float to the top.
    out.sort(key=lambda t: -t[2])
    return out


# ---------------------------------------------------------------------------
# Dipole moment
# ---------------------------------------------------------------------------

_BOHR_TO_DEBYE = 2.541746473     # 1 e·bohr = 2.541746473 Debye



[docs]
@dataclass
class DipoleMoment:
    """Dipole moment components in atomic units (e·bohr), plus Debye."""
    x: float
    y: float
    z: float
    origin: tuple[float, float, float]

    @property
    def total(self) -> float:
        r"""\|mu\| in atomic units (e\*bohr)."""
        return float(np.sqrt(self.x ** 2 + self.y ** 2 + self.z ** 2))

    @property
    def total_debye(self) -> float:
        return self.total * _BOHR_TO_DEBYE


[docs]
    def components_debye(self) -> tuple[float, float, float]:
        return (self.x * _BOHR_TO_DEBYE,
                self.y * _BOHR_TO_DEBYE,
                self.z * _BOHR_TO_DEBYE)





[docs]
def dipole_moment(
    result,
    basis: BasisSet,
    molecule: Molecule,
    *,
    origin: Optional[Sequence[float]] = None,
) -> DipoleMoment:
    """Electric dipole moment of a converged SCF calculation.

    ``origin`` (bohr) defaults to the molecular center of mass, which
    makes the dipole origin-independent for neutral systems (the
    convention every standard QC code uses). For charged systems the
    dipole depends on origin; pass an explicit vector if you need a
    particular reference.
    """
    if origin is None:
        origin_vec = center_of_mass(molecule)
    else:
        origin_vec = np.asarray(origin, dtype=np.float64)
        if origin_vec.shape != (3,):
            raise ValueError("dipole_moment: origin must be a 3-vector (bohr)")

    dip = compute_dipole(basis, [float(x) for x in origin_vec])
    Mx = np.asarray(dip.x)
    My = np.asarray(dip.y)
    Mz = np.asarray(dip.z)
    P = _total_density(result)

    # Electronic contribution (electrons are negative): -tr(P · M_c).
    mu_e_x = -np.einsum("ij,ji->", P, Mx)
    mu_e_y = -np.einsum("ij,ji->", P, My)
    mu_e_z = -np.einsum("ij,ji->", P, Mz)

    # Nuclear contribution (with origin shift): Σ_A Z_A (R_A − O).
    mu_n_x = 0.0
    mu_n_y = 0.0
    mu_n_z = 0.0
    for atom in molecule.atoms:
        z = float(atom.Z)
        mu_n_x += z * (atom.xyz[0] - origin_vec[0])
        mu_n_y += z * (atom.xyz[1] - origin_vec[1])
        mu_n_z += z * (atom.xyz[2] - origin_vec[2])

    return DipoleMoment(
        x=float(mu_e_x + mu_n_x),
        y=float(mu_e_y + mu_n_y),
        z=float(mu_e_z + mu_n_z),
        origin=(float(origin_vec[0]),
                float(origin_vec[1]),
                float(origin_vec[2])),
    )



# ---------------------------------------------------------------------------
# Natural orbitals
# ---------------------------------------------------------------------------

@dataclass
class NaturalOrbitals:
    """Natural orbitals + occupations, sorted by descending occupation.

    Attributes
    ----------
    occupations
        ``(n_bf,)`` real array. For ``kind="rhf"`` and
        ``kind="uhf-total"`` occupations are in ``[0, 2]``; for
        ``kind="uhf-alpha"`` / ``kind="uhf-beta"`` they are in
        ``[0, 1]``; for ``kind="uhf-spin"`` they are in ``[-1, 1]``.
    coefficients
        ``(n_bf, n_bf)`` real matrix. Each column is one NO expressed in
        the AO basis, S-normalized: ``C^T S C = I``. Columns are
        ordered by descending occupation so ``coefficients[:, :n_occ]``
        is the natural-occupation analogue of "occupied MOs".
    kind
        One of ``"rhf"``, ``"uhf-total"``, ``"uhf-alpha"``,
        ``"uhf-beta"``, ``"uhf-spin"`` — describes which density matrix
        was diagonalized so the user knows how to interpret occupations.
    """

    occupations: np.ndarray
    coefficients: np.ndarray
    kind: str

    @property
    def n_orbitals(self) -> int:
        return int(self.occupations.size)

    @property
    def n_electrons(self) -> float:
        """Sum of occupations — equals the underlying electron count
        (or ``N_α − N_β`` for ``kind="uhf-spin"``) up to FP noise."""
        return float(self.occupations.sum())


def _diagonalise_density(D: np.ndarray, S: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """Solve ``D S c_i = n_i c_i`` via the Löwdin route. Returns
    ``(occupations, coefficients)`` sorted by *descending* occupation
    with the AO-basis NOs S-normalized (``C^T S C = I``).

    Numerically: form ``D̃ = S^{1/2} D S^{1/2}`` (real symmetric),
    diagonalize, then transform eigenvectors back via
    ``C = S^{-1/2} U``. This avoids the non-symmetric eigenproblem
    ``D · S`` (which has real eigenvalues but complex-arithmetic
    eigenvectors)."""
    s_eig, U_S = np.linalg.eigh(S)
    if np.min(s_eig) < 1e-10:
        raise ValueError(
            f"natural_orbitals: overlap is near-singular "
            f"(min eigenvalue {np.min(s_eig):.2e})"
        )
    sqrt_s = np.sqrt(s_eig)
    inv_sqrt_s = 1.0 / sqrt_s
    S_half = U_S @ np.diag(sqrt_s) @ U_S.T
    S_inv_half = U_S @ np.diag(inv_sqrt_s) @ U_S.T
    D_tilde = S_half @ D @ S_half
    # Force-symmetrize — D̃ is exactly symmetric in exact arithmetic.
    D_tilde = 0.5 * (D_tilde + D_tilde.T)
    n, U = np.linalg.eigh(D_tilde)
    # eigh returns ascending; flip to descending occupation.
    order = np.argsort(-n)
    n = n[order]
    U = U[:, order]
    C = S_inv_half @ U
    return n, C


def natural_orbitals(result, basis: BasisSet, *,
                     kind: str = "auto") -> NaturalOrbitals:
    """Diagonalize the SCF one-particle density matrix to get natural
    orbitals + occupations.

    For a single-determinant RHF/RKS the occupations come out exactly
    integer (2.0 for occupied, 0.0 for virtual) and the NOs span the
    same occupied/virtual subspaces as the canonical MOs (different
    rotations within those subspaces only). For UHF/UKS the *total*
    natural orbitals (default) carry fractional occupations whose
    deviation from {0, 2} measures spin contamination /
    multireference character; the *spin* natural orbitals
    (``kind="uhf-spin"``) carry the unpaired-electron distribution
    whose largest-eigenvalue magnitudes localise the open-shell
    character.

    Parameters
    ----------
    result
        Output of :func:`vibeqc.run_rhf`, :func:`vibeqc.run_rks`,
        :func:`vibeqc.run_uhf`, or :func:`vibeqc.run_uks`.
    basis
        The same :class:`BasisSet` used to run the SCF (the AO overlap
        is recomputed from it).
    kind
        ``"auto"`` (default) chooses ``"rhf"`` for closed-shell results
        and ``"uhf-total"`` for open-shell. Other accepted values:

        * ``"rhf"`` — for an RHF/RKS result, diagonalize ``D`` directly
          (occupations 0..2).
        * ``"uhf-total"`` — for a UHF/UKS result, diagonalize
          ``D_α + D_β`` (occupations 0..2, sum = N_e).
        * ``"uhf-alpha"`` / ``"uhf-beta"`` — diagonalize ``D_α`` or
          ``D_β`` alone (occupations 0..1, sum = N_α or N_β).
        * ``"uhf-spin"`` — diagonalize the spin density ``D_α − D_β``
          (occupations −1..1, sum = N_α − N_β). Eigenvectors with the
          largest |occupation| pick out where the unpaired spins live.
    """
    is_open_shell = hasattr(result, "density_alpha")
    if kind == "auto":
        kind = "uhf-total" if is_open_shell else "rhf"

    if kind == "rhf":
        if is_open_shell:
            raise ValueError(
                "natural_orbitals: kind='rhf' requires a closed-shell "
                "(RHF/RKS) result; got an open-shell result. Use "
                "kind='uhf-total' instead."
            )
        D = np.asarray(result.density)
    elif kind == "uhf-total":
        if not is_open_shell:
            raise ValueError(
                "natural_orbitals: kind='uhf-total' requires an "
                "open-shell (UHF/UKS) result."
            )
        D = (np.asarray(result.density_alpha)
             + np.asarray(result.density_beta))
    elif kind == "uhf-alpha":
        if not is_open_shell:
            raise ValueError(
                "natural_orbitals: kind='uhf-alpha' requires an "
                "open-shell result.")
        D = np.asarray(result.density_alpha)
    elif kind == "uhf-beta":
        if not is_open_shell:
            raise ValueError(
                "natural_orbitals: kind='uhf-beta' requires an "
                "open-shell result.")
        D = np.asarray(result.density_beta)
    elif kind == "uhf-spin":
        if not is_open_shell:
            raise ValueError(
                "natural_orbitals: kind='uhf-spin' requires an "
                "open-shell result.")
        D = (np.asarray(result.density_alpha)
             - np.asarray(result.density_beta))
    else:
        raise ValueError(
            f"natural_orbitals: unknown kind={kind!r}. Valid: 'auto', "
            f"'rhf', 'uhf-total', 'uhf-alpha', 'uhf-beta', 'uhf-spin'."
        )

    S = np.asarray(compute_overlap(basis))
    occupations, coefficients = _diagonalise_density(D, S)
    return NaturalOrbitals(
        occupations=occupations,
        coefficients=coefficients,
        kind=kind,
    )


def idempotency_deviation(no: NaturalOrbitals) -> float:
    """Scalar measure of how far the density matrix is from a single
    Slater determinant. Larger values flag multireference character or
    spin contamination.

    For ``kind="rhf"`` / ``kind="uhf-total"`` (occupations in [0, 2]):

        Δ  =  Σ_i  n_i (2 − n_i)  /  2

    A pure single-determinant Hartree–Fock state gives ``Δ = 0`` (every
    NO is exactly 0 or 2). For UHF the value is the standard
    "non-idempotency" diagnostic — small for well-behaved closed-shell
    systems and growing with spin contamination.

    For ``kind="uhf-alpha"``, ``"uhf-beta"`` (occupations in [0, 1]):

        Δ  =  Σ_i  n_i (1 − n_i)

    For ``kind="uhf-spin"`` (occupations in [−1, 1]):

        Δ  =  Σ_i  (1 − n_i²) / 2     (Yamaguchi-style estimate of the
                                       number of unpaired electrons; the
                                       proper "N_unpaired" via the
                                       Head-Gordon definition uses
                                       2·(D_α D_β S) eigenvalues, which
                                       this function does not compute.)
    """
    n = np.asarray(no.occupations, dtype=float)
    if no.kind in ("rhf", "uhf-total"):
        return 0.5 * float(np.sum(n * (2.0 - n)))
    if no.kind in ("uhf-alpha", "uhf-beta"):
        return float(np.sum(n * (1.0 - n)))
    if no.kind == "uhf-spin":
        return 0.5 * float(np.sum(1.0 - n * n))
    raise ValueError(f"idempotency_deviation: unknown NO kind {no.kind!r}")