Source code for vibeqc.periodic_rks_multi_k_ewald

"""Phase 15c-2: multi-k closed-shell RKS SCF driver with composed
EWALD_3D Coulomb dispatch.

Multi-k DFT counterpart of :func:`run_rks_periodic_gamma_ewald3d`
(Phase 15c-1) and structural sibling of
:func:`run_rhf_periodic_multi_k_ewald3d`. At every SCF iteration:

    F(k)  =  H_core(k)  +  Bloch_k[J_SR(g) + J_LR(g) − (α/2)·K(g)]
                       +  Bloch_k[V_xc(g)]

where ``α = func.hf_exchange_fraction`` (1.0 = HF; 0.0 = pure DFT,
K skipped; 0.2 = B3LYP-style hybrid). The 2e Fock blocks come from
:func:`build_periodic_fock_ewald3d_k` (now α-aware), and the XC
contribution comes from :func:`build_xc_periodic` on the periodic
Becke grid using the SCF's *real-space density* (the proper
LatticeMatrixSet built from k-resolved MOs via
:func:`real_space_density_from_kpoints`).

Density flow.  Multi-k SCF carries the density as a
:class:`LatticeMatrixSet` ``D_real`` whose blocks ``D(g)`` Bloch-
sum to the per-k density matrices. ``build_xc_periodic`` consumes
this directly — no degenerate-LMS hack needed (that was only the
Γ-only driver's molecular-limit trick).

Energy formula. Same shape as the C++ RKS DIRECT_TRUNCATED driver:

    E_elec  =  E_xc  +  Σ_k w_k · Re tr(D(k)·H_core(k))
                     +  ½ Σ_k w_k · Re tr(D(k)·F^{HF-part}(k))

where ``F^{HF-part}(k) = Bloch_k[J − (α/2)·K]`` (the part of F that's
linear in D and contributes ½·tr to the energy; V_xc is reported
through E_xc rather than a trace).

Scope.

  * Multi-k closed-shell RKS (multiplicity = 1, even electrons).
  * Pulay DIIS on per-k Fock with k-weighted Frobenius inner
    product (mirrors the multi-k RHF Ewald driver).
  * Saunders-Hillier level shift via ``options.level_shift``.
  * Fermi-Dirac smearing via ``options.smearing_temperature``
    (matches the multi-k RHF Ewald driver's free-energy convergence
    formulation: A = E − T·S).
  * Periodic Becke partition selectable via
    ``options.use_periodic_becke``.

The UKS multi-k Ewald driver (Phase 15c-3) reuses the same Pulay
DIIS / smearing / level-shift machinery but with per-spin densities.
"""

from __future__ import annotations

import math
import os
from dataclasses import dataclass, field
from typing import List, Optional, Sequence, Tuple, Union

import numpy as np

from ._vibeqc_core import (
    BasisSet,
    BlochKMesh,
    CoulombMethod,
    Functional,
    InitialGuess,
    LatticeMatrixSet,
    LatticeSumOptions,
    PeriodicKSOptions,
    PeriodicSystem,
    SCFIteration,
    XCKind,
    bloch_sum,
    build_grid,
    build_xc_periodic,
    compute_kinetic_lattice,
    compute_nuclear_lattice,
    compute_overlap_lattice,
    direct_lattice_cells,
    get_num_threads,
    nuclear_repulsion_per_cell,
    real_space_density_from_kpoints,
    real_space_density_from_kpoints_fractional,
)
from .ewald_j import auto_grid
from .guess import initial_density_closed_shell
from .madelung import (
    madelung_energy_correction_for_lat as _madelung_energy_correction_for_lat,
)
from .periodic_fock_multi_k import build_periodic_fock_ewald3d_k
from .periodic_grid import build_periodic_becke_grid
from .periodic_k_density import density_matrices_per_k as _density_matrices_per_k
from .periodic_rhf_multi_k_ewald import (
    _canonical_orthogonalizer_complex,
    _damp_lattice_matrix,
    _diag_in_orth_basis,
    _g0_block,
)
from .periodic_scf_accelerators import (
    DynamicDamping,
    MultiKPeriodicSCFAccelerator,
)
from .progress import ProgressLogger, resolve_progress
from .scf_divergence import check_scf_divergence
from .smearing import (
    closed_shell_periodic_occupations as _closed_shell_periodic_occupations,
)

__all__ = [
    "PeriodicRKSMultiKEwaldResult",
    "run_rks_periodic_multi_k_ewald3d",
]


_BYTES_PER_DOUBLE = 8
_DEFAULT_LEGACY_XC_LIMIT_GIB = 16.0
_LEGACY_XC_MEMORY_FRACTION = 0.60
_TRUE_ENV = {"1", "true", "yes", "on"}


def _env_flag(name: str) -> bool:
    return os.environ.get(name, "").strip().lower() in _TRUE_ENV


def _gib(n_bytes: int) -> float:
    return float(n_bytes) / float(1024**3)


def _detected_memory_limit_bytes() -> Optional[int]:
    """Best-effort physical/cgroup memory limit for fail-fast guards."""
    candidates: List[int] = []

    # Linux cgroup v2 and v1. These are absent on macOS and many local
    # shells; silently ignore them there.
    for path in (
        "/sys/fs/cgroup/memory.max",
        "/sys/fs/cgroup/memory/memory.limit_in_bytes",
    ):
        try:
            with open(path, "r", encoding="utf-8") as handle:
                raw = handle.read().strip()
            if raw and raw != "max":
                value = int(raw)
                # cgroup v1 may report a sentinel near LONG_MAX when no
                # memory limit is active.
                if 0 < value < 1 << 60:
                    candidates.append(value)
        except (OSError, ValueError):
            pass

    try:
        pages = int(os.sysconf("SC_PHYS_PAGES"))
        page_size = int(os.sysconf("SC_PAGE_SIZE"))
        if pages > 0 and page_size > 0:
            candidates.append(pages * page_size)
    except (AttributeError, OSError, ValueError):
        pass

    return min(candidates) if candidates else None


def _legacy_periodic_xc_limit_bytes() -> int:
    override = os.environ.get("VIBEQC_PERIODIC_XC_MAX_ESTIMATED_GB")
    if override:
        try:
            return int(float(override) * (1024**3))
        except ValueError:
            pass

    detected = _detected_memory_limit_bytes()
    default_limit = int(_DEFAULT_LEGACY_XC_LIMIT_GIB * (1024**3))
    if detected is None:
        return default_limit
    return int(_LEGACY_XC_MEMORY_FRACTION * float(detected))


def _legacy_periodic_xc_thread_count() -> int:
    try:
        return max(1, int(get_num_threads()))
    except Exception:
        pass

    raw = os.environ.get("OMP_NUM_THREADS", "").split(",", 1)[0].strip()
    if raw:
        try:
            return max(1, int(raw))
        except ValueError:
            pass
    return max(1, int(os.cpu_count() or 1))


def _estimate_legacy_periodic_xc_cache_bytes(
    *,
    n_grid: int,
    nbf: int,
    n_cells: int,
    is_gga: bool,
    n_threads: Optional[int] = None,
) -> int:
    """Rough peak estimate for the current C++ periodic-XC footprint.

    ``build_xc_periodic`` currently stores AO values for every density
    cell. GGA stores value + three gradients per cell, then each OpenMP
    worker allocates several ``n_grid x nbf`` temporaries during the
    gradient-density contraction.
    """
    persistent_matrices_per_cell = 4 if is_gga else 1
    scratch_matrices_per_thread = 7 if is_gga else 1
    active_threads = min(
        max(
            1,
            int(
                n_threads
                if n_threads is not None
                else _legacy_periodic_xc_thread_count()
            ),
        ),
        max(1, int(n_cells)),
    )
    matrix_bytes = int(n_grid) * int(nbf) * _BYTES_PER_DOUBLE
    persistent = persistent_matrices_per_cell * (int(n_cells) + 1)
    scratch = scratch_matrices_per_thread * active_threads
    return matrix_bytes * (persistent + scratch)


def _guard_legacy_periodic_xc_memory(
    *,
    n_grid: int,
    nbf: int,
    n_cells: int,
    is_gga: bool,
    functional: str,
) -> None:
    if _env_flag("VIBEQC_ALLOW_LEGACY_PERIODIC_XC_OOM"):
        return
    estimate = _estimate_legacy_periodic_xc_cache_bytes(
        n_grid=n_grid,
        nbf=nbf,
        n_cells=n_cells,
        is_gga=is_gga,
    )
    n_threads = _legacy_periodic_xc_thread_count()
    limit = _legacy_periodic_xc_limit_bytes()
    if estimate <= limit:
        return
    raise MemoryError(
        "run_rks_periodic_multi_k_ewald3d: refusing legacy periodic-XC "
        "build before the OS kills the process. Estimated rough peak "
        f"AO/XC memory = {_gib(estimate):.1f} GiB "
        f"(functional={functional!r}, n_grid={n_grid}, nbf={nbf}, "
        f"n_cells={n_cells}, threads={n_threads}, GGA={is_gga}); "
        f"guard limit = "
        f"{_gib(limit):.1f} GiB. The legacy EWALD_3D/FFT-Poisson RKS "
        "path is not suitable for large tight ionic crystals. Use the "
        "GDF periodic route where available, coarsen opts.grid for a "
        "debug run, or set VIBEQC_PERIODIC_XC_MAX_ESTIMATED_GB / "
        "VIBEQC_ALLOW_LEGACY_PERIODIC_XC_OOM=1 to override this guard."
    )


[docs] @dataclass class PeriodicRKSMultiKEwaldResult: """Result of :func:`run_rks_periodic_multi_k_ewald3d`. Per-cell scalars (energy, e_electronic, e_xc, e_coulomb, e_hf_exchange, e_nuclear) and per-k matrices (mo_energies, mo_coeffs, fock, overlap, hcore) alongside the converged real- space density ``D_real``. Layout mirrors the multi-k RHF Ewald result with KS-specific energy decomposition fields added. """ energy: float e_electronic: float e_nuclear: float e_xc: float e_coulomb: float e_hf_exchange: float n_iter: int converged: bool # Per-k arrays — lists of length ``len(kmesh.kpoints)``. mo_energies: List[np.ndarray] mo_coeffs: List[np.ndarray] fock: List[np.ndarray] overlap: List[np.ndarray] hcore: List[np.ndarray] # Real-space converged density. density: LatticeMatrixSet # Diagnostic trace. scf_trace: List[SCFIteration] = field(default_factory=list) functional: str = "" omega: float = 0.0 grid_shape: Tuple[int, int, int] = (0, 0, 0) # Smearing diagnostics (mirror multi-k RHF Ewald). smearing_temperature: float = 0.0 fermi_level: float = 0.0 entropy: float = 0.0 free_energy: float = 0.0 occupations: List[np.ndarray] = field(default_factory=list)
def _bloch_sum_lms_at_k( lms: LatticeMatrixSet, k_cart: np.ndarray, ) -> np.ndarray: """Bloch-sum a LatticeMatrixSet at a given k-point: returns the complex (n_bf, n_bf) matrix Σ_g e^{i k·R_g} M(g). Used to fold the V_xc real-space LatticeMatrixSet (output of build_xc_periodic) into per-k Fock contributions.""" return np.asarray(bloch_sum(lms, np.asarray(k_cart, dtype=float).reshape(3)))
[docs] def run_rks_periodic_multi_k_ewald3d( system: PeriodicSystem, basis: BasisSet, kmesh: BlochKMesh, options: Optional[PeriodicKSOptions] = None, *, omega: float = 0.0, grid_shape: Optional[Union[Tuple[int, int, int], int]] = None, origin: Optional[Sequence[float]] = None, spacing_bohr: float = 0.3, linear_dep_threshold: float = 1e-7, canonical_orth_normalize_diag_first: bool = True, auto_optimize_truncation: bool = True, progress: Union[bool, ProgressLogger, None] = None, verbose: Optional[int] = None, ) -> PeriodicRKSMultiKEwaldResult: """Multi-k closed-shell periodic RKS SCF with EWALD_3D Coulomb. Parameters ---------- system, basis Periodic system and AO basis. kmesh :class:`BlochKMesh` (e.g. from :func:`vibeqc.monkhorst_pack`). options Optional :class:`PeriodicKSOptions`. Defaults: PBE, DIIS on, no level shift, molecular Becke partition. Honours ``functional``, ``grid``, ``use_periodic_becke``, ``becke_image_radius_bohr``, ``level_shift``, ``smearing_temperature``, ``damping``, ``max_iter``, ``conv_tol_*``, ``diis_*``, ``initial_guess``, ``lattice_opts``. omega, grid_shape, origin, spacing_bohr Ewald splitting + FFT-Poisson grid controls. linear_dep_threshold Per-k S(k) eigenvalue floor for canonical orthogonalisation. Returns ------- :class:`PeriodicRKSMultiKEwaldResult`. """ opts = options if options is not None else PeriodicKSOptions() lat_opts: LatticeSumOptions = opts.lattice_opts plog = resolve_progress(progress, verbose=verbose) # ω must match the nuclear Ewald α so the jellium background # terms cancel exactly. When not specified explicitly, use # CRYSTAL's default α = 2.8/V^{1/3} (matching the BIPOLE driver). _ewald_tol = getattr(opts, "ewald_tolerance", 1e-12) _cutoff = getattr(opts, "ewald_cutoff_bohr", lat_opts.nuclear_cutoff_bohr) if omega <= 0.0: _user_omega = getattr(opts, "ewald_omega", None) if _user_omega is not None and float(_user_omega) > 0.0: omega = float(_user_omega) else: from .bipole_ext_el_pole import crystal_default_ewald_alpha V_cell = float(abs(np.linalg.det(np.asarray(system.lattice, dtype=float)))) omega = crystal_default_ewald_alpha(V_cell) lat = np.asarray(system.lattice, dtype=float) if grid_shape is None: grid_shape_t = auto_grid(lat, spacing_bohr) elif isinstance(grid_shape, int): grid_shape_t = (grid_shape, grid_shape, grid_shape) else: grid_shape_t = tuple(int(x) for x in grid_shape) plog.info( f"RKS multi-k EWALD_3D / functional={opts.functional!r}, " f"omega = {float(omega):.3f}, " f"FFT grid {grid_shape_t[0]}x{grid_shape_t[1]}x{grid_shape_t[2]}" ) plog.info(f"basis: {basis.name} ({basis.nbasis} BFs / {basis.nshells} shells)") from .options_dump import dump_active_settings dump_active_settings( plog, [ ("PeriodicKSOptions", opts), ("LatticeSumOptions", lat_opts), ( "Driver kwargs", { "omega": float(omega), "grid_shape": grid_shape_t, "origin": origin, "spacing_bohr": float(spacing_bohr), "linear_dep_threshold": float(linear_dep_threshold), "canonical_orth_normalize_diag_first": canonical_orth_normalize_diag_first, "auto_optimize_truncation": auto_optimize_truncation, }, ), ], ) if plog.level >= 5: from .scf_log import format_basis_summary plog.write_raw(format_basis_summary(basis)) # Closed-shell sanity. n_elec = system.n_electrons() if n_elec % 2 != 0: raise ValueError( "run_rks_periodic_multi_k_ewald3d: closed-shell RKS requires " f"even electron count; got {n_elec}" ) if system.multiplicity != 1: raise ValueError( "run_rks_periodic_multi_k_ewald3d: closed-shell RKS requires " f"multiplicity=1; got {system.multiplicity}" ) n_occ = n_elec // 2 smearing_T = float(getattr(opts, "smearing_temperature", 0.0)) if smearing_T < 0.0: raise ValueError( "run_rks_periodic_multi_k_ewald3d: smearing_temperature must be >= 0" ) # ---- Functional + DFT grid ------------------------------------------ func = Functional(opts.functional, 1) # spin-unpolarised RKS alpha = float(func.hf_exchange_fraction) if opts.use_periodic_becke: grid = build_periodic_becke_grid( system, grid_options=opts.grid, image_radius_bohr=float(opts.becke_image_radius_bohr), ) else: grid = build_grid(system.unit_cell_molecule(), opts.grid) k_points = list(kmesh.kpoints) weights = np.asarray(kmesh.weights, dtype=float) n_k = len(k_points) if n_k == 0: raise ValueError("kmesh has no k-points") if not np.isclose(weights.sum(), 1.0): raise ValueError(f"kmesh.weights must sum to 1; got {weights.sum():.6f}") plog.info( f"k-mesh: {n_k} k-point{'s' if n_k != 1 else ''}, " f"weights sum = {weights.sum():.4f}" ) # ---- Auto-optimise lattice truncation (default ON) ------------------- if auto_optimize_truncation and lat_opts.coulomb_method == CoulombMethod.EWALD_3D: from .eigs_preflight import ( format_truncation_optimization_report, optimize_truncation, ) k_arr = [np.asarray(k, dtype=float) for k in k_points] opt_rep = optimize_truncation( system, basis, lattice_opts=lat_opts, k_points_cart=k_arr, ) if ( opt_rep.n_evaluations > 1 or opt_rep.optimized_lattice_opts.cutoff_bohr != lat_opts.cutoff_bohr ): plog.write_raw(format_truncation_optimization_report(opt_rep)) if not opt_rep.converged: plog.warning("auto_optimize_truncation did not converge.") lat_opts = opt_rep.optimized_lattice_opts # The current C++ periodic XC builder materialises AO values (and, # for GGA, three AO-gradient matrices) for every real-space density # cell. For MgO/pob-TZVP-sized inputs this reaches tens of GiB and # the kernel is often SIGKILLed before Python can report anything. # Fail fast with an actionable message instead. n_xc_cells = len(direct_lattice_cells(system, float(lat_opts.cutoff_bohr))) n_xc_grid = int(np.asarray(grid.points).shape[0]) _guard_legacy_periodic_xc_memory( n_grid=n_xc_grid, nbf=int(basis.nbasis), n_cells=n_xc_cells, is_gga=(func.kind == XCKind.GGA), functional=str(opts.functional), ) # ---- Real-space one-electron integrals ------------------------------ with plog.stage( "integrals_lattice", detail=f"S/T/V at cutoff {lat_opts.cutoff_bohr:.2f} bohr" ): S_lat = compute_overlap_lattice(basis, system, lat_opts) T_lat = compute_kinetic_lattice(basis, system, lat_opts) from .periodic_v_ne import compute_nuclear_lattice_dispatch V_lat = compute_nuclear_lattice_dispatch(basis, system, lat_opts) cells = list(S_lat.cells) # Per-k S(k), Hcore(k), orthogonaliser X(k). S_k_list: List[np.ndarray] = [] Hcore_k_list: List[np.ndarray] = [] X_k_list: List[np.ndarray] = [] # Per-k linear-dependence preflight; see periodic_rhf_multi_k_ewald # for the rationale (Searle et al., ARCHER eCSE04-16, 2017). from .linear_dependence import scf_preflight_overlap_check for k_idx, k in enumerate(k_points): k_arr = np.asarray(k, dtype=float).reshape(3) S_k = np.asarray(bloch_sum(S_lat, k_arr)) T_k = np.asarray(bloch_sum(T_lat, k_arr)) V_k = np.asarray(bloch_sum(V_lat, k_arr)) H_k = T_k + V_k S_k = 0.5 * (S_k + S_k.conj().T) H_k = 0.5 * (H_k + H_k.conj().T) scf_preflight_overlap_check( S_k, plog=plog, label=f"S(k={k_idx}, k_cart={k_arr.round(4).tolist()})", basis=basis, ) X_k, n_kept = _canonical_orthogonalizer_complex( S_k, linear_dep_threshold, normalize_diag_first=canonical_orth_normalize_diag_first, ) if n_occ > n_kept: raise RuntimeError( "run_rks_periodic_multi_k_ewald3d: canonical " f"orthogonalisation at k = {k_arr} dropped too many " f"directions (n_occ = {n_occ}, n_kept = {n_kept}); " "loosen linear_dep_threshold or pick a less redundant basis." ) S_k_list.append(S_k) Hcore_k_list.append(H_k) X_k_list.append(X_k) e_nuc = float(nuclear_repulsion_per_cell(system, lat_opts)) # ---- Initial guess: diagonalize Hcore(k) ---------------------------- C_per_k: List[np.ndarray] = [] eps_per_k: List[np.ndarray] = [] for H_k, X_k in zip(Hcore_k_list, X_k_list): C_k, eps_k = _diag_in_orth_basis(H_k, X_k) C_per_k.append(C_k.astype(complex)) eps_per_k.append(eps_k) n_electrons_per_cell = float(n_elec) def _occupations_from_eps( eps_per_k_local: Sequence[np.ndarray], ) -> Tuple[List[np.ndarray], float, float]: return _closed_shell_periodic_occupations( eps_per_k_local, weights, n_electrons_per_cell, n_occ, smearing_T, ) occ_per_k, fermi_level, entropy = _occupations_from_eps(eps_per_k) if smearing_T <= 0.0: D_real = real_space_density_from_kpoints( C_per_k, [n_occ] * n_k, kmesh, cells, ) else: D_real = real_space_density_from_kpoints_fractional( C_per_k, occ_per_k, kmesh, cells, ) # Density-mode guesses: overwrite D_real at g=0 with the engine # density and zero the other cells. See run_rhf_periodic_multi_k_ewald3d # for the rationale. guess = getattr(opts, "initial_guess", InitialGuess.HCORE) D_engine = initial_density_closed_shell( system.unit_cell_molecule(), basis, n_occ, guess, is_periodic=True, ) if D_engine is not None: plog.info(f"initial guess: {guess.name} (density via GuessEngine)") for g_idx in range(len(D_real.cells)): if (D_real.cells[g_idx].index == np.array([0, 0, 0])).all(): D_real.set_block(g_idx, D_engine) else: D_real.set_block(g_idx, np.zeros_like(D_engine, dtype=float)) else: plog.info(f"initial guess: {guess.name} (Hcore-diagonalise at each k)") D_per_k = _density_matrices_per_k(C_per_k, occ_per_k) if D_engine is not None: D_engine_k = np.asarray(D_engine, dtype=complex) D_engine_k = 0.5 * (D_engine_k + D_engine_k.conj().T) D_per_k = [D_engine_k.copy() for _ in range(n_k)] D_real_prev: Optional[LatticeMatrixSet] = None D_per_k_prev: Optional[List[np.ndarray]] = None damping = float(opts.damping) if not (0.0 <= damping < 1.0): raise ValueError( f"run_rks_periodic_multi_k_ewald3d: damping must be in " f"[0, 1); got {damping}" ) use_diis = bool(opts.use_diis) diis_start_iter = int(opts.diis_start_iter) accel: Optional[MultiKPeriodicSCFAccelerator] = ( MultiKPeriodicSCFAccelerator(opts) if use_diis else None ) damper: Optional[DynamicDamping] = None if bool(getattr(opts, "dynamic_damping", False)): damper = DynamicDamping( initial_alpha=damping, alpha_min=float(getattr(opts, "dynamic_damping_min", 0.0)), alpha_max=float(getattr(opts, "dynamic_damping_max", 0.95)), ) level_shift = float(getattr(opts, "level_shift", 0.0)) # Phase C1c — quadratic SCF fallback (per-k Newton step). quadratic_fallback_iter = int(getattr(opts, "quadratic_fallback_iter", 0)) quadratic_fallback_shift = float(getattr(opts, "quadratic_fallback_shift", 0.1)) quadratic_fallback_max_step = float( getattr(opts, "quadratic_fallback_max_step", 0.1) ) # ---- SCF loop ------------------------------------------------------- scf_trace: List[SCFIteration] = [] E_prev = 0.0 F_k_list: List[np.ndarray] = [np.zeros_like(H) for H in Hcore_k_list] # XC bookkeeping — populated each iteration. E_xc = 0.0 E_coulomb_per_cell = 0.0 E_hf_K_per_cell = 0.0 E_total = 0.0 E_elec = 0.0 plog.banner(f"SCF (RKS multi-k {opts.functional!r}, EWALD_3D)") plog.info(" iter energy (Ha) dE ||[F,DS]|| DIIS") converged = False iter_idx = 0 for iter_idx in range(1, int(opts.max_iter) + 1): if damper is not None: damping = damper.alpha diis_active = use_diis and iter_idx >= diis_start_iter D_used = D_real D_used_per_k = [D.copy() for D in D_per_k] if iter_idx > 1 and damping > 0.0 and not diis_active: D_used = _damp_lattice_matrix(D_real, D_real_prev, damping) if D_per_k_prev is not None: D_used_per_k = [ damping * D_prev + (1.0 - damping) * D_cur for D_cur, D_prev in zip(D_per_k, D_per_k_prev) ] # --- HF part of the Fock at every k via the Ewald composed # dispatcher with exchange_scale = α. For pure DFT (α = 0) # the K build is internally skipped. F_HF_k_list = build_periodic_fock_ewald3d_k( basis, system, D_used, omega=float(omega), k_points_cart=[np.asarray(k) for k in k_points], Hcore_k=None, # add Hcore manually so we can also add V_xc lattice_opts=lat_opts, grid_shape=grid_shape_t, origin=origin, spacing_bohr=spacing_bohr, exchange_scale=alpha, ) # --- XC contribution. build_xc_periodic returns a LatticeMatrixSet # V_xc(g) and a scalar E_xc per unit cell. Bloch-fold to # per-k V_xc(k) so it can be added to each F(k). xc_contrib = build_xc_periodic( basis, system, grid, func, D_used, lat_opts, ) E_xc = float(xc_contrib.e_xc) V_xc_set = xc_contrib.V_xc V_xc_k_list: List[np.ndarray] = [ _bloch_sum_lms_at_k(V_xc_set, np.asarray(k)) for k in k_points ] # --- Total per-k Fock. F_k_list = [] for idx in range(n_k): F_k = Hcore_k_list[idx] + F_HF_k_list[idx] + V_xc_k_list[idx] F_k = 0.5 * (F_k + F_k.conj().T) F_k_list.append(F_k) # --- Per-cell electronic energy. # E_elec = E_xc + Σ_k w_k tr(D(k)·H(k)) + ½ Σ_k w_k tr(D(k)·F_HF(k)) # where F_HF(k) = J(k) − (α/2)·K(k) (the Ewald 2e contribution # already in F_HF_k_list, *not* including Hcore or V_xc). E_core_trace = 0.0 E_HF_trace = 0.0 grad_norm_sum = 0.0 error_k_list: List[np.ndarray] = [] for idx in range(n_k): D_k = D_used_per_k[idx] w = float(weights[idx]) E_core_trace += w * np.real(np.trace(D_k @ Hcore_k_list[idx])) E_HF_trace += 0.5 * w * np.real(np.trace(D_k @ F_HF_k_list[idx])) S_k = S_k_list[idx] FDS = F_k_list[idx] @ D_k @ S_k grad = FDS - FDS.conj().T error_k_list.append(grad) grad_norm_sum += w * float(np.linalg.norm(grad)) E_elec = E_xc + float(E_core_trace) + float(E_HF_trace) # Madelung-leak correction (v0.6.1). _D_g0 = _g0_block(D_real) _S_g0 = _g0_block(S_lat) E_madelung_fix = _madelung_energy_correction_for_lat( _D_g0, _S_g0, system, lat_opts ) E_total = E_elec + e_nuc + E_madelung_fix # Diagnostic energy breakdown — split E_HF_trace into J + K # without rebuilding (use a J-only Ewald build per cell). # Cheap for reporting: only at convergence we need to be exact. # Per-iter we leave E_coulomb / E_hf_exchange at last-converged # values; a single re-build at the end gives precise numbers. free_energy = E_total - smearing_T * entropy dE = free_energy - E_prev # Divergence detection (v0.6.2). check_scf_divergence( "run_rks_periodic_multi_k_ewald3d", iter_idx, free_energy, grad_norm_sum, dE, ) scf_trace.append( SCFIteration( iter=iter_idx, energy=float(free_energy), delta_e=float(dE if iter_idx > 1 else 0.0), grad_norm=float(grad_norm_sum), diis_subspace=(accel.subspace_size if accel is not None else 0), ) ) plog.iteration( iter_idx, energy=float(free_energy), dE=float(dE if iter_idx > 1 else 0.0), grad=float(grad_norm_sum), diis=(accel.subspace_size if accel is not None else 0), ) converged = ( iter_idx > 1 and abs(dE) < float(opts.conv_tol_energy) and grad_norm_sum < float(opts.conv_tol_grad) ) # Phase C1c gate — bypass DIIS + level shift when active. in_quadratic_phase = ( quadratic_fallback_iter > 0 and iter_idx > quadratic_fallback_iter ) new_C_per_k: List[np.ndarray] = [] new_eps_per_k: List[np.ndarray] = [] if in_quadratic_phase: from .quadratic_scf import quadratic_step for idx in range(n_k): C_k, eps_k = quadratic_step( F_k_list[idx], C_per_k[idx], eps_per_k[idx], n_occ, shift=quadratic_fallback_shift, max_step=quadratic_fallback_max_step, ) new_C_per_k.append(C_k) new_eps_per_k.append(eps_k) else: # --- SCF-accelerator extrapolation. DIIS / KDIIS run # natively on the per-k complex Hermitian matrices; # EDIIS / ADIIS / EDIIS_DIIS bridge through per-cell # blocks. ``density_k_list`` is the per-k Bloch sum of # the per-cell D_used (the RKS driver keeps density # per-cell because the XC build operates there). if accel is not None: density_k_list = [ _bloch_sum_lms_at_k(D_used, np.asarray(k)) for k in k_points ] F_ex_list = accel.extrapolate_rhf( F_k_list, error_k_list=error_k_list, density_k_list=density_k_list, energy=free_energy, mo_coeffs_k_list=C_per_k, n_occ=n_occ, weights=list(weights), cells=cells, kpoints=list(k_points), ) if diis_active: F_k_list = F_ex_list # --- Saunders-Hillier level shift per k. if level_shift != 0.0: F_for_diag: List[np.ndarray] = [] for idx in range(n_k): D_k = D_used_per_k[idx] S_k = S_k_list[idx] F_shift = ( F_k_list[idx] + level_shift * S_k - (level_shift / 2.0) * (S_k @ D_k @ S_k) ) F_shift = 0.5 * (F_shift + F_shift.conj().T) F_for_diag.append(F_shift) else: F_for_diag = F_k_list # --- Diagonalize. for idx in range(n_k): C_k, eps_k = _diag_in_orth_basis(F_for_diag[idx], X_k_list[idx]) new_C_per_k.append(C_k) new_eps_per_k.append(eps_k) C_per_k = new_C_per_k eps_per_k = new_eps_per_k occ_per_k, fermi_level, entropy = _occupations_from_eps(eps_per_k) D_new_per_k = _density_matrices_per_k(C_per_k, occ_per_k) if smearing_T > 0.0: D_real_new = real_space_density_from_kpoints_fractional( C_per_k, occ_per_k, kmesh, cells, ) else: D_real_new = real_space_density_from_kpoints( C_per_k, [n_occ] * n_k, kmesh, cells, ) D_real_prev = D_used D_per_k_prev = [D.copy() for D in D_used_per_k] D_real = D_real_new D_per_k = D_new_per_k if damper is not None: damper.update(free_energy) E_prev = free_energy if converged: break # ---- Final consistency pass on the converged D ---------------------- if converged: F_HF_k_list = build_periodic_fock_ewald3d_k( basis, system, D_real, omega=float(omega), k_points_cart=[np.asarray(k) for k in k_points], Hcore_k=None, lattice_opts=lat_opts, grid_shape=grid_shape_t, origin=origin, spacing_bohr=spacing_bohr, exchange_scale=alpha, ) # J-only build for the Coulomb / HF-exchange decomposition # (used for reporting only, not the Fock). J_only_k_list = build_periodic_fock_ewald3d_k( basis, system, D_real, omega=float(omega), k_points_cart=[np.asarray(k) for k in k_points], Hcore_k=None, lattice_opts=lat_opts, grid_shape=grid_shape_t, origin=origin, spacing_bohr=spacing_bohr, exchange_scale=0.0, ) xc_contrib = build_xc_periodic( basis, system, grid, func, D_real, lat_opts, ) E_xc = float(xc_contrib.e_xc) V_xc_set = xc_contrib.V_xc V_xc_k_list = [_bloch_sum_lms_at_k(V_xc_set, np.asarray(k)) for k in k_points] F_k_list = [] for idx in range(n_k): F_k = Hcore_k_list[idx] + F_HF_k_list[idx] + V_xc_k_list[idx] F_k = 0.5 * (F_k + F_k.conj().T) F_k_list.append(F_k) final_C_per_k: List[np.ndarray] = [] final_eps_per_k: List[np.ndarray] = [] for idx in range(n_k): C_k, eps_k = _diag_in_orth_basis(F_k_list[idx], X_k_list[idx]) final_C_per_k.append(C_k) final_eps_per_k.append(eps_k) C_per_k = final_C_per_k eps_per_k = final_eps_per_k occ_per_k, fermi_level, entropy = _occupations_from_eps(eps_per_k) if smearing_T > 0.0: D_real = real_space_density_from_kpoints_fractional( C_per_k, occ_per_k, kmesh, cells, ) else: D_real = real_space_density_from_kpoints( C_per_k, [n_occ] * n_k, kmesh, cells, ) E_core_trace = 0.0 E_HF_trace = 0.0 E_J_trace = 0.0 for idx in range(n_k): C_k = C_per_k[idx] if smearing_T > 0.0: D_k = (C_k * occ_per_k[idx][None, :].astype(complex)) @ C_k.conj().T else: C_occ = C_k[:, :n_occ] D_k = 2.0 * (C_occ @ C_occ.conj().T) w = float(weights[idx]) E_core_trace += w * np.real(np.trace(D_k @ Hcore_k_list[idx])) E_HF_trace += 0.5 * w * np.real(np.trace(D_k @ F_HF_k_list[idx])) E_J_trace += 0.5 * w * np.real(np.trace(D_k @ J_only_k_list[idx])) E_elec = E_xc + float(E_core_trace) + float(E_HF_trace) # Madelung-leak correction (v0.6.1). _D_g0 = _g0_block(D_real) _S_g0 = _g0_block(S_lat) E_madelung_fix = _madelung_energy_correction_for_lat( _D_g0, _S_g0, system, lat_opts ) E_total = E_elec + e_nuc + E_madelung_fix E_coulomb_per_cell = float(E_J_trace) # tr(D·F_HF) = tr(D·J) − (α/2)·tr(D·K) → # E_HF_trace - E_J_trace = -(α/4)·tr(D·K)·factor depending on # the ½ already included in E_HF_trace. Match the Γ-only KS # reporting convention: E_hf_exchange = -(α/2)·½·tr(D·K) = # E_HF_trace − E_J_trace. E_hf_K_per_cell = float(E_HF_trace - E_J_trace) free_energy = E_total - smearing_T * entropy plog.converged(n_iter=iter_idx, energy=E_total, converged=converged) return PeriodicRKSMultiKEwaldResult( energy=E_total, e_electronic=float(E_elec), e_nuclear=e_nuc, e_xc=float(E_xc), e_coulomb=float(E_coulomb_per_cell), e_hf_exchange=float(E_hf_K_per_cell), n_iter=iter_idx, converged=converged, mo_energies=eps_per_k, mo_coeffs=C_per_k, fock=F_k_list, overlap=S_k_list, hcore=Hcore_k_list, density=D_real, scf_trace=scf_trace, functional=str(opts.functional), omega=float(omega), grid_shape=grid_shape_t, smearing_temperature=smearing_T, fermi_level=float(fermi_level), entropy=float(entropy), free_energy=float(free_energy), occupations=[np.asarray(o, dtype=float) for o in occ_per_k], )