Source code for vibeqc.output.formats.scf_log

"""Formatters and log emitters for SCF trace output.

The native RHF driver populates ``RHFResult.scf_trace`` (a list of
``SCFIteration`` records) on every run. This module turns that trace into
either:

- a formatted multi-line string (``format_scf_trace``), or
- a stream of structured log records via the standard ``logging`` module
  (``log_scf_trace``).

By convention, log records go to the ``vibeqc.scf`` logger. A library-level
``NullHandler`` is attached in ``vibeqc.__init__`` so nothing is emitted
unless the user explicitly configures logging — consistent with how
PySCF, numpy, and every other well-behaved Python library behave.

Typical usage::

    import logging
    logging.basicConfig(level=logging.INFO)   # summary lines only
    # or:
    logging.getLogger("vibeqc.scf").setLevel(logging.DEBUG)  # per-iter too

    result = run_rhf(mol, basis)
    log_scf_trace(result)
"""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING, Optional, Sequence

import numpy as np

from ...banner import banner

try:
    # Imported lazily because properties.py pulls in the compiled core;
    # when vibeqc.scf_log is imported during test-harness probing of
    # vibeqc.banner in isolation, avoid the cascade. In normal use this
    # always succeeds.
    from ...properties import (
        dipole_moment,
        hirshfeld_charges,
        loewdin_charges,
        mayer_bond_orders,
        mulliken_charges,
    )
    from ...properties import prominent_bonds as _prominent_bonds

    _PROPERTIES_AVAILABLE = True
except Exception:
    _PROPERTIES_AVAILABLE = False

if TYPE_CHECKING:  # pragma: no cover - import guard
    from ..._vibeqc_core import RHFResult  # noqa: F401

_DEFAULT_LOGGER_NAME = "vibeqc.scf"

# Column layout:
#   iter(>=3)  energy(18.10f)  dE(+.3e)  ||grad||(.3e)  diis(2d)
_HEADER = "  iter     energy (Ha)            dE          ||[F,DS]||   DIIS"
_SEPARATOR = "  " + "-" * (len(_HEADER) - 2)

_HARTREE_TO_EV = 27.211386245988  # CODATA 2018



[docs]
def format_scf_trace(
    result,
    *,
    molecule=None,
    basis=None,
    include_banner: bool = True,
    include_properties: bool = True,
    n_virtual: int = 5,
    property_status: Optional[dict] = None,
) -> str:
    """Return a multi-line string with a header, per-iteration lines and a
    summary. Safe to print, log, or write to a file.

    Parameters
    ----------
    result
        Any SCF result struct with a ``scf_trace`` list and ``converged``,
        ``n_iter``, ``energy`` fields (RHFResult, UHFResult, RKSResult,
        UKSResult, PeriodicRHFResult, PeriodicKSResult, ...).
    molecule
        Optional :class:`Molecule`. When supplied, the trace is extended
        with an energy-component breakdown (DFT only), an orbital-energy
        table, and HOMO/LUMO markers. Without it we can't infer occupation
        counts from the result alone, so those sections are skipped.
    basis
        Optional :class:`BasisSet`. Required alongside ``molecule`` to
        compute the post-SCF properties block (Mulliken / Löwdin charges,
        Mayer bond orders, dipole moment). When absent, the properties
        block is skipped.
    include_banner
        Prepend the vibe-qc version banner. Default ``True``; set ``False``
        to produce a trace without provenance (e.g. for unit-test
        comparisons where the version string would drift).
    include_properties
        Emit the Mulliken / Löwdin / Mayer / dipole block when molecule
        and basis are both supplied. Default ``True``.
    n_virtual
        Number of virtual orbitals above HOMO to print in the table.
        Default 5.
    property_status
        Optional mutable dict; when supplied, the post-SCF properties
        block records per-analysis success into it (currently
        ``property_status["hirshfeld"]``). Lets a caller gate a citation
        on whether a best-effort analysis actually computed, without
        recomputing it. Default ``None`` (no status reported).
    """
    lines: list[str] = []
    if include_banner:
        lines.append(banner())
        lines.append("")
    lines.append(_HEADER)
    lines.append(_SEPARATOR)
    for step in result.scf_trace:
        lines.append(_format_iter_line(step))
    lines.append(_SEPARATOR)
    status = "converged" if result.converged else "NOT converged"
    lines.append(
        f"  {status} in {result.n_iter} iterations; E = {result.energy:.10f} Ha"
    )
    if molecule is None:
        components = _format_energy_components(result)
        if components:
            lines.append("")
            lines.append(components)
    if molecule is not None:
        orb = _format_orbital_sections(result, molecule, n_virtual=n_virtual)
        if orb:
            lines.append("")
            lines.append(orb)
    if (
        include_properties
        and molecule is not None
        and basis is not None
        and _PROPERTIES_AVAILABLE
    ):
        props = _format_properties_block(
            result, molecule, basis, status=property_status
        )
        if props:
            lines.append("")
            lines.append(props)
    return "\n".join(lines)




[docs]
def log_scf_trace(
    result,
    logger: Optional[logging.Logger] = None,
    *,
    level_header: int = logging.INFO,
    level_iter: int = logging.DEBUG,
    level_summary: int = logging.INFO,
) -> None:
    """Emit the SCF trace through the ``logging`` module.

    Log levels are split so users can get summary-only (default INFO) or
    full per-iteration output (DEBUG) by setting the logger level.
    """
    lg = logger if logger is not None else logging.getLogger(_DEFAULT_LOGGER_NAME)

    if lg.isEnabledFor(level_iter):
        lg.log(level_iter, _HEADER)
        lg.log(level_iter, _SEPARATOR)
        for step in result.scf_trace:
            lg.log(level_iter, _format_iter_line(step))
        lg.log(level_iter, _SEPARATOR)

    status = "converged" if result.converged else "NOT CONVERGED"
    lg.log(
        level_summary,
        "SCF %s in %d iterations; E = %.10f Ha (= %.4f eV)",
        status,
        result.n_iter,
        result.energy,
        # 1 Hartree = 27.211386245988 eV (CODATA 2018)
        result.energy * 27.211386245988,
    )



def _format_iter_line(step) -> str:
    de = f"{step.delta_e:+.3e}" if step.iter > 1 else "      --   "
    diis = f"{step.diis_subspace:3d}" if step.diis_subspace > 0 else "  -"
    return f"  {step.iter:4d}  {step.energy:18.10f}  {de}  {step.grad_norm:.3e}  {diis}"


def _format_orbital_sections(result, molecule, *, n_virtual: int) -> str:
    """Energy-component breakdown (DFT only) + orbital-energy table.

    Returns an empty string if the result has neither ``e_xc`` nor
    ``mo_energies``/``mo_energies_alpha`` — e.g. an MP2Result.
    """
    sections: list[str] = []

    components = _format_energy_components(result)
    if components:
        sections.append(components)

    n_electrons = molecule.n_electrons()
    multiplicity = molecule.multiplicity

    if hasattr(result, "mo_energies_alpha"):
        sections.append(
            _format_orbital_table_uhf(
                result,
                n_alpha=(n_electrons + multiplicity - 1) // 2,
                n_beta=n_electrons - (n_electrons + multiplicity - 1) // 2,
                n_virtual=n_virtual,
            )
        )
    elif hasattr(result, "mo_energies"):
        sections.append(
            _format_orbital_table_rhf(
                result, n_occ=n_electrons // 2, n_virtual=n_virtual
            )
        )

    return "\n\n".join(s for s in sections if s)


def _format_energy_components(result) -> str:
    """Energy-component breakdown block.

    Supports two result shapes:

    * molecular / periodic KS results with scalar ``e_xc``-style fields;
    * BIPOLE periodic RHF results with a per-iteration
      ``energy_components`` history in CRYSTAL ``ENECYCLE`` terms.
    """
    history = getattr(result, "energy_components", None)
    if history:
        comp = history[-1]
        rows: list[tuple[str, float]] = [
            ("Nuclear repulsion", comp.e_nuclear_repulsion),
            ("Electronic", comp.e_electronic),
            ("Kinetic", comp.e_kinetic),
            ("Nuclear attraction", comp.e_nuclear_attraction),
        ]
        for label, attr in (
            ("BIELET zone E-E", "e_bielet_zone_ee"),
            ("EXT EL-POLE", "e_ext_el_pole"),
            ("EXT EL-SPHEROPOLE", "e_ext_el_spheropole"),
            ("J short-range", "e_j_short_range"),
            ("J long-range", "e_j_long_range"),
            ("HF exchange", "e_exchange"),
        ):
            value = getattr(comp, attr, None)
            if value is not None:
                rows.append((label, value))
        rows.extend(
            [
                ("Two-electron (J+K)", comp.e_two_electron),
                ("Total energy", comp.e_total),
            ]
        )
        header = "  Energy components (Ha)"
        sep = "  " + "-" * 52
        lines = [header, sep]
        for label, value in rows:
            lines.append(f"  {label:<32s} {value:18.10f}")
        lines.append(sep)
        return "\n".join(lines)

    if not hasattr(result, "e_xc"):
        return ""

    rows: list[tuple[str, float]] = []
    for label, attr in (
        ("Nuclear repulsion", "e_nuclear"),
        ("Electronic", "e_electronic"),
        ("Coulomb (J)", "e_coulomb"),
        ("HF exchange (K)", "e_hf_exchange"),
        ("Exchange-correlation (XC)", "e_xc"),
        ("EXT EL-SPHEROPOLE", "e_ext_el_spheropole"),
    ):
        value = getattr(result, attr, None)
        if value is not None:
            rows.append((label, value))
    rows.append(("Total energy", result.energy))

    header = "  Energy components (Ha)"
    sep = "  " + "-" * 52
    lines = [header, sep]
    for label, value in rows:
        lines.append(f"  {label:<32s} {value:18.10f}")
    lines.append(sep)
    return "\n".join(lines)


def _orbital_window(
    energies: Sequence[float], n_occ: int, n_virtual: int
) -> tuple[int, int]:
    """Slice bounds [start, end) to print: all occupied + up to n_virtual."""
    start = 0
    end = min(len(energies), n_occ + max(0, n_virtual))
    return start, end


def _format_orbital_table_rhf(result, *, n_occ: int, n_virtual: int) -> str:
    energies = list(result.mo_energies)
    start, end = _orbital_window(energies, n_occ, n_virtual)

    lines = ["  Molecular orbitals (Ha / eV)"]
    lines.append("  " + "-" * 54)
    lines.append(f"  {'#':>4}  {'occ':>4}  {'eps (Ha)':>16}  {'eps (eV)':>14}")
    lines.append("  " + "-" * 54)
    for i in range(start, end):
        occ = 2.0 if i < n_occ else 0.0
        marker = ""
        if i == n_occ - 1:
            marker = "  <-- HOMO"
        elif i == n_occ:
            marker = "  <-- LUMO"
        lines.append(
            f"  {i + 1:4d}  {occ:4.1f}  {energies[i]:16.8f}  "
            f"{energies[i] * _HARTREE_TO_EV:14.6f}{marker}"
        )
    if 0 < n_occ < len(energies):
        gap = energies[n_occ] - energies[n_occ - 1]
        lines.append(f"  HOMO-LUMO gap: {gap:.6f} Ha ({gap * _HARTREE_TO_EV:.4f} eV)")
    return "\n".join(lines)


def _format_orbital_table_uhf(
    result, *, n_alpha: int, n_beta: int, n_virtual: int
) -> str:
    blocks: list[str] = []
    for label, energies, n_occ in (
        ("Alpha", list(result.mo_energies_alpha), n_alpha),
        ("Beta", list(result.mo_energies_beta), n_beta),
    ):
        start, end = _orbital_window(energies, n_occ, n_virtual)
        lines = [f"  Molecular orbitals - {label}"]
        lines.append("  " + "-" * 54)
        lines.append(f"  {'#':>4}  {'occ':>4}  {'eps (Ha)':>16}  {'eps (eV)':>14}")
        lines.append("  " + "-" * 54)
        tag = label[0]  # 'A' / 'B'
        for i in range(start, end):
            occ = 1.0 if i < n_occ else 0.0
            marker = ""
            if i == n_occ - 1:
                marker = f"  <-- HO{tag}MO"
            elif i == n_occ:
                marker = f"  <-- LU{tag}MO"
            lines.append(
                f"  {i + 1:4d}  {occ:4.1f}  {energies[i]:16.8f}  "
                f"{energies[i] * _HARTREE_TO_EV:14.6f}{marker}"
            )
        if 0 < n_occ < len(energies):
            gap = energies[n_occ] - energies[n_occ - 1]
            lines.append(
                f"  gap ({label.lower()}): {gap:.6f} Ha ({gap * _HARTREE_TO_EV:.4f} eV)"
            )
        blocks.append("\n".join(lines))
    return "\n\n".join(blocks)


_ELEMENT_SYMBOLS = (
    "X",
    "H",
    "He",
    "Li",
    "Be",
    "B",
    "C",
    "N",
    "O",
    "F",
    "Ne",
    "Na",
    "Mg",
    "Al",
    "Si",
    "P",
    "S",
    "Cl",
    "Ar",
    "K",
    "Ca",
    "Sc",
    "Ti",
    "V",
    "Cr",
    "Mn",
    "Fe",
    "Co",
    "Ni",
    "Cu",
    "Zn",
    "Ga",
    "Ge",
    "As",
    "Se",
    "Br",
    "Kr",
)


def _element(z: int) -> str:
    if 0 < z < len(_ELEMENT_SYMBOLS):
        return _ELEMENT_SYMBOLS[z]
    return f"Z{z}"


def _format_properties_block(result, molecule, basis, *, status=None) -> str:
    """Mulliken / Löwdin / Hirshfeld charges, Mayer bond orders,
    dipole moment.

    Emits nothing if the property computation raises (e.g. when the basis
    has shells vibe-qc's Python helpers can't parse); post-SCF properties
    are best-effort — a missing block shouldn't crash the log writer.

    The Hirshfeld column is computed separately and degrades on its own:
    it needs a Becke-Lebedev grid build + SAD promolecule, either of
    which can fail independently of the (cheap, basis-space) Mulliken /
    Löwdin analyses. When Hirshfeld fails the table falls back to the
    two-column Mulliken / Löwdin layout.

    ``status``
        Optional mutable dict. When supplied, ``status["hirshfeld"]`` is
        set to whether the Hirshfeld column actually computed for this
        call (``True`` only when the charges reached the table). This is
        the single observable the citation assembler gates on, so the
        Hirshfeld 1977 reference is emitted iff the user can see the
        Hirshfeld charges — never on a grid/promolecule build failure
        (CLAUDE.md § 8). Left untouched when the required Mulliken /
        Löwdin step itself fails (the whole block is absent then)."""
    sections: list[str] = []

    try:
        mul = mulliken_charges(result, basis, molecule)
        low = loewdin_charges(result, basis, molecule)
    except Exception:
        return ""

    # Hirshfeld is best-effort on top of the required Mulliken/Löwdin —
    # a grid-build or SAD-promolecule failure must not drop the block.
    try:
        hirsh = hirshfeld_charges(result, basis, molecule).charges
    except Exception:
        hirsh = None
    # Record the success exactly as it lands in the table, so the runner's
    # citation block can cite Hirshfeld 1977 only when it really computed.
    if status is not None:
        status["hirshfeld"] = hirsh is not None

    atoms = list(molecule.atoms)
    lines = ["  Atomic charges"]
    if hirsh is not None:
        width = 66
        lines.append("  " + "-" * width)
        lines.append(
            f"  {'#':>4}  {'elem':>4}  {'Mulliken':>10}  "
            f"{'Löwdin':>10}  {'Hirshfeld':>10}"
        )
        lines.append("  " + "-" * width)
        for i, atom in enumerate(atoms, start=1):
            lines.append(
                f"  {i:4d}  {_element(int(atom.Z)):>4s}  "
                f"{mul[i - 1]:10.6f}  {low[i - 1]:10.6f}  "
                f"{hirsh[i - 1]:10.6f}"
            )
        lines.append("  " + "-" * width)
        lines.append(
            f"  {'sum':>11s} {float(np.sum(mul)):10.6f}  "
            f"{float(np.sum(low)):10.6f}  {float(np.sum(hirsh)):10.6f}"
        )
    else:
        width = 52
        lines.append("  " + "-" * width)
        lines.append(f"  {'#':>4}  {'elem':>4}  {'Mulliken':>10}  {'Löwdin':>10}")
        lines.append("  " + "-" * width)
        for i, atom in enumerate(atoms, start=1):
            lines.append(
                f"  {i:4d}  {_element(int(atom.Z)):>4s}  "
                f"{mul[i - 1]:10.6f}  {low[i - 1]:10.6f}"
            )
        lines.append("  " + "-" * width)
        lines.append(f"  {'sum':>11s} {float(np.sum(mul)):10.6f}")
    sections.append("\n".join(lines))

    try:
        B = mayer_bond_orders(result, basis, molecule)
        bonds = _prominent_bonds(B, molecule, threshold=0.10)
    except Exception:
        bonds = []
    if bonds:
        lines = ["  Bond orders (Mayer)"]
        lines.append("  " + "-" * 40)
        lines.append(f"  {'atoms':>13}  {'bond order':>12}")
        lines.append("  " + "-" * 40)
        for i, j, bo in bonds:
            elem_i = _element(int(atoms[i].Z))
            elem_j = _element(int(atoms[j].Z))
            lines.append(f"  {elem_i}{i + 1:<3d} — {elem_j}{j + 1:<3d}  {bo:12.4f}")
        sections.append("\n".join(lines))

    try:
        dip = dipole_moment(result, basis, molecule)
    except Exception:
        dip = None
    if dip is not None:
        dx_d, dy_d, dz_d = dip.components_debye()
        lines = [
            "  Dipole moment (origin: center of mass)",
            "  " + "-" * 52,
            f"  {'component':>12}  {'a.u. (e·bohr)':>16}  {'Debye':>10}",
            "  " + "-" * 52,
            f"  {'x':>12}  {dip.x:16.8f}  {dx_d:10.4f}",
            f"  {'y':>12}  {dip.y:16.8f}  {dy_d:10.4f}",
            f"  {'z':>12}  {dip.z:16.8f}  {dz_d:10.4f}",
            f"  {'|μ|':>12}  {dip.total:16.8f}  {dip.total_debye:10.4f}",
        ]
        sections.append("\n".join(lines))

    return "\n\n".join(sections)


def format_basis_summary(basis) -> str:
    """Return a multi-line plain-text summary of a :class:`BasisSet`:
    its name, total basis-function / shell count, and a per-shell
    table of exponents and contraction coefficients.

    Used by the periodic SCF live progress logger at verbose level
    >= 5 so users can confirm exactly which exponents were loaded
    when sweeping basis-set choices on a long-running bulk run.
    The output is plain ASCII and indented to match the rest of
    :class:`vibeqc.ProgressLogger` emit.

    The shell-by-shell table is truncated only by the natural shell
    count; very large basis sets (pob-tzvp on a 50-atom cell, say)
    will dump several hundred lines. That's the price of asking for
    ``verbose=5`` on a live run — same convention PySCF follows
    when its own basis log is enabled.
    """
    name = getattr(basis, "name", "<unknown>")
    nbf = int(getattr(basis, "nbasis", 0))
    nshells = int(getattr(basis, "nshells", 0))
    lines = [
        "  Basis-set details",
        "  " + "-" * 32,
        f"  Name: {name}",
        f"  Functions: {nbf} in {nshells} shell{'s' if nshells != 1 else ''}",
        "",
        f"  {'shell':>5s}  {'l':>1s}  {'atom':>4s}  "
        f"{'n_prim':>6s}   exponent           coefficient",
        "  " + "-" * 60,
    ]
    L_LABEL = ["s", "p", "d", "f", "g", "h", "i"]
    try:
        shells = basis.shells()
    except Exception:
        return "\n".join(lines)
    for sh_idx, sh in enumerate(shells):
        l = int(sh.l)
        l_label = L_LABEL[l] if 0 <= l < len(L_LABEL) else str(l)
        atom_idx = int(sh.atom_index)
        exponents = list(sh.exponents)
        coeffs = list(sh.coefficients)
        n_prim = len(exponents)
        # First primitive on the header row, rest indented underneath.
        lines.append(
            f"  {sh_idx:5d}  {l_label:>1s}  {atom_idx:4d}  "
            f"{n_prim:6d}   {exponents[0]:14.6e}   {coeffs[0]:+14.6e}"
        )
        for e, c in zip(exponents[1:], coeffs[1:]):
            lines.append(
                f"  {'':5s}  {'':1s}  {'':4s}  {'':6s}   {e:14.6e}   {c:+14.6e}"
            )
    return "\n".join(lines)


__all__ = ["format_scf_trace", "format_basis_summary", "log_scf_trace"]