Source code for vibeqc.output.formats.scf_log

"""Formatters and log emitters for SCF trace output.

The native RHF driver populates ``RHFResult.scf_trace`` (a list of
``SCFIteration`` records) on every run. This module turns that trace into
either:

- a formatted multi-line string (``format_scf_trace``), or
- a stream of structured log records via the standard ``logging`` module
  (``log_scf_trace``).

By convention, log records go to the ``vibeqc.scf`` logger. A library-level
``NullHandler`` is attached in ``vibeqc.__init__`` so nothing is emitted
unless the user explicitly configures logging — consistent with how
PySCF, numpy, and every other well-behaved Python library behave.

Typical usage::

    import logging
    logging.basicConfig(level=logging.INFO)   # summary lines only
    # or:
    logging.getLogger("vibeqc.scf").setLevel(logging.DEBUG)  # per-iter too

    result = run_rhf(mol, basis)
    log_scf_trace(result)
"""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING, Optional, Sequence

import numpy as np

from ...banner import banner

try:
    # Imported lazily because properties.py pulls in the compiled core;
    # when vibeqc.scf_log is imported during test-harness probing of
    # vibeqc.banner in isolation, avoid the cascade. In normal use this
    # always succeeds.
    from ...properties import (
        dipole_moment,
        hirshfeld_charges,
        loewdin_charges,
        mayer_bond_orders,
        mulliken_charges,
    )
    from ...properties import prominent_bonds as _prominent_bonds

    _PROPERTIES_AVAILABLE = True
except Exception:
    _PROPERTIES_AVAILABLE = False

if TYPE_CHECKING:  # pragma: no cover - import guard
    from ..._vibeqc_core import RHFResult  # noqa: F401

_DEFAULT_LOGGER_NAME = "vibeqc.scf"

# Column layout:
#   iter(>=3)  energy(18.10f)  dE(+.3e)  ||grad||(.3e)  diis(2d)
_HEADER = "  iter     energy (Ha)            dE          ||[F,DS]||   DIIS"
_SEPARATOR = "  " + "-" * (len(_HEADER) - 2)

_HARTREE_TO_EV = 27.211386245988  # CODATA 2018


[docs] def format_scf_trace( result, *, molecule=None, basis=None, include_banner: bool = True, include_properties: bool = True, n_virtual: int = 5, property_status: Optional[dict] = None, ) -> str: """Return a multi-line string with a header, per-iteration lines and a summary. Safe to print, log, or write to a file. Parameters ---------- result Any SCF result struct with a ``scf_trace`` list and ``converged``, ``n_iter``, ``energy`` fields (RHFResult, UHFResult, RKSResult, UKSResult, PeriodicRHFResult, PeriodicKSResult, ...). molecule Optional :class:`Molecule`. When supplied, the trace is extended with an energy-component breakdown (DFT only), an orbital-energy table, and HOMO/LUMO markers. Without it we can't infer occupation counts from the result alone, so those sections are skipped. basis Optional :class:`BasisSet`. Required alongside ``molecule`` to compute the post-SCF properties block (Mulliken / Löwdin charges, Mayer bond orders, dipole moment). When absent, the properties block is skipped. include_banner Prepend the vibe-qc version banner. Default ``True``; set ``False`` to produce a trace without provenance (e.g. for unit-test comparisons where the version string would drift). include_properties Emit the Mulliken / Löwdin / Mayer / dipole block when molecule and basis are both supplied. Default ``True``. n_virtual Number of virtual orbitals above HOMO to print in the table. Default 5. property_status Optional mutable dict; when supplied, the post-SCF properties block records per-analysis success into it (currently ``property_status["hirshfeld"]``). Lets a caller gate a citation on whether a best-effort analysis actually computed, without recomputing it. Default ``None`` (no status reported). """ lines: list[str] = [] if include_banner: lines.append(banner()) lines.append("") lines.append(_HEADER) lines.append(_SEPARATOR) for step in result.scf_trace: lines.append(_format_iter_line(step)) lines.append(_SEPARATOR) status = "converged" if result.converged else "NOT converged" lines.append( f" {status} in {result.n_iter} iterations; E = {result.energy:.10f} Ha" ) if molecule is None: components = _format_energy_components(result) if components: lines.append("") lines.append(components) if molecule is not None: orb = _format_orbital_sections(result, molecule, n_virtual=n_virtual) if orb: lines.append("") lines.append(orb) if ( include_properties and molecule is not None and basis is not None and _PROPERTIES_AVAILABLE ): props = _format_properties_block( result, molecule, basis, status=property_status ) if props: lines.append("") lines.append(props) return "\n".join(lines)
[docs] def log_scf_trace( result, logger: Optional[logging.Logger] = None, *, level_header: int = logging.INFO, level_iter: int = logging.DEBUG, level_summary: int = logging.INFO, ) -> None: """Emit the SCF trace through the ``logging`` module. Log levels are split so users can get summary-only (default INFO) or full per-iteration output (DEBUG) by setting the logger level. """ lg = logger if logger is not None else logging.getLogger(_DEFAULT_LOGGER_NAME) if lg.isEnabledFor(level_iter): lg.log(level_iter, _HEADER) lg.log(level_iter, _SEPARATOR) for step in result.scf_trace: lg.log(level_iter, _format_iter_line(step)) lg.log(level_iter, _SEPARATOR) status = "converged" if result.converged else "NOT CONVERGED" lg.log( level_summary, "SCF %s in %d iterations; E = %.10f Ha (= %.4f eV)", status, result.n_iter, result.energy, # 1 Hartree = 27.211386245988 eV (CODATA 2018) result.energy * 27.211386245988, )
def _format_iter_line(step) -> str: de = f"{step.delta_e:+.3e}" if step.iter > 1 else " -- " diis = f"{step.diis_subspace:3d}" if step.diis_subspace > 0 else " -" return f" {step.iter:4d} {step.energy:18.10f} {de} {step.grad_norm:.3e} {diis}" def _format_orbital_sections(result, molecule, *, n_virtual: int) -> str: """Energy-component breakdown (DFT only) + orbital-energy table. Returns an empty string if the result has neither ``e_xc`` nor ``mo_energies``/``mo_energies_alpha`` — e.g. an MP2Result. """ sections: list[str] = [] components = _format_energy_components(result) if components: sections.append(components) n_electrons = molecule.n_electrons() multiplicity = molecule.multiplicity if hasattr(result, "mo_energies_alpha"): sections.append( _format_orbital_table_uhf( result, n_alpha=(n_electrons + multiplicity - 1) // 2, n_beta=n_electrons - (n_electrons + multiplicity - 1) // 2, n_virtual=n_virtual, ) ) elif hasattr(result, "mo_energies"): sections.append( _format_orbital_table_rhf( result, n_occ=n_electrons // 2, n_virtual=n_virtual ) ) return "\n\n".join(s for s in sections if s) def _format_energy_components(result) -> str: """Energy-component breakdown block. Supports two result shapes: * molecular / periodic KS results with scalar ``e_xc``-style fields; * BIPOLE periodic RHF results with a per-iteration ``energy_components`` history in CRYSTAL ``ENECYCLE`` terms. """ history = getattr(result, "energy_components", None) if history: comp = history[-1] rows: list[tuple[str, float]] = [ ("Nuclear repulsion", comp.e_nuclear_repulsion), ("Electronic", comp.e_electronic), ("Kinetic", comp.e_kinetic), ("Nuclear attraction", comp.e_nuclear_attraction), ] for label, attr in ( ("BIELET zone E-E", "e_bielet_zone_ee"), ("EXT EL-POLE", "e_ext_el_pole"), ("EXT EL-SPHEROPOLE", "e_ext_el_spheropole"), ("J short-range", "e_j_short_range"), ("J long-range", "e_j_long_range"), ("HF exchange", "e_exchange"), ): value = getattr(comp, attr, None) if value is not None: rows.append((label, value)) rows.extend( [ ("Two-electron (J+K)", comp.e_two_electron), ("Total energy", comp.e_total), ] ) header = " Energy components (Ha)" sep = " " + "-" * 52 lines = [header, sep] for label, value in rows: lines.append(f" {label:<32s} {value:18.10f}") lines.append(sep) return "\n".join(lines) if not hasattr(result, "e_xc"): return "" rows: list[tuple[str, float]] = [] for label, attr in ( ("Nuclear repulsion", "e_nuclear"), ("Electronic", "e_electronic"), ("Coulomb (J)", "e_coulomb"), ("HF exchange (K)", "e_hf_exchange"), ("Exchange-correlation (XC)", "e_xc"), ("EXT EL-SPHEROPOLE", "e_ext_el_spheropole"), ): value = getattr(result, attr, None) if value is not None: rows.append((label, value)) rows.append(("Total energy", result.energy)) header = " Energy components (Ha)" sep = " " + "-" * 52 lines = [header, sep] for label, value in rows: lines.append(f" {label:<32s} {value:18.10f}") lines.append(sep) return "\n".join(lines) def _orbital_window( energies: Sequence[float], n_occ: int, n_virtual: int ) -> tuple[int, int]: """Slice bounds [start, end) to print: all occupied + up to n_virtual.""" start = 0 end = min(len(energies), n_occ + max(0, n_virtual)) return start, end def _format_orbital_table_rhf(result, *, n_occ: int, n_virtual: int) -> str: energies = list(result.mo_energies) start, end = _orbital_window(energies, n_occ, n_virtual) lines = [" Molecular orbitals (Ha / eV)"] lines.append(" " + "-" * 54) lines.append(f" {'#':>4} {'occ':>4} {'eps (Ha)':>16} {'eps (eV)':>14}") lines.append(" " + "-" * 54) for i in range(start, end): occ = 2.0 if i < n_occ else 0.0 marker = "" if i == n_occ - 1: marker = " <-- HOMO" elif i == n_occ: marker = " <-- LUMO" lines.append( f" {i + 1:4d} {occ:4.1f} {energies[i]:16.8f} " f"{energies[i] * _HARTREE_TO_EV:14.6f}{marker}" ) if 0 < n_occ < len(energies): gap = energies[n_occ] - energies[n_occ - 1] lines.append(f" HOMO-LUMO gap: {gap:.6f} Ha ({gap * _HARTREE_TO_EV:.4f} eV)") return "\n".join(lines) def _format_orbital_table_uhf( result, *, n_alpha: int, n_beta: int, n_virtual: int ) -> str: blocks: list[str] = [] for label, energies, n_occ in ( ("Alpha", list(result.mo_energies_alpha), n_alpha), ("Beta", list(result.mo_energies_beta), n_beta), ): start, end = _orbital_window(energies, n_occ, n_virtual) lines = [f" Molecular orbitals - {label}"] lines.append(" " + "-" * 54) lines.append(f" {'#':>4} {'occ':>4} {'eps (Ha)':>16} {'eps (eV)':>14}") lines.append(" " + "-" * 54) tag = label[0] # 'A' / 'B' for i in range(start, end): occ = 1.0 if i < n_occ else 0.0 marker = "" if i == n_occ - 1: marker = f" <-- HO{tag}MO" elif i == n_occ: marker = f" <-- LU{tag}MO" lines.append( f" {i + 1:4d} {occ:4.1f} {energies[i]:16.8f} " f"{energies[i] * _HARTREE_TO_EV:14.6f}{marker}" ) if 0 < n_occ < len(energies): gap = energies[n_occ] - energies[n_occ - 1] lines.append( f" gap ({label.lower()}): {gap:.6f} Ha ({gap * _HARTREE_TO_EV:.4f} eV)" ) blocks.append("\n".join(lines)) return "\n\n".join(blocks) _ELEMENT_SYMBOLS = ( "X", "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se", "Br", "Kr", ) def _element(z: int) -> str: if 0 < z < len(_ELEMENT_SYMBOLS): return _ELEMENT_SYMBOLS[z] return f"Z{z}" def _format_properties_block(result, molecule, basis, *, status=None) -> str: """Mulliken / Löwdin / Hirshfeld charges, Mayer bond orders, dipole moment. Emits nothing if the property computation raises (e.g. when the basis has shells vibe-qc's Python helpers can't parse); post-SCF properties are best-effort — a missing block shouldn't crash the log writer. The Hirshfeld column is computed separately and degrades on its own: it needs a Becke-Lebedev grid build + SAD promolecule, either of which can fail independently of the (cheap, basis-space) Mulliken / Löwdin analyses. When Hirshfeld fails the table falls back to the two-column Mulliken / Löwdin layout. ``status`` Optional mutable dict. When supplied, ``status["hirshfeld"]`` is set to whether the Hirshfeld column actually computed for this call (``True`` only when the charges reached the table). This is the single observable the citation assembler gates on, so the Hirshfeld 1977 reference is emitted iff the user can see the Hirshfeld charges — never on a grid/promolecule build failure (CLAUDE.md § 8). Left untouched when the required Mulliken / Löwdin step itself fails (the whole block is absent then).""" sections: list[str] = [] try: mul = mulliken_charges(result, basis, molecule) low = loewdin_charges(result, basis, molecule) except Exception: return "" # Hirshfeld is best-effort on top of the required Mulliken/Löwdin — # a grid-build or SAD-promolecule failure must not drop the block. try: hirsh = hirshfeld_charges(result, basis, molecule).charges except Exception: hirsh = None # Record the success exactly as it lands in the table, so the runner's # citation block can cite Hirshfeld 1977 only when it really computed. if status is not None: status["hirshfeld"] = hirsh is not None atoms = list(molecule.atoms) lines = [" Atomic charges"] if hirsh is not None: width = 66 lines.append(" " + "-" * width) lines.append( f" {'#':>4} {'elem':>4} {'Mulliken':>10} " f"{'Löwdin':>10} {'Hirshfeld':>10}" ) lines.append(" " + "-" * width) for i, atom in enumerate(atoms, start=1): lines.append( f" {i:4d} {_element(int(atom.Z)):>4s} " f"{mul[i - 1]:10.6f} {low[i - 1]:10.6f} " f"{hirsh[i - 1]:10.6f}" ) lines.append(" " + "-" * width) lines.append( f" {'sum':>11s} {float(np.sum(mul)):10.6f} " f"{float(np.sum(low)):10.6f} {float(np.sum(hirsh)):10.6f}" ) else: width = 52 lines.append(" " + "-" * width) lines.append(f" {'#':>4} {'elem':>4} {'Mulliken':>10} {'Löwdin':>10}") lines.append(" " + "-" * width) for i, atom in enumerate(atoms, start=1): lines.append( f" {i:4d} {_element(int(atom.Z)):>4s} " f"{mul[i - 1]:10.6f} {low[i - 1]:10.6f}" ) lines.append(" " + "-" * width) lines.append(f" {'sum':>11s} {float(np.sum(mul)):10.6f}") sections.append("\n".join(lines)) try: B = mayer_bond_orders(result, basis, molecule) bonds = _prominent_bonds(B, molecule, threshold=0.10) except Exception: bonds = [] if bonds: lines = [" Bond orders (Mayer)"] lines.append(" " + "-" * 40) lines.append(f" {'atoms':>13} {'bond order':>12}") lines.append(" " + "-" * 40) for i, j, bo in bonds: elem_i = _element(int(atoms[i].Z)) elem_j = _element(int(atoms[j].Z)) lines.append(f" {elem_i}{i + 1:<3d}{elem_j}{j + 1:<3d} {bo:12.4f}") sections.append("\n".join(lines)) try: dip = dipole_moment(result, basis, molecule) except Exception: dip = None if dip is not None: dx_d, dy_d, dz_d = dip.components_debye() lines = [ " Dipole moment (origin: center of mass)", " " + "-" * 52, f" {'component':>12} {'a.u. (e·bohr)':>16} {'Debye':>10}", " " + "-" * 52, f" {'x':>12} {dip.x:16.8f} {dx_d:10.4f}", f" {'y':>12} {dip.y:16.8f} {dy_d:10.4f}", f" {'z':>12} {dip.z:16.8f} {dz_d:10.4f}", f" {'|μ|':>12} {dip.total:16.8f} {dip.total_debye:10.4f}", ] sections.append("\n".join(lines)) return "\n\n".join(sections) def format_basis_summary(basis) -> str: """Return a multi-line plain-text summary of a :class:`BasisSet`: its name, total basis-function / shell count, and a per-shell table of exponents and contraction coefficients. Used by the periodic SCF live progress logger at verbose level >= 5 so users can confirm exactly which exponents were loaded when sweeping basis-set choices on a long-running bulk run. The output is plain ASCII and indented to match the rest of :class:`vibeqc.ProgressLogger` emit. The shell-by-shell table is truncated only by the natural shell count; very large basis sets (pob-tzvp on a 50-atom cell, say) will dump several hundred lines. That's the price of asking for ``verbose=5`` on a live run — same convention PySCF follows when its own basis log is enabled. """ name = getattr(basis, "name", "<unknown>") nbf = int(getattr(basis, "nbasis", 0)) nshells = int(getattr(basis, "nshells", 0)) lines = [ " Basis-set details", " " + "-" * 32, f" Name: {name}", f" Functions: {nbf} in {nshells} shell{'s' if nshells != 1 else ''}", "", f" {'shell':>5s} {'l':>1s} {'atom':>4s} " f"{'n_prim':>6s} exponent coefficient", " " + "-" * 60, ] L_LABEL = ["s", "p", "d", "f", "g", "h", "i"] try: shells = basis.shells() except Exception: return "\n".join(lines) for sh_idx, sh in enumerate(shells): l = int(sh.l) l_label = L_LABEL[l] if 0 <= l < len(L_LABEL) else str(l) atom_idx = int(sh.atom_index) exponents = list(sh.exponents) coeffs = list(sh.coefficients) n_prim = len(exponents) # First primitive on the header row, rest indented underneath. lines.append( f" {sh_idx:5d} {l_label:>1s} {atom_idx:4d} " f"{n_prim:6d} {exponents[0]:14.6e} {coeffs[0]:+14.6e}" ) for e, c in zip(exponents[1:], coeffs[1:]): lines.append( f" {'':5s} {'':1s} {'':4s} {'':6s} {e:14.6e} {c:+14.6e}" ) return "\n".join(lines) __all__ = ["format_scf_trace", "format_basis_summary", "log_scf_trace"]