"""Auto-populate :class:`ECPCenter` lists from a basis name + a Molecule.
Phase 14e of the libecpint integration: given a basis like ``lanl2dz``,
``dhf-tzvp``, etc. — one of the 13 ECP-bearing bases that
``scripts/basisset_dev/split_ecp_g94.py`` produced ``<name>.ecp``
sidecar files for — read the sidecar, extract per-element ``ncore``,
and produce a ready-to-go ``opts.ecp_centers`` + ``opts.ecp_library``
without making the user do the bookkeeping by hand.
Public surface:
* :func:`parse_sidecar_path(path)` — read one ``<name>.ecp`` file,
return a list of :class:`EcpHeader` (one per element block).
* :func:`sidecar_path_for(basis_name)` — locate ``<name>.ecp`` next
to ``<name>.g94`` under ``$LIBINT_DATA_PATH/basis/``. Returns
``None`` if the basis has no ECP sidecar (i.e. is all-electron).
* :func:`library_for(basis_name, ncore)` — pick the right libecpint
XML library for a given basis-name + ncore combination. Returns
``None`` for non-standard ncores like vDZP's per-element customs.
* :func:`auto_ecp_centers(mol, basis_name, library_name=None)` — the
one-call helper. Returns ``(ecp_centers, library_name)`` ready to
drop into ``RHFOptions`` / ``UHFOptions`` / ``RKSOptions`` /
``UKSOptions``.
Caveats and known gaps (from `HANDOVER_DOCS_LIBECPINT.md`):
* **Mixed-library molecules unsupported in v0.7.x.** The C++ SCF
drivers take a single ``ecp_library`` string; auto-population
raises ``ValueError`` when the molecule's atoms span more than
one libecpint XML library. Phase 14g (inline-primitive feed via
``set_ecp_basis(...)`` instead of ``set_ecp_basis_from_library``)
removes this restriction.
* **Non-standard ncore values** (vDZP's B/C/N/etc with ``ncore=2``,
``ncore=3``, etc.) match no standard libecpint library; auto-
population raises ``NotImplementedError`` with a Phase 14g
pointer. vDZP's inline ECPs are valid data, just not yet
consumable.
"""
from __future__ import annotations
import os
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
# ---------- Element-symbol lookup -------------------------------------------
#
# BSE-style sidecars come in mixed-case (Na, Mg) AND all-caps (NA, MG)
# depending on the era of the source data (LANL files are all-caps,
# dhf is mixed). Normalise to the canonical title-case symbol on
# parse.
_SYMBOL_TO_Z: dict[str, int] = {
sym: z for z, sym in enumerate([
"", "H", "He",
"Li", "Be", "B", "C", "N", "O", "F", "Ne",
"Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar",
"K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe",
"Co", "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se",
"Br", "Kr", "Rb", "Sr", "Y", "Zr", "Nb", "Mo",
"Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn",
"Sb", "Te", "I", "Xe", "Cs", "Ba",
"La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd",
"Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf",
"Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg",
"Tl", "Pb", "Bi", "Po", "At", "Rn",
# Actinides (Z=87..103) covered for LANL08 / lanl2dz heavy
# elements. Without these, lanl2dz.ecp's "U-ECP" line fails
# the parse.
"Fr", "Ra",
"Ac", "Th", "Pa", "U", "Np", "Pu", "Am", "Cm",
"Bk", "Cf", "Es", "Fm", "Md", "No", "Lr",
])
}
def _normalise_symbol(raw: str) -> str:
"""Map ``"NA"`` / ``"Na"`` / ``"na"`` to canonical ``"Na"``."""
s = raw.strip()
if not s:
return s
return s[0].upper() + s[1:].lower()
# ---------- Sidecar parser --------------------------------------------------
# BSE / NWChem ``.g94`` ECP-block convention:
# <Sym>-ECP <lmax> <ncore>
# Confirmed against LANL2DZ sodium (NA-ECP 2 10 → lmax=2 d-projection,
# ncore=10 = [Ne]) and dhf-TZVP rubidium (RB-ECP 4 28 → lmax=4,
# ncore=28 = [Ar]3d¹⁰).
_ECP_HEADER_RE = re.compile(
r"^\s*([A-Z][A-Za-z]?)-ECP\s+(\d+)\s+(\d+)\s*$"
)
@dataclass(frozen=True)
class EcpHeader:
"""One ``<Sym>-ECP <lmax> <ncore>`` row from a sidecar.
Attributes
----------
symbol : str
Canonical title-case element symbol (``"Na"``, ``"Rb"``).
Z : int
Atomic number derived from ``symbol``.
lmax : int
Maximum angular momentum used in the ECP expansion.
ncore : int
Number of replaced core electrons. Subtract from atomic ``Z``
to get the valence-electron count for the SCF.
"""
symbol: str
Z: int
lmax: int
ncore: int
def parse_sidecar_path(path: Path) -> list[EcpHeader]:
"""Parse a ``.ecp`` sidecar file. Returns one :class:`EcpHeader`
per element block, in source order. Skips comments and blank
lines; raises :class:`ValueError` on malformed headers.
"""
text = Path(path).read_text(errors="replace")
out: list[EcpHeader] = []
for line in text.splitlines():
m = _ECP_HEADER_RE.match(line)
if not m:
continue
sym = _normalise_symbol(m.group(1))
if sym not in _SYMBOL_TO_Z:
raise ValueError(
f"{path}: ECP header references unknown element "
f"{m.group(1)!r}"
)
out.append(EcpHeader(
symbol=sym, Z=_SYMBOL_TO_Z[sym],
lmax=int(m.group(2)), ncore=int(m.group(3)),
))
return out
def sidecar_path_for(basis_name: str) -> Optional[Path]:
"""Locate ``<basis_name>.ecp`` under ``$LIBINT_DATA_PATH/basis/``.
Returns ``None`` when no sidecar exists (i.e. the basis is
all-electron). The path resolution mirrors libint's: at vibe-qc
import time ``__init__.py`` points ``$LIBINT_DATA_PATH`` at the
bundled ``basis_library/`` directory; basis names resolve under
``<that>/basis/<name>.g94`` and we look for ``.ecp`` alongside.
"""
root = os.environ.get("LIBINT_DATA_PATH")
if not root:
return None
candidate = Path(root) / "basis" / f"{basis_name.lower()}.ecp"
return candidate if candidate.is_file() else None
# ---------- Library-name resolution ----------------------------------------
#
# libecpint ships a fixed set of XML ECP libraries under
# ``python/vibeqc/ecp_library/xml/``: ecp10mdf, ecp28mdf, ecp46mdf,
# ecp60mdf, ecp78mdf, lanl2dz. The basis name + ncore tell us which
# one to point at.
# Standard ncore-keyed mapping for the Stuttgart-Köln MDF series.
# Used when the basis name itself isn't a libecpint library
# (e.g. dhf-tzvp + Rb has ncore=28 → ecp28mdf).
_NCORE_TO_MDF: dict[int, str] = {
10: "ecp10mdf",
28: "ecp28mdf",
46: "ecp46mdf",
60: "ecp60mdf",
78: "ecp78mdf",
}
# Bases whose name maps directly to a libecpint XML library —
# ncore-irrelevant since the same XML covers every element in the
# basis. Add to this set as new ECP basis families ship XML.
_BASIS_TO_LIBRARY: dict[str, str] = {
"lanl2dz": "lanl2dz",
"lanl2dzdp": "lanl2dz",
"lanl2tz": "lanl2dz",
"lanl08": "lanl2dz",
"lanl08(d)": "lanl2dz",
"lanl08(f)": "lanl2dz",
}
def library_for(basis_name: str, ncore: int) -> Optional[str]:
"""Pick the libecpint XML library name for ``(basis_name, ncore)``.
Resolution order:
1. If the basis name is one of the LANL family, use
``"lanl2dz"`` (every LANL element lives in that single XML).
2. Otherwise consult :data:`_NCORE_TO_MDF` keyed on ``ncore``
— covers def2-TZVP/QZVP heavy elements, dhf-*, x2c-*, and
any other Stuttgart-Köln MDF-derived basis.
3. Return ``None`` for non-standard ncores (vDZP's
per-element customs, ``ncore=2``, ``ncore=3``, etc.).
The ``None`` return is the auto-populator's "no standard
library matches; user must provide an inline-primitive feed
(Phase 14g)" signal.
"""
name = basis_name.lower()
if name in _BASIS_TO_LIBRARY:
return _BASIS_TO_LIBRARY[name]
return _NCORE_TO_MDF.get(ncore)
# ---------- The one-call helper --------------------------------------------
[docs]
def auto_ecp_centers(mol, basis_name: str,
library_name: Optional[str] = None) -> tuple:
"""Build ``(ecp_centers, library_name)`` from a Molecule + basis name.
For each atom in ``mol`` whose Z appears in
``<basis_name>.ecp``, emits an ``ECPCenter(Z, xyz)`` and resolves
the right libecpint XML library. The two returned values can be
assigned directly to ``opts.ecp_centers`` and ``opts.ecp_library``
on any of the four molecular SCF Options classes.
Parameters
----------
mol : vibeqc.Molecule
The molecule whose heavy atoms need ECP centres built.
basis_name : str
Basis-set name as you'd pass to ``vq.BasisSet(mol, name)``.
library_name : Optional[str]
Override the auto-resolved libecpint XML library. Useful when
a basis ships a custom XML the user dropped into
``$VIBEQC_ECP_SHARE_DIR/xml/``.
Returns
-------
(centers, library_name) : tuple[list[ECPCenter], str]
Empty list and the input ``library_name`` (or ``""``) if the
basis is all-electron; otherwise a populated list.
Raises
------
ValueError
When the molecule's heavy atoms would need MORE THAN ONE
libecpint XML library (mixed-row case — common for dhf with
a Rb–Cs span). Calls out Phase 14g as the unblocker.
NotImplementedError
When the basis has at least one ECP atom whose ncore doesn't
match any standard libecpint library (vDZP-style customs).
"""
from . import ECPCenter # local: avoid hard-import for doc builds
sidecar = sidecar_path_for(basis_name)
if sidecar is None:
# All-electron basis. Nothing to build; honour any explicit
# library_name the caller already provided.
return [], (library_name or "")
headers = parse_sidecar_path(sidecar)
by_z = {h.Z: h for h in headers}
if not by_z:
return [], (library_name or "")
# Walk mol's atoms; emit centres for every ECP-bearing match.
# ``Molecule.atoms`` is a property returning a list, not a method.
centres: list = []
libraries_seen: set[str] = set()
for atom in mol.atoms:
h = by_z.get(int(atom.Z))
if h is None:
continue # all-electron atom, skip
ec = ECPCenter()
ec.Z = h.Z
ec.xyz = list(atom.xyz)
centres.append(ec)
if library_name is None:
lib = library_for(basis_name, h.ncore)
if lib is None:
raise NotImplementedError(
f"Basis {basis_name!r} has a non-standard ECP for "
f"{h.symbol} (ncore={h.ncore}, lmax={h.lmax}) — no "
"matching libecpint XML library is bundled. This "
"is the vDZP / inline-primitive case; needs Phase "
"14g (libecpint set_ecp_basis(...) direct feed) to "
"use end-to-end. As a workaround, build "
"ecp_centers manually and switch to a basis with a "
"standard ECP (e.g. def2-TZVP + def2-ECP)."
)
libraries_seen.add(lib)
if library_name is None:
if len(libraries_seen) == 0:
# No mol atom matched a sidecar element — basis carries
# ECPs but the molecule doesn't need any. Fine; return
# empty list + empty library.
return [], ""
if len(libraries_seen) > 1:
raise ValueError(
f"Basis {basis_name!r} would need {sorted(libraries_seen)} "
"libecpint libraries to cover this molecule (mixed-row "
"ECP), but vibe-qc's SCF drivers accept exactly one "
"ecp_library per call. Split the molecule into "
"single-library subsets, or wait for Phase 14g (one "
"library per atom)."
)
library_name = next(iter(libraries_seen))
return centres, library_name