Source code for vibeqc.molecule

"""Pure-Python convenience layer around the native Molecule/Atom types.

The core ``Atom`` and ``Molecule`` classes live in the C++ extension; this
module just adds input-format parsers.
"""

from __future__ import annotations

from pathlib import Path
from typing import Union

from ._vibeqc_core import Atom, Molecule

# Multiply a length in Ångström by this factor to obtain bohr (factor = 1 / a0).
# CODATA 2018 Bohr radius: a0 = 0.529177210903 Å.
ANGSTROM_TO_BOHR = 1.0 / 0.529177210903

# Element symbol -> atomic number for Z = 1..36 (H through Kr); extend as needed.
# Symbols are listed in order of increasing Z, so a symbol's 1-based position
# in the tuple is its atomic number. Lookups normalize the case of the symbol.
_ATOMIC_NUMBERS = {
    symbol: z
    for z, symbol in enumerate(
        (
            "H", "He",
            "Li", "Be", "B", "C", "N", "O", "F", "Ne",
            "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar",
            "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn",
            "Fe", "Co", "Ni", "Cu", "Zn",
            "Ga", "Ge", "As", "Se", "Br", "Kr",
        ),
        start=1,
    )
}


def _atomic_number(symbol: str) -> int:
    """Return the atomic number for an element *symbol*.

    Surrounding whitespace is stripped and the symbol is capitalized
    (first letter upper-case, the rest lower-case) before it is looked up
    in ``_ATOMIC_NUMBERS``, so ``"he"``, ``"HE"`` and ``" He "`` all match.

    Raises
    ------
    ValueError
        If the normalized symbol is not in the table. The message shows
        the symbol exactly as it was passed in.
    """
    normalized = symbol.strip().capitalize()
    try:
        z = _ATOMIC_NUMBERS[normalized]
    except KeyError as err:
        raise ValueError(f"unknown element symbol: {symbol!r}") from err
    return z



[docs]
def from_xyz(
    path: Union[str, Path],
    *,
    charge: int = 0,
    multiplicity: int = 1,
) -> Molecule:
    """Parse a standard XYZ file (positions in Ångström) into a Molecule.

    Coordinates are converted from Ångström to bohr (multiplied by
    ``ANGSTROM_TO_BOHR``) before the atoms are constructed.

    Parameters
    ----------
    path:
        Path to an ``.xyz`` file. First line: atom count. Second line:
        comment. Following lines: ``<symbol> <x> <y> <z>``. Any columns
        after the fourth are ignored, as are lines beyond the declared
        atom count.
    charge:
        Net charge of the molecule (electron count = Σ Z − charge).
    multiplicity:
        Spin multiplicity 2S+1. Must be ≥ 1 and have the right parity
        against the electron count (validated by the native Molecule).

    Returns
    -------
    Molecule
        Built from the parsed atoms, ``charge`` and ``multiplicity``.

    Raises
    ------
    ValueError
        If the file has fewer than two lines, the atom count is not a
        non-negative integer, the number of non-blank atom lines does not
        match the count, or an atom line has too few fields, an unknown
        element symbol, or a non-numeric coordinate. Per-line errors
        report the 1-based line number within the file.
    """
    path = Path(path)
    # "utf-8-sig" reads plain UTF-8 unchanged but also drops a leading BOM,
    # which would otherwise make the atom-count line fail to parse.
    with path.open("r", encoding="utf-8-sig") as fh:
        lines = fh.read().splitlines()

    if len(lines) < 2:
        raise ValueError(f"XYZ file {path} is too short")

    try:
        n_atoms = int(lines[0].strip())
    except ValueError as exc:
        raise ValueError(
            f"XYZ file {path} first line must be atom count, got {lines[0]!r}"
        ) from exc
    if n_atoms < 0:
        raise ValueError(
            f"XYZ file {path}: atom count must be non-negative, got {n_atoms}"
        )

    # Atom records start on file line 3 (after the count and comment lines).
    # Keep the real line number next to each record so errors point at the
    # right place in the file.
    body = [
        (line_no, text)
        for line_no, text in enumerate(lines[2 : 2 + n_atoms], start=3)
        if text.strip()
    ]
    if len(body) != n_atoms:
        raise ValueError(
            f"XYZ file {path}: expected {n_atoms} atom lines, got {len(body)}"
        )

    atoms: list[Atom] = []
    for line_no, line in body:
        parts = line.split()
        if len(parts) < 4:
            raise ValueError(
                f"XYZ file {path} line {line_no}: "
                f"need '<sym> <x> <y> <z>', got {line!r}"
            )
        try:
            atomic_number = _atomic_number(parts[0])
            position = [float(p) * ANGSTROM_TO_BOHR for p in parts[1:4]]
        except ValueError as exc:
            # Add file/line context to unknown-symbol and bad-number errors.
            raise ValueError(f"XYZ file {path} line {line_no}: {exc}") from exc
        atoms.append(Atom(atomic_number, position))

    return Molecule(atoms, charge, multiplicity)