"""Pure-Python convenience layer around the native Molecule/Atom types.
The core ``Atom`` and ``Molecule`` classes live in the C++ extension; this
module just adds input-format parsers.
"""
from __future__ import annotations
from pathlib import Path
from typing import Union
from ._vibeqc_core import Atom, Molecule
# Conversion factor from Ångström to Bohr: divide a length in Å by the Bohr
# radius a0 = 0.529177210903 Å (CODATA 2018).
ANGSTROM_TO_BOHR = 1.0 / 0.529177210903

# Element symbol -> atomic number for Z = 1..36 (H through Kr). The number is
# derived from each symbol's position in periodic-table order, so extending
# the table means appending further symbols in order. Lookups normalise the
# symbol's case before consulting this mapping.
_ATOMIC_NUMBERS = {
    symbol: number
    for number, symbol in enumerate(
        (
            "H", "He",
            "Li", "Be", "B", "C", "N", "O", "F", "Ne",
            "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar",
            "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn",
            "Fe", "Co", "Ni", "Cu", "Zn",
            "Ga", "Ge", "As", "Se", "Br", "Kr",
        ),
        start=1,
    )
}
def _atomic_number(symbol: str) -> int:
    """Return the atomic number for an element symbol.

    Surrounding whitespace is ignored and the symbol is capitalised before
    the lookup, so ``"cl"``, ``"CL"`` and ``" Cl "`` all resolve to chlorine.

    Raises
    ------
    ValueError
        If the normalised symbol is not a key of ``_ATOMIC_NUMBERS``. The
        underlying ``KeyError`` is attached as the cause.
    """
    normalized = symbol.strip().capitalize()
    try:
        number = _ATOMIC_NUMBERS[normalized]
    except KeyError as missing:
        # Report the symbol exactly as the caller supplied it.
        raise ValueError(f"unknown element symbol: {symbol!r}") from missing
    return number
# Input-format parsers.
def from_xyz(
    path: Union[str, Path],
    *,
    charge: int = 0,
    multiplicity: int = 1,
) -> Molecule:
    """Parse a standard XYZ file (positions in Ångström) into a Molecule.

    Coordinates are converted from Ångström to Bohr (via
    ``ANGSTROM_TO_BOHR``) before the atoms are constructed.

    Parameters
    ----------
    path:
        Path to an ``.xyz`` file. First line: atom count. Second line:
        comment. Following lines: ``<symbol> <x> <y> <z>``. Any columns
        after the fourth are ignored.
    charge:
        Net charge of the molecule (electron count = Σ Z − charge).
    multiplicity:
        Spin multiplicity 2S+1. Must be ≥ 1 and have the right parity
        against the electron count (validated by the native Molecule).

    Returns
    -------
    Molecule
        Built from the parsed atoms, ``charge`` and ``multiplicity``.

    Raises
    ------
    ValueError
        If the file has fewer than two lines, the first line is not a
        non-negative integer, the number of non-blank atom records does not
        match the declared count, a record has fewer than four columns, an
        element symbol is unknown, or a coordinate is not a valid float.
        Per-record errors report the 1-based line number within the file.
    OSError
        If the file cannot be opened or read.
    """
    path = Path(path)
    with path.open("r", encoding="utf-8") as fh:
        lines = fh.read().splitlines()

    if len(lines) < 2:
        raise ValueError(f"XYZ file {path} is too short")

    try:
        n_atoms = int(lines[0].strip())
    except ValueError as exc:
        raise ValueError(
            f"XYZ file {path} first line must be atom count, got {lines[0]!r}"
        ) from exc
    if n_atoms < 0:
        raise ValueError(
            f"XYZ file {path}: atom count must be non-negative, got {n_atoms}"
        )

    # Only the n_atoms lines after the count and comment lines are considered;
    # a blank line inside that window counts as a missing atom record.
    body = [ln for ln in lines[2 : 2 + n_atoms] if ln.strip()]
    if len(body) != n_atoms:
        raise ValueError(
            f"XYZ file {path}: expected {n_atoms} atom lines, got {len(body)}"
        )

    atoms: list[Atom] = []
    # Atom records start on file line 3 (line 1 = count, line 2 = comment).
    # Once the count check above has passed no blank lines were dropped, so
    # enumerating from 3 gives the true line number for error messages.
    for lineno, line in enumerate(body, start=3):
        parts = line.split()
        if len(parts) < 4:
            raise ValueError(
                f"XYZ file {path} line {lineno}: need '<sym> <x> <y> <z>', "
                f"got {line!r}"
            )

        try:
            Z = _atomic_number(parts[0])
        except ValueError as exc:
            # Add file/line context to the symbol-lookup failure.
            raise ValueError(f"XYZ file {path} line {lineno}: {exc}") from exc

        try:
            xyz = [float(p) * ANGSTROM_TO_BOHR for p in parts[1:4]]
        except ValueError as exc:
            raise ValueError(
                f"XYZ file {path} line {lineno}: coordinates must be numbers, "
                f"got {line!r}"
            ) from exc

        atoms.append(Atom(Z, xyz))

    return Molecule(atoms, charge, multiplicity)