Source code for vibeqc.io.orca_hess

"""ORCA ``.hess`` ASCII writer for vibe-qc :class:`HessianResult` objects.

Phase M1. The ``.hess`` format is ORCA's documented ASCII Hessian
container, consumed natively by:

  - **moltui** (``moltui out.hess`` for animated normal-mode display);
  - **chemcraft**, **avogadro**, **gabedit** (vibrational-mode
    visualization);
  - **VMD** (via the molefacture / nmwiz plugin).

This is the cleanest interchange path between vibe-qc and the
visualization ecosystem because ``.hess`` is fully ASCII (no
proprietary binary structure like ``.gbw``) and version-stable.

Format reference: extracted from a real ORCA 6.1.1 NumFreq run on
H₂O / HF / STO-3G. The 14 sections ORCA writes, in order:

  1. ``$orca_hessian_file`` — header
  2. ``$act_atom`` — atom index for partial Hessians (``0`` for full)
  3. ``$act_coord`` — coordinate index (``0`` for full)
  4. ``$act_energy`` — total electronic energy in Ha
  5. ``$multiplicity``
  6. ``$hessian`` — (3N, 3N) Cartesian Hessian in Ha/bohr², printed in
     5-columns-per-block layout with row labels
  7. ``$vibrational_frequencies`` — 3N freqs in cm⁻¹ (zero modes 0.0,
     imaginary modes negative)
  8. ``$normal_modes`` — (3N, 3N) mass-weighted normal-mode matrix
     (each *column* is a mode), 5-columns-per-block layout
  9. ``$atoms`` — N atoms with ``label mass(amu) x y z`` in bohr
 10. ``$actual_temperature`` — finite-T data temperature; 0.0 by default
 11. ``$frequency_scale_factor`` — ORCA's harmonic-to-empirical scale
 12. ``$dipole_derivatives`` — (3N, 3) ∂μ/∂R tensor in atomic units;
     **only written when present on the HessianResult**
 13. ``$ir_spectrum`` — N rows of ``wavenumber eps Int TX TY TZ``;
     **only written when dipole derivatives are present**
 14. ``$end``

When ``HessianResult.dipole_derivatives is None`` the writer emits a
zeroed ``$dipole_derivatives`` block + a zeroed ``$ir_spectrum`` so
the file remains readable by tools that scan for those sections; the
mode-amplitude information (which moltui actually uses) is unaffected.
"""

from __future__ import annotations

import os
from typing import Optional, Union

import numpy as np

from .._vibeqc_core import Atom, Molecule
from ..hessian import HessianResult, ir_intensities


__all__ = ["write_orca_hess"]


# ----------------------------------------------------------------------
# Element symbols — covers Z = 1..118
# ----------------------------------------------------------------------

_ELEMENT_SYMBOLS = (
    "H",  "He", "Li", "Be", "B",  "C",  "N",  "O",  "F",  "Ne",
    "Na", "Mg", "Al", "Si", "P",  "S",  "Cl", "Ar", "K",  "Ca",
    "Sc", "Ti", "V",  "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn",
    "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",  "Zr",
    "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn",
    "Sb", "Te", "I",  "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd",
    "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb",
    "Lu", "Hf", "Ta", "W",  "Re", "Os", "Ir", "Pt", "Au", "Hg",
    "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th",
    "Pa", "U",  "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm",
    "Md", "No", "Lr", "Rf", "Db", "Sg", "Bh", "Hs", "Mt", "Ds",
    "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts", "Og",
)


def _element_symbol(z: int) -> str:
    if 1 <= z <= len(_ELEMENT_SYMBOLS):
        return _ELEMENT_SYMBOLS[z - 1]
    return f"Z{z}"


# ----------------------------------------------------------------------
# Block-printing helpers — match ORCA's exact layout so character-by-
# character diff against an ORCA-produced file passes.
# ----------------------------------------------------------------------

def _write_block_matrix(fh, M: np.ndarray, *, cols_per_block: int = 5) -> None:
    """Write a 2-D matrix in ORCA's ``$hessian`` / ``$normal_modes``
    layout — split columns into chunks of ``cols_per_block``, each
    chunk preceded by a header row of column indices, followed by
    rows labeled by row index.

    Layout (matches ORCA 6.x exactly so the file is a drop-in for any
    consumer that does positional parsing rather than tokenising on
    whitespace):

      - Row label: ``%5d``
      - First column number: ``%22.10E`` (6-space-leading for positive,
        5-space + sign for negative) — extra wide so the row label
        and the first datum read clearly.
      - Subsequent column numbers: ``%19.10E`` (3-space-leading for
        positive, 2-space + sign for negative).

    Header row mirrors the column-number layout: 5-space row-label gap,
    then per-column ``%22d`` for the first column and ``%19d`` for the
    rest, with 8 trailing spaces (ORCA's habit).
    """
    n_rows, n_cols = M.shape
    for c0 in range(0, n_cols, cols_per_block):
        c1 = min(c0 + cols_per_block, n_cols)
        # Header line follows ORCA's Fortran format:
        #   (18X, I3, 4(16X, I3), 8X)
        # i.e. 18 leading spaces, col index in 3 chars, then 16-space
        # gap + 3-char col index per subsequent column, with 8 trailing
        # spaces. Total = 18 + 3 + 4*(16+3) + 8 = 105 chars for a
        # 5-wide block.
        header = " " * 18
        for j, c in enumerate(range(c0, c1)):
            if j > 0:
                header += " " * 16
            header += f"{c:>3d}"
        header += " " * 8
        fh.write(header + "\n")
        # Data lines:
        #   (I5, 6X, ES16.10, 4(3X, ES16.10))
        # i.e. 5-wide row label, 6 spaces, first 16-char number, then
        # 3-space gap + 16-char number per subsequent column. The
        # %22.10E and %19.10E formats produce identical output for
        # positive numbers; for negatives the sign consumes 1 of the
        # leading spaces (5 + 16 instead of 6 + 16).
        for r in range(n_rows):
            line = f"{r:>5d}"
            for j, c in enumerate(range(c0, c1)):
                width = 22 if j == 0 else 19
                line += f"{M[r, c]:>{width}.10E}"
            fh.write(line + "\n")


def _write_dipole_derivatives(fh, D: np.ndarray) -> None:
    """``$dipole_derivatives`` block: 3N rows × 3 cols, no labels."""
    n = D.shape[0]
    fh.write("$dipole_derivatives\n")
    fh.write(f"{n}\n")
    for r in range(n):
        fh.write(
            f"   {D[r, 0]:.10E}   {D[r, 1]:.10E}   {D[r, 2]:.10E}\n"
        )


# ----------------------------------------------------------------------
# Public writer
# ----------------------------------------------------------------------

[docs] def write_orca_hess( path: Union[str, os.PathLike], molecule: Molecule, result: HessianResult, *, energy_ha: float = 0.0, multiplicity: int = 1, actual_temperature_K: float = 0.0, frequency_scale_factor: float = 1.0, ) -> None: """Write a vibe-qc :class:`HessianResult` to an ORCA-format ``.hess`` file. Parameters ---------- path: Output file path (overwritten if it exists). molecule: The :class:`Molecule` the Hessian was computed for. Atom species, masses, and Cartesian positions go into the ``$atoms`` block. result: :class:`HessianResult` carrying the Cartesian Hessian, mass- weighted normal modes, and frequencies. energy_ha: Total electronic energy at the reference geometry, in Hartree. Goes into ``$act_energy``. Pass the SCF total energy here. multiplicity: Spin multiplicity 2S+1. Default 1 (singlet). actual_temperature_K: Finite-temperature data temperature in K. ORCA writes 0.0 by default; we follow. frequency_scale_factor: Empirical harmonic-to-experimental frequency scaling factor (e.g. 0.95 for HF, 0.97 for DFT). 1.0 = no scaling. Default 1.0. Notes ----- moltui consumes ``$atoms``, ``$vibrational_frequencies``, and ``$normal_modes`` to animate vibrational modes. The Hessian + dipole-derivative + IR-spectrum blocks are written unconditionally (zeroed when not available on the HessianResult) so the file is a drop-in for any consumer that scans for them. Examples -------- >>> from vibeqc import ( ... Molecule, BasisSet, run_rhf, RHFOptions, ... compute_hessian_rhf_analytic, ... ) >>> from vibeqc.io import write_orca_hess >>> mol = Molecule.from_xyz("h2o.xyz") >>> basis = BasisSet(mol, "sto-3g") >>> rhf = run_rhf(mol, basis, RHFOptions()) >>> hess = compute_hessian_rhf_analytic(mol, basis, rhf, ... basis_name="sto-3g") >>> write_orca_hess("h2o.hess", mol, hess, energy_ha=rhf.energy) """ H = np.asarray(result.hessian, dtype=np.float64) L = np.asarray(result.normal_modes, dtype=np.float64) freqs = np.asarray(result.frequencies_cm1, dtype=np.float64) n_dof = H.shape[0] if H.shape != (n_dof, n_dof): raise ValueError( f"write_orca_hess: hessian has shape {H.shape}; expected square." ) if L.shape != (n_dof, n_dof) or freqs.shape != (n_dof,): raise ValueError( f"write_orca_hess: normal_modes shape {L.shape} / frequencies " f"shape {freqs.shape} inconsistent with Hessian shape {H.shape}." ) n_atoms = n_dof // 3 if 3 * n_atoms != n_dof: raise ValueError( f"write_orca_hess: 3N = {n_dof} is not divisible by 3." ) if len(molecule.atoms) != n_atoms: raise ValueError( f"write_orca_hess: molecule has {len(molecule.atoms)} atoms but " f"Hessian implies {n_atoms}." ) masses_amu = ( np.asarray(result.masses_amu, dtype=np.float64) if result.masses_amu is not None else _default_masses_amu(molecule) ) if masses_amu.shape != (n_atoms,): raise ValueError( f"write_orca_hess: masses_amu shape {masses_amu.shape}; " f"expected ({n_atoms},)." ) # Dipole derivatives (3N, 3) — zero pad if missing. if result.dipole_derivatives is not None: D = np.asarray(result.dipole_derivatives, dtype=np.float64) ir_int = ir_intensities(result) have_dipole = True else: D = np.zeros((n_dof, 3), dtype=np.float64) ir_int = np.zeros(n_dof, dtype=np.float64) have_dipole = False with open(path, "w", encoding="utf-8") as fh: # Section 1 — header fh.write("\n") fh.write("$orca_hessian_file\n") fh.write("\n") # Sections 2 + 3 — partial-Hessian markers (zero = full Hessian) fh.write("$act_atom\n 0\n\n") fh.write("$act_coord\n 0\n\n") # Section 4 — total energy fh.write("$act_energy\n") fh.write(f" {energy_ha:.6f}\n\n") # Section 5 — multiplicity fh.write("$multiplicity\n") fh.write(f" {int(multiplicity)}\n\n") # Section 6 — Cartesian Hessian fh.write("$hessian\n") fh.write(f"{n_dof}\n") _write_block_matrix(fh, H) fh.write("\n") # Section 7 — frequencies fh.write("$vibrational_frequencies\n") fh.write(f"{n_dof}\n") for i, f in enumerate(freqs): fh.write(f" {i:<5d}{f:>22.16f}\n") fh.write("\n") # Section 8 — normal modes (mass-weighted, columns) fh.write("$normal_modes\n") fh.write(f"{n_dof} {n_dof}\n") _write_block_matrix(fh, L) fh.write("\n") # Section 9 — atoms (label, mass[amu], x, y, z[bohr]) fh.write("#\n# The atoms: label mass x y z (in bohrs)\n#\n") fh.write("$atoms\n") fh.write(f"{n_atoms}\n") for i, a in enumerate(molecule.atoms): sym = _element_symbol(a.Z) x, y, z = a.xyz fh.write( f" {sym:<4s} {masses_amu[i]:>10.5f} " f"{x: .12f} {y: .12f} {z: .12f}\n" ) fh.write("\n") # Section 10 — finite-T temperature fh.write("$actual_temperature\n") fh.write(f" {actual_temperature_K:.6f}\n\n") # Section 11 — frequency scale factor fh.write("$frequency_scale_factor\n") fh.write(f" {frequency_scale_factor:.6f}\n\n") # Section 12 — dipole derivatives (3N rows × 3 cols) _write_dipole_derivatives(fh, D) fh.write("\n") # Section 13 — IR spectrum (N rows: wavenumber, eps, Int, TX, TY, TZ) fh.write("#\n# The IR spectrum\n# wavenumber eps Int TX TY TZ\n#\n") fh.write("$ir_spectrum\n") fh.write(f"{n_dof}\n") for i in range(n_dof): wavenum = freqs[i] if freqs[i] >= 0 else 0.0 intensity = ir_int[i] if have_dipole else 0.0 # ``eps`` (extinction coefficient) is intensity / (some # frequency-dependent prefactor); we leave it 0 unless we # have IR intensities — moltui doesn't read it anyway. eps = 0.0 tx, ty, tz = (D[i, 0], D[i, 1], D[i, 2]) if have_dipole \ else (0.0, 0.0, 0.0) fh.write( f" {wavenum:>7.2f} {eps:.8f} {intensity:.8f} " f"{tx: .6f} {ty: .6f} {tz: .6f}\n" ) fh.write("\n") # Section 14 — sentinel fh.write("\n$end\n") fh.write("\n")
# ---------------------------------------------------------------------- # Internal — fallback masses when HessianResult.masses_amu is missing # (older code paths). # ---------------------------------------------------------------------- def _default_masses_amu(molecule: Molecule) -> np.ndarray: """Per-atom isotope-averaged masses keyed by atomic number. We duplicate the small table from ``hessian._resolve_masses`` rather than importing it (private helper, but identical numerical contents) — keeps this module self-contained for the writer. """ # Standard atomic weights (IUPAC 2021), most-abundant-isotope- # averaged. Same reference table used elsewhere in vibe-qc. table = { 1: 1.00800, 2: 4.00260, 3: 6.94000, 4: 9.01218, 5: 10.81000, 6: 12.01100, 7: 14.00700, 8: 15.99900, 9: 18.99800, 10: 20.18000, 11: 22.98977, 12: 24.30500, 13: 26.98154, 14: 28.08500, 15: 30.97376, 16: 32.06000, 17: 35.45000, 18: 39.94800, 19: 39.09830, 20: 40.07800, } masses = np.zeros(len(molecule.atoms), dtype=np.float64) for i, atom in enumerate(molecule.atoms): masses[i] = table.get(int(atom.Z), float(atom.Z) * 2.0) return masses