"""QVF reader — the core file-open lifecycle (§2.4 of the design doc).

Responsibilities:
- Open .qvf zip archive, extract + validate manifest.json
- Parse sections into pydantic models
- Verify sha256 of every binary member before use (Rule 4)
- Lazy extraction of volumetric .dat blobs
"""

from __future__ import annotations

import hashlib
import io
import json
import zipfile
from dataclasses import dataclass
from pathlib import Path
from typing import IO, Any, Union

import jsonschema
import numpy as np
from pydantic import BaseModel, ConfigDict, Field, field_validator

# ── Pydantic models for manifest.json ─────────────────────────────────────


class Source(BaseModel):
    program: str
    version: str
    calculation: str


class MemberSpec(BaseModel):
    """One member of a section (a file inside the zip)."""

    path: str
    format: str  # "json" or "binary"
    sha256: str
    dtype: str | None = None
    shape: list[int] | None = None

    @field_validator("sha256")
    @classmethod
    def _validate_sha256(cls, v: str) -> str:
        if len(v) != 64 or not all(c in "0123456789abcdef" for c in v):
            raise ValueError(f"sha256 must be 64 hex chars, got: {v!r}")
        return v


class Section(BaseModel):
    """One section from the manifest."""

    model_config = ConfigDict(extra="allow")
    id: str
    kind: str
    members: dict[str, MemberSpec] = Field(default_factory=dict)
    component: str | None = None
    # Section-level cross-reference for `reaction.waypoints`. Other
    # cross-refs (e.g. volume.difference.operand_a/_b) come through
    # model_extra.
    trajectory_ref: str | None = None


class ViewerDefaults(BaseModel):
    """Viewer defaults from manifest.json.

    Known fields (auto_open) are explicit. All other keys are per-section
    hints (e.g. ``"density": {"isovalue": 0.05, ...}``) and are captured
    via Pydantic's extra="allow" in ``model_extra``.
    """

    model_config = ConfigDict(extra="allow")
    auto_open: list[str] = Field(default_factory=list)


class Manifest(BaseModel):
    model_config = ConfigDict(extra="allow")
    qvf_version: int
    source: Source
    sections: list[Section]
    viewer_defaults: ViewerDefaults | None = None


# ── Exceptions ────────────────────────────────────────────────────────────


class QVFError(Exception):
    """Base for all QVF reader errors."""


class QVFOpenError(QVFError):
    """Cannot open or read the .qvf file."""


class ManifestValidationError(QVFError):
    """manifest.json failed JSON Schema validation."""


class SHA256MismatchError(QVFError):
    """sha256 of a member does not match the manifest. Hard error for that section."""


class SectionNotFoundError(QVFError):
    """Requested section id not found in manifest."""


# ── Covalent radii table (angstroms) for bond inference ─────────────────
# From Cordero et al., Dalton Trans. (2008). Used when no bonds section
# is present in the QVF file.

_COVALENT_RADII: dict[int, float] = {
    1: 0.31,
    2: 0.28,
    3: 1.28,
    4: 0.96,
    5: 0.84,
    6: 0.76,
    7: 0.71,
    8: 0.66,
    9: 0.57,
    10: 0.58,
    11: 1.66,
    12: 1.41,
    13: 1.21,
    14: 1.11,
    15: 1.07,
    16: 1.05,
    17: 1.02,
    18: 1.06,
    19: 2.03,
    20: 1.76,
    21: 1.70,
    22: 1.60,
    23: 1.53,
    24: 1.39,
    25: 1.39,
    26: 1.32,
    27: 1.26,
    28: 1.24,
    29: 1.32,
    30: 1.22,
    31: 1.22,
    32: 1.20,
    33: 1.19,
    34: 1.20,
    35: 1.20,
    36: 1.16,
    37: 2.20,
    38: 1.95,
    39: 1.90,
    40: 1.75,
    41: 1.64,
    42: 1.54,
    43: 1.47,
    44: 1.46,
    45: 1.42,
    46: 1.39,
    47: 1.45,
    48: 1.44,
    49: 1.42,
    50: 1.39,
    51: 1.39,
    52: 1.38,
    53: 1.39,
    54: 1.40,
    55: 2.44,
    56: 2.15,
    57: 2.07,
    58: 2.04,
    59: 2.03,
    60: 2.01,
    61: 1.99,
    62: 1.98,
    63: 1.98,
    64: 1.96,
    65: 1.94,
    66: 1.92,
    67: 1.92,
    68: 1.89,
    69: 1.90,
    70: 1.87,
    71: 1.87,
    72: 1.75,
    73: 1.70,
    74: 1.62,
    75: 1.51,
    76: 1.44,
    77: 1.41,
    78: 1.36,
    79: 1.36,
    80: 1.32,
    81: 1.45,
    82: 1.46,
    83: 1.48,
    84: 1.40,
    85: 1.50,
    86: 1.50,
    87: 2.60,
    88: 2.21,
    89: 2.15,
    90: 2.06,
    91: 2.00,
    92: 1.96,
    93: 1.90,
    94: 1.87,
    95: 1.80,
    96: 1.69,
}

_BOND_TOLERANCE = 0.45  # angstroms — sum of covalent radii is stretched by this


@dataclass
class Atom:
    symbol: str
    position: np.ndarray  # [3] float64, angstroms
    atomic_number: int


@dataclass
class StructureData:
    atoms: list[Atom]
    pbc: tuple[bool, bool, bool]
    lattice_vectors: np.ndarray | None  # [3,3] or None for molecules
    bonds: list[tuple[int, int]] | None  # explicit from bonds.json, or None


@dataclass
class GridData:
    # Both ``origin`` and ``voxel_vectors`` are in **bohr** per the QVF
    # v1 contract (design § 1.3a). Renderers that draw alongside atoms
    # (Å) must convert. The :class:`VolumeRenderer` does this at its
    # PyVista boundary; the wavefunction renderer evaluates internally
    # in bohr and converts when emitting the mesh.
    origin: np.ndarray  # [3], bohr
    voxel_vectors: np.ndarray  # [3,3], bohr
    shape: tuple[int, int, int]


@dataclass
class BandsData:
    kpath: dict[str, Any]  # raw kpath.json
    eigenvalues: np.ndarray  # [n_spin, n_kpoints, n_bands]
    fermi: float | None


@dataclass
class SpectraData:
    frequencies: np.ndarray  # [n_modes] cm⁻¹
    intensities: np.ndarray  # [n_modes] km/mol


@dataclass
class TrajectoryData:
    atoms: list[Atom]  # atom types (fixed across frames)
    coords: np.ndarray  # [n_frames, n_atoms, 3]
    energies: list[float] | None  # per-frame energies


@dataclass
class VibrationsData:
    atoms: list[Atom]
    frequencies: np.ndarray  # [n_modes] cm⁻¹
    displacements: np.ndarray  # [n_modes, n_atoms, 3]


@dataclass
class AtomPropertiesData:
    mulliken_charges: np.ndarray | None  # [n_atoms] float64
    loewdin_charges: np.ndarray | None  # [n_atoms] float64


@dataclass
class CitationsData:
    """BibTeX bibliography embedded in the QVF.

    The writer stores `references.bib` as a binary member (utf-8 bytes
    + sha256). We decode here so renderers see a string.
    """
    bibtex: str


@dataclass
class NMRData:
    """NMR section payload.

    Writer (qvf.py::_write_spectra_nmr_section) pass-through of a dict
    with conventional keys: chemical_shifts, shielding_tensors,
    j_couplings, isotope, reference, solvent. Shape of each field is
    not schema-enforced, so we keep the raw dict and let the renderer
    decide what to surface.
    """
    raw: dict


@dataclass
class SymmetryData:
    """spglib-style symmetry analysis embedded in the QVF.

    Writer (qvf.py::_write_symmetry_section) passes through whatever
    dict the producer hands it (space group number, symbol, point
    group, Hall symbol, …). Keys are conventional, not enforced by
    schema. The renderer surfaces every key it finds.
    """
    raw: dict


@dataclass
class SCFHistoryData:
    """Per-iteration SCF trail.

    Writer (qvf.py::_write_scf_history_section) stores a JSON document
    `{"iterations": [{"iter", "energy_eh", "delta_e", "diis_error"},
    ...]}`. Field set is conventional, not enforced by schema — keep
    them as plain dicts so the renderer can degrade gracefully when
    a key is missing (e.g. non-DIIS solvers have no `diis_error`).
    """
    iterations: list[dict[str, float | int]]


@dataclass
class BasisShell:
    """One contracted shell of the GTO basis (§ 1.5)."""

    center: int  # 0-based atom index
    l: int  # angular momentum
    exponents: np.ndarray  # [n_prim] bohr^-2
    coefficients: np.ndarray  # [n_prim] contraction coefficients (normalized primitives)
    pure: bool = True  # spherical (True) or Cartesian (False)


@dataclass
class WavefunctionGTOData:
    """`wavefunction.gto` payload.

    For restricted: `mo_coefficients` is set, `mo_coefficients_alpha`
    and `mo_coefficients_beta` are None.

    For unrestricted: the per-spin variants are set, and
    `mo_coefficients` is None.

    Each coefficient matrix is row-major [n_mo, n_ao].
    """

    structure_ref: str
    pure: bool
    n_ao: int
    shells: list[BasisShell]
    spin: str  # "restricted" | "unrestricted"
    orbital_kind: str  # "canonical" | "natural" | "localized"
    energies: np.ndarray | None  # [n_mo] (restricted only)
    occupations: np.ndarray | None  # [n_mo] (restricted only)
    symmetry_labels: list[str] | None
    alpha_energies: np.ndarray | None
    alpha_occupations: np.ndarray | None
    beta_energies: np.ndarray | None
    beta_occupations: np.ndarray | None
    mo_coefficients: np.ndarray | None  # [n_mo, n_ao] float64
    mo_coefficients_alpha: np.ndarray | None
    mo_coefficients_beta: np.ndarray | None


@dataclass
class ReactionWaypoint:
    """One reaction-path waypoint annotation."""

    frame_index: int
    label: str
    kind: str  # "reactant" | "transition_state" | "intermediate" | "product" | "point"
    energy_eh: float | None = None


@dataclass
class ReactionPathData:
    """Self-contained reaction path (`reaction.path`).

    Binary layout matches `trajectory`: coords[n_frames, n_atoms, 3] in Å.

    Periodic reaction paths (qvf_version >= 2) additionally carry the
    per-frame lattice + dimensionality so the renderer can draw the
    cell and wrap atoms across periodic boundaries:

    * ``lattice`` is None for molecular paths. Otherwise a float64
      array of shape (3, 3) when every frame shares the same lattice
      (the fixed-cell common case) or (n_frames, 3, 3) for variable-
      cell paths. Columns are a, b, c, in **bohr** — matching
      ``vibeqc.PeriodicSystem.lattice``. The renderer is responsible
      for the bohr→Å conversion (coords are already Å).
    * ``dim`` is None for molecular paths. Otherwise an int in
      {1, 2, 3}; ``dim_per_frame`` is set instead when frames carry
      different dimensionalities.
    """

    atoms: list[Atom]
    coords: np.ndarray  # [n_frames, n_atoms, 3] float64 Å
    energies: list[float] | None
    reaction_coordinate: list[float] | None
    waypoints: list[ReactionWaypoint]
    lattice: np.ndarray | None = None  # bohr; None or (3,3) or (n_frames,3,3)
    dim: int | None = None  # 1/2/3; shared across frames
    dim_per_frame: list[int] | None = None  # variable-dim case


@dataclass
class ReactionWaypointsData:
    """Waypoint annotations layered on a referenced trajectory."""

    trajectory_ref: str
    waypoints: list[ReactionWaypoint]
    reaction_coordinate: list[float] | None


def _parse_bond_pairs(raw: Any) -> list[tuple[int, int]]:
    """Parse a QVF bonds payload into zero-based atom-index pairs."""
    if not isinstance(raw, dict):
        raise QVFError("bonds payload is not a JSON object")
    pairs = raw.get("pairs", [])
    if not isinstance(pairs, list):
        raise QVFError("bonds payload field 'pairs' is not a list")

    bonds: list[tuple[int, int]] = []
    for idx, pair in enumerate(pairs):
        if not isinstance(pair, dict):
            raise QVFError(f"bonds pair {idx} is not a JSON object")
        try:
            i = int(pair["i"])
            j = int(pair["j"])
        except (KeyError, TypeError, ValueError) as e:
            raise QVFError(f"bonds pair {idx} is missing integer i/j fields") from e
        bonds.append((i, j))
    return bonds


# ── Manifest loader ───────────────────────────────────────────────────────

_SCHEMA_PATH = Path(__file__).parent / "schema.json"
_SCHEMA_PATH_V2 = Path(__file__).parent / "schema_v2.json"

# Load both schemas once at module level. v2 ships periodic reaction.path
# with per-frame lattice + dim; v1 is the original molecular contract.
with open(_SCHEMA_PATH) as f:
    _MANIFEST_SCHEMA: dict[str, Any] = json.load(f)
with open(_SCHEMA_PATH_V2) as f:
    _MANIFEST_SCHEMA_V2: dict[str, Any] = json.load(f)

_SCHEMAS_BY_VERSION: dict[int, dict[str, Any]] = {
    1: _MANIFEST_SCHEMA,
    2: _MANIFEST_SCHEMA_V2,
}


def _load_manifest_schema(qvf_version: int = 1) -> dict[str, Any]:
    """Return the JSON Schema for manifest.json (cached, per version)."""
    schema = _SCHEMAS_BY_VERSION.get(qvf_version)
    if schema is None:
        raise ManifestValidationError(
            f"unsupported qvf_version {qvf_version!r}; vibe-view "
            f"supports versions {sorted(_SCHEMAS_BY_VERSION)}"
        )
    return schema


# ── QVFReader ─────────────────────────────────────────────────────────────


QVFSource = Union[str, Path, bytes, bytearray, memoryview, IO[bytes]]
"""Anything `QVFReader` can open: a filesystem path, raw zip bytes, or a
seekable binary file-like object (BytesIO, an opened file, etc.).

Used by vibe-qc to hand a freshly built QVF over to the viewer without
touching disk."""


class QVFReader:
    """Read a .qvf archive and provide lazy access to sections.

    The archive can come from any of:

    * a filesystem path (``str`` / :class:`pathlib.Path` ending in ``.qvf``)
    * raw zip bytes (``bytes`` / ``bytearray`` / ``memoryview``) — vibe-qc
      can build a QVF in memory via ``qvf_bytes(...)`` and pass them in
    * a seekable binary file-like object (``BytesIO``, opened binary
      file, …)

    The zip handle is kept open for the lifetime of the reader so lazy
    binary blobs can be read on demand. ``close()`` (or use as a context
    manager) releases it; if the reader owns an in-memory buffer that is
    released too.
    """

    def __init__(self, source: QVFSource) -> None:
        self._path: Path | None
        self._buffer: io.BytesIO | None = None

        if isinstance(source, (str, Path)):
            self._path = Path(source)
            if not self._path.exists():
                raise QVFOpenError(f"file not found: {self._path}")
            if self._path.suffix != ".qvf":
                raise QVFOpenError(
                    f"expected .qvf extension, got: {self._path.suffix}"
                )
            try:
                self._zf = zipfile.ZipFile(self._path, "r")
            except zipfile.BadZipFile as e:
                raise QVFOpenError(f"not a valid zip archive: {e}") from None
        elif isinstance(source, (bytes, bytearray, memoryview)):
            self._path = None
            self._buffer = io.BytesIO(bytes(source))
            try:
                self._zf = zipfile.ZipFile(self._buffer, "r")
            except zipfile.BadZipFile as e:
                raise QVFOpenError(f"not a valid zip archive: {e}") from None
        elif hasattr(source, "read"):
            # Generic seekable file-like
            self._path = None
            try:
                self._zf = zipfile.ZipFile(source, "r")
            except zipfile.BadZipFile as e:
                raise QVFOpenError(f"not a valid zip archive: {e}") from None
        else:
            raise QVFOpenError(
                f"QVFReader: unsupported source type {type(source).__name__}; "
                "expected path, bytes, or seekable file-like object"
            )

        # ── Step 1: extract + parse manifest.json ──
        try:
            raw = self._zf.read("manifest.json").decode("utf-8")
        except KeyError:
            self.close()
            raise QVFOpenError("manifest.json not found in archive") from None

        try:
            manifest_dict = json.loads(raw)
        except json.JSONDecodeError as e:
            self.close()
            raise QVFOpenError(f"manifest.json is not valid JSON: {e}") from None

        # ── Step 2: validate against JSON Schema ──
        # Pick the schema by the manifest's declared qvf_version so v2
        # periodic reaction.path archives validate against the v2
        # schema. An unknown version is reported as such instead of
        # mis-validating against the v1 schema.
        try:
            declared_version = int(manifest_dict.get("qvf_version", 1))
        except (TypeError, ValueError):
            declared_version = 1
        try:
            chosen_schema = _load_manifest_schema(declared_version)
        except ManifestValidationError:
            self.close()
            raise
        try:
            jsonschema.validate(instance=manifest_dict, schema=chosen_schema)
        except jsonschema.ValidationError as e:
            self.close()
            raise ManifestValidationError(str(e)) from None

        # ── Step 3: parse into pydantic models ──
        self._manifest = Manifest.model_validate(manifest_dict)

        # Build section lookup
        self._section_by_id: dict[str, Section] = {}
        for s in self._manifest.sections:
            if s.id in self._section_by_id:
                self.close()
                raise ManifestValidationError(
                    f"duplicate section id in manifest: {s.id!r}"
                )
            self._section_by_id[s.id] = s

        # Per-section error state (populated on sha256 mismatch)
        self._section_errors: dict[str, str] = {}

    @property
    def manifest(self) -> Manifest:
        return self._manifest

    @property
    def path(self) -> Path | None:
        """Path to the .qvf file on disk, or ``None`` for in-memory archives."""
        return self._path

    @property
    def source(self) -> Source:
        return self._manifest.source

    @property
    def sections(self) -> list[Section]:
        return self._manifest.sections

    @property
    def viewer_defaults(self) -> ViewerDefaults | None:
        return self._manifest.viewer_defaults

    def has_section(self, section_id: str) -> bool:
        return section_id in self._section_by_id

    def get_section(self, section_id: str) -> Section:
        try:
            return self._section_by_id[section_id]
        except KeyError:
            raise SectionNotFoundError(f"section not found: {section_id!r}") from None

    def section_error(self, section_id: str) -> str | None:
        """Return error string if this section had a sha256 mismatch, or None."""
        return self._section_errors.get(section_id)

    # ── sha256 verification (Rule 4) ──────────────────────────────────

    def _verify_and_read(self, member: MemberSpec) -> bytes:
        """Read a member from the zip and verify its sha256.

        Returns the raw bytes. Raises SHA256MismatchError on mismatch.
        """
        try:
            data = self._zf.read(member.path)
        except KeyError:
            raise QVFError(
                f"member file {member.path!r} declared in manifest but missing from zip"
            ) from None
        except zipfile.BadZipFile as e:
            raise QVFOpenError(f"cannot read zip member {member.path!r}: {e}") from None
        actual = hashlib.sha256(data).hexdigest()
        if actual != member.sha256:
            raise SHA256MismatchError(
                f"sha256 mismatch for {member.path!r}: expected {member.sha256}, got {actual}"
            )
        return data

    def _read_json_member(self, section_id: str, member_name: str) -> Any:
        """Read + verify a JSON member. Returns parsed JSON."""
        section = self.get_section(section_id)
        member = section.members.get(member_name)
        if member is None:
            raise QVFError(f"member {member_name!r} not found in section {section_id!r}")
        raw = self._verify_and_read(member)
        try:
            return json.loads(raw.decode("utf-8"))
        except UnicodeDecodeError as e:
            raise QVFError(
                f"member {member.path!r} in section {section_id!r} is not UTF-8: {e}"
            ) from None
        except json.JSONDecodeError as e:
            raise QVFError(
                f"member {member.path!r} in section {section_id!r} is not valid JSON: {e}"
            ) from None

    def _read_binary_member(self, section_id: str, member_name: str) -> np.ndarray:
        """Read + verify a binary member. Returns numpy array."""
        section = self.get_section(section_id)
        member = section.members.get(member_name)
        if member is None:
            raise QVFError(f"member {member_name!r} not found in section {section_id!r}")
        raw = self._verify_and_read(member)
        dtype = member.dtype or "float32"
        try:
            arr = np.frombuffer(raw, dtype=np.dtype(dtype))
            if member.shape is not None:
                arr = arr.reshape(member.shape)
        except ValueError as e:
            raise QVFError(
                f"member {member.path!r} in section {section_id!r} cannot be "
                f"read as dtype={dtype!r}, shape={member.shape!r}: {e}"
            ) from None
        return arr

    # ── Eager data extraction ─────────────────────────────────────────

    def read_structure(self) -> StructureData:
        """Read + verify the structure section. Always eager."""
        section_id = "structure"
        try:
            raw = self._read_json_member(section_id, "structure")
        except QVFError as e:
            self._section_errors[section_id] = str(e)
            raise

        atoms = [
            Atom(
                symbol=a["symbol"],
                position=np.array(a["position"], dtype=np.float64),
                atomic_number=a["atomic_number"],
            )
            for a in raw["atoms"]
        ]
        pbc = tuple(raw.get("pbc", [False, False, False]))
        lattice = raw.get("lattice_vectors")
        if lattice is not None:
            lattice = np.array(lattice, dtype=np.float64)

        # Try to read explicit bonds if present. Early draft archives
        # embedded a `bonds` member in the structure section; canonical
        # QVF v1 archives carry a separate `kind: bonds` section.
        bonds = None
        structure_section = self.get_section(section_id)
        if "bonds" in structure_section.members:
            try:
                bonds_raw = self._read_json_member(section_id, "bonds")
                bonds = _parse_bond_pairs(bonds_raw)
            except QVFError as exc:
                self._section_errors[section_id] = str(exc)
                raise
        else:
            bond_section = next(
                (s for s in self._manifest.sections if s.kind == "bonds"),
                None,
            )
            if bond_section is not None:
                try:
                    bonds = self.read_bonds(bond_section.id)
                except QVFError as e:
                    self._section_errors[bond_section.id] = str(e)
                    raise QVFError(
                        f"explicit bonds section {bond_section.id!r} could not be read: {e}"
                    ) from None

        return StructureData(atoms=atoms, pbc=pbc, lattice_vectors=lattice, bonds=bonds)

    def read_bonds(self, section_id: str) -> list[tuple[int, int]]:
        """Read a canonical ``bonds`` section as zero-based atom pairs."""
        raw = self._read_json_member(section_id, "bonds")
        return _parse_bond_pairs(raw)

    def infer_bonds(self, structure: StructureData) -> list[tuple[int, int]]:
        """Infer bonds from covalent radii when no explicit bonds section."""
        if structure.bonds is not None:
            return structure.bonds

        bonds: list[tuple[int, int]] = []
        positions = np.array([a.position for a in structure.atoms])
        nums = [a.atomic_number for a in structure.atoms]
        n = len(structure.atoms)

        for i in range(n):
            ri = _COVALENT_RADII.get(nums[i], 1.5)
            for j in range(i + 1, n):
                rj = _COVALENT_RADII.get(nums[j], 1.5)
                cutoff = (ri + rj) + _BOND_TOLERANCE
                dist = float(np.linalg.norm(positions[i] - positions[j]))
                if dist < cutoff:
                    bonds.append((i, j))
        return bonds

    # ── Lazy binary extraction for volumes ────────────────────────────

    def read_volume_grid(self, section_id: str) -> GridData:
        """Read the grid descriptor for a volume section (always eager — tiny)."""
        raw = self._read_json_member(section_id, "grid")
        return GridData(
            origin=np.array(raw["origin"], dtype=np.float64),
            voxel_vectors=np.array(raw["voxel_vectors"], dtype=np.float64),
            shape=tuple(raw["shape"]),
        )

    def read_volume_data(self, section_id: str) -> np.ndarray:
        """Read + verify the volumetric .dat blob (lazy — call on UI activation)."""
        return self._read_binary_member(section_id, "data")

    # ── Other section readers ─────────────────────────────────────────

    def read_bands(self, section_id: str) -> BandsData:
        kpath = self._read_json_member(section_id, "kpath")
        eigenvalues = self._read_binary_member(section_id, "eigenvalues")
        if eigenvalues.ndim == 2:
            eigenvalues = eigenvalues[np.newaxis, :, :]
        elif eigenvalues.ndim != 3:
            raise QVFError(
                f"bands section {section_id!r}: eigenvalues must have rank 3 "
                f"[n_spin, n_kpoints, n_bands], got shape {eigenvalues.shape}"
            )
        n_kpoints = kpath.get("n_kpoints")
        if n_kpoints is not None and int(n_kpoints) != eigenvalues.shape[1]:
            raise QVFError(
                f"bands section {section_id!r}: kpath n_kpoints={n_kpoints} "
                f"does not match eigenvalues shape {eigenvalues.shape}"
            )
        n_bands = kpath.get("n_bands")
        if n_bands is not None and int(n_bands) != eigenvalues.shape[2]:
            raise QVFError(
                f"bands section {section_id!r}: kpath n_bands={n_bands} "
                f"does not match eigenvalues shape {eigenvalues.shape}"
            )
        fermi = kpath.get("fermi")
        return BandsData(kpath=kpath, eigenvalues=eigenvalues, fermi=fermi)

    def read_spectra(self, section_id: str) -> SpectraData:
        raw = self._read_json_member(section_id, "spectrum")
        return SpectraData(
            frequencies=np.array(raw["frequencies"], dtype=np.float64),
            intensities=np.array(raw["intensities"], dtype=np.float64),
        )

    def read_trajectory(self, section_id: str) -> TrajectoryData:
        meta = self._read_json_member(section_id, "metadata")
        coords = self._read_binary_member(section_id, "coords")
        atoms = [
            Atom(
                symbol=a["symbol"],
                position=np.zeros(3),
                atomic_number=a["atomic_number"],
            )
            for a in meta["atoms"]
        ]
        return TrajectoryData(
            atoms=atoms,
            coords=coords,
            energies=meta.get("energies"),
        )

    def read_vibrations(self, section_id: str) -> VibrationsData:
        meta = self._read_json_member(section_id, "metadata")
        displacements = self._read_binary_member(section_id, "displacements")
        atoms = [
            Atom(
                symbol=a["symbol"],
                position=np.array(a["position"], dtype=np.float64),
                atomic_number=a["atomic_number"],
            )
            for a in meta["atoms"]
        ]
        return VibrationsData(
            atoms=atoms,
            frequencies=np.array(meta["frequencies"], dtype=np.float64),
            displacements=displacements,
        )

    def read_wavefunction_gto(self, section_id: str) -> WavefunctionGTOData:
        """Read a `wavefunction.gto` section (basis + MO coefficients)."""
        basis_raw = self._read_json_member(section_id, "basis")
        meta_raw = self._read_json_member(section_id, "mo_metadata")

        global_pure = bool(basis_raw.get("pure", True))
        shells = [
            BasisShell(
                center=int(sh["center"]),
                l=int(sh["l"]),
                exponents=np.asarray(sh["exponents"], dtype=np.float64),
                coefficients=np.asarray(sh["coefficients"], dtype=np.float64),
                pure=bool(sh.get("pure", global_pure)),
            )
            for sh in basis_raw.get("shells", [])
        ]
        n_ao = int(basis_raw.get("n_ao", 0))
        structure_ref = str(basis_raw.get("structure_ref", "structure"))

        spin = str(meta_raw.get("spin", "restricted"))
        orbital_kind = str(meta_raw.get("orbital_kind", "canonical"))

        energies: np.ndarray | None = None
        occupations: np.ndarray | None = None
        sym_labels: list[str] | None = None
        a_e = a_o = b_e = b_o = None
        coeffs = a_c = b_c = None

        if spin == "unrestricted":
            alpha = meta_raw.get("alpha", {}) or {}
            beta = meta_raw.get("beta", {}) or {}
            a_e = np.asarray(alpha.get("energies", []), dtype=np.float64)
            a_o = np.asarray(alpha.get("occupations", []), dtype=np.float64)
            b_e = np.asarray(beta.get("energies", []), dtype=np.float64)
            b_o = np.asarray(beta.get("occupations", []), dtype=np.float64)
            sym_labels = alpha.get("symmetry_labels") or beta.get("symmetry_labels")
            if "mo_coefficients_alpha" in self.get_section(section_id).members:
                a_c = self._read_binary_member(section_id, "mo_coefficients_alpha")
            if "mo_coefficients_beta" in self.get_section(section_id).members:
                b_c = self._read_binary_member(section_id, "mo_coefficients_beta")
        else:
            energies = np.asarray(meta_raw.get("energies", []), dtype=np.float64)
            occupations = np.asarray(meta_raw.get("occupations", []), dtype=np.float64)
            sym_labels = meta_raw.get("symmetry_labels")
            if "mo_coefficients" in self.get_section(section_id).members:
                coeffs = self._read_binary_member(section_id, "mo_coefficients")

        return WavefunctionGTOData(
            structure_ref=structure_ref,
            pure=global_pure,
            n_ao=n_ao,
            shells=shells,
            spin=spin,
            orbital_kind=orbital_kind,
            energies=energies,
            occupations=occupations,
            symmetry_labels=sym_labels,
            alpha_energies=a_e,
            alpha_occupations=a_o,
            beta_energies=b_e,
            beta_occupations=b_o,
            mo_coefficients=coeffs,
            mo_coefficients_alpha=a_c,
            mo_coefficients_beta=b_c,
        )

    def read_reaction_path(self, section_id: str) -> ReactionPathData:
        """Read a `reaction.path` section (self-contained: frames + waypoints).

        v2 archives may carry a `lattice` binary member + a `dim`
        integer in the metadata JSON; both surface on the returned
        ``ReactionPathData`` (None on v1 / molecular).
        """
        meta = self._read_json_member(section_id, "metadata")
        coords = self._read_binary_member(section_id, "coords")
        atoms = [
            Atom(
                symbol=a["symbol"],
                position=np.zeros(3),
                atomic_number=a["atomic_number"],
            )
            for a in meta["atoms"]
        ]
        waypoints = [
            ReactionWaypoint(
                frame_index=int(wp["frame_index"]),
                label=str(wp["label"]),
                kind=str(wp["kind"]),
                energy_eh=(
                    float(wp["energy_eh"]) if "energy_eh" in wp else None
                ),
            )
            for wp in meta.get("waypoints", [])
        ]
        rxn_coord = meta.get("reaction_coordinate")

        # v2 periodic fields. The lattice binary member is optional —
        # absent for molecular paths (qvf_version=1).
        section = self.get_section(section_id)
        lattice = None
        if section.members.get("lattice") is not None:
            lattice = self._read_binary_member(section_id, "lattice")
        dim_field = meta.get("dim")
        dim_per_frame = meta.get("dim_per_frame")
        return ReactionPathData(
            atoms=atoms,
            coords=coords,
            energies=meta.get("energies"),
            reaction_coordinate=list(rxn_coord) if rxn_coord is not None else None,
            waypoints=waypoints,
            lattice=lattice,
            dim=int(dim_field) if dim_field is not None else None,
            dim_per_frame=[int(d) for d in dim_per_frame] if dim_per_frame else None,
        )

    def read_reaction_waypoints(self, section_id: str) -> ReactionWaypointsData:
        """Read a `reaction.waypoints` annotation that points at a trajectory."""
        section = self.get_section(section_id)
        traj_ref = section.trajectory_ref
        if traj_ref is None:
            extras = getattr(section, "model_extra", None) or {}
            traj_ref = extras.get("trajectory_ref")
        if not traj_ref:
            raise QVFError(
                f"reaction.waypoints {section_id!r} missing trajectory_ref"
            )
        payload = self._read_json_member(section_id, "waypoints")
        waypoints = [
            ReactionWaypoint(
                frame_index=int(wp["frame_index"]),
                label=str(wp["label"]),
                kind=str(wp["kind"]),
                energy_eh=(
                    float(wp["energy_eh"]) if "energy_eh" in wp else None
                ),
            )
            for wp in payload.get("waypoints", [])
        ]
        rxn_coord = payload.get("reaction_coordinate")
        return ReactionWaypointsData(
            trajectory_ref=str(traj_ref),
            waypoints=waypoints,
            reaction_coordinate=list(rxn_coord) if rxn_coord is not None else None,
        )

    def read_atom_properties(self, section_id: str) -> AtomPropertiesData:
        """Read Mulliken and/or Löwdin charges from atom_properties section."""
        mulliken = None
        loewdin = None
        section = self.get_section(section_id)
        if "mulliken_charge" in section.members:
            mulliken = self._read_binary_member(section_id, "mulliken_charge")
        if "loewdin_charge" in section.members:
            loewdin = self._read_binary_member(section_id, "loewdin_charge")
        return AtomPropertiesData(
            mulliken_charges=mulliken,
            loewdin_charges=loewdin,
        )

    def read_nmr(self, section_id: str) -> NMRData:
        """Read the NMR spectrum payload from a spectra.nmr section.

        The producer hands the writer an opaque dict with conventional
        keys (chemical_shifts, shielding_tensors, j_couplings,
        isotope, reference, solvent); we surface the dict as-is.
        """
        raw = self._read_json_member(section_id, "spectrum")
        if not isinstance(raw, dict):
            raise QVFError(
                f"spectra.nmr section {section_id!r}: 'spectrum' member is "
                "not a JSON object"
            )
        return NMRData(raw=raw)

    def read_symmetry(self, section_id: str) -> SymmetryData:
        """Read the spglib-style symmetry summary from a
        structure.symmetry section."""
        raw = self._read_json_member(section_id, "data")
        if not isinstance(raw, dict):
            raise QVFError(
                f"structure.symmetry section {section_id!r}: 'data' member "
                "is not a JSON object"
            )
        return SymmetryData(raw=raw)

    def read_scf_history(self, section_id: str) -> SCFHistoryData:
        """Read the per-iteration SCF history from a scf_history section.

        Iteration record keys are conventional (see
        :class:`SCFHistoryData`); the renderer tolerates missing keys.
        """
        raw = self._read_json_member(section_id, "iterations")
        iters = raw.get("iterations") if isinstance(raw, dict) else None
        if not isinstance(iters, list):
            raise QVFError(
                f"scf_history section {section_id!r}: missing or malformed "
                "'iterations' list"
            )
        return SCFHistoryData(iterations=iters)

    def read_citations(self, section_id: str) -> CitationsData:
        """Read the BibTeX bytes from a citations section.

        The writer stores them as a binary member (utf-8) per design
        § 1.4 / qvf.py::_write_citations_section. We decode to str
        here so renderers don't need to know the on-disk encoding.
        """
        section = self.get_section(section_id)
        member = section.members.get("references")
        if member is None:
            raise QVFError(
                f"member 'references' not found in citations section "
                f"{section_id!r}"
            )
        raw = self._verify_and_read(member)
        return CitationsData(bibtex=raw.decode("utf-8"))

    def close(self) -> None:
        self._zf.close()
        if self._buffer is not None:
            self._buffer.close()
            self._buffer = None

    def __enter__(self) -> QVFReader:
        return self

    def __exit__(self, *args: object) -> None:
        self.close()
