"""``vibeqc.output.citations`` — DB loader, assembly, writers.

Pins the contract documented in
``docs/design_output_module.md § Citation database``:

  1. The bundled ``database.toml`` loads cleanly via
     :func:`load_default_database` — no missing entries referenced
     from any route.
  2. ``schema_version`` matches ``CitationDatabase.SCHEMA_VERSION``.
  3. Every functional / basis / dispersion model exercised in the test
     suite resolves through ``assemble(plan, ...)`` to at least one
     citation (no silent gaps in routing for the v0.8.0 surface).
  4. The vibe-qc software citation is always the first entry.
  5. ``write_bibtex`` and ``write_references`` produce non-empty files
     containing the assembled bibtex_keys / titles.
  6. Template substitution: ``{{VIBEQC_VERSION}}`` resolves to the
     running package version in the software citation.
"""

from __future__ import annotations

from pathlib import Path

import pytest
from vibeqc.output import OutputPlan
from vibeqc.output.citations import (
    Citation,
    CitationDatabase,
    DatabaseError,
    assemble,
    format_references,
    format_references_block,
    load_database,
    load_default_database,
    write_bibtex,
    write_references,
)

# ---------------------------------------------------------------------------
# Database load + structural integrity
# ---------------------------------------------------------------------------


def test_default_database_loads_without_errors() -> None:
    """The bundled database loads and exposes the core entry keys."""
    database = load_default_database()
    assert isinstance(database, CitationDatabase)
    # Keys the bundled database must carry for the v0.8.0-on-main coverage.
    expected_keys = (
        "vibeqc_software",
        "libint_valeev",
        "libxc_2018",
        "peintinger_pob_tzvp_2013",
        "pbe_1996",
        "becke_1993",
        "pulay_diis_1980",
    )
    known = database.entries()
    for key in expected_keys:
        assert key in known


def test_database_schema_version_matches_loader() -> None:
    """The loaded default database reports schema version ``"1"``.

    A drift of ``schema_version`` in the TOML is meant to surface here
    quickly, via the loader's strict schema-version check.
    """
    loaded = load_default_database()
    assert loaded.SCHEMA_VERSION == "1"


def test_load_database_rejects_dangling_route_reference(
    tmp_path: Path,
) -> None:
    """A route naming an undeclared entry makes ``load_database`` raise.

    Writes a small TOML file whose only route references an entry key
    that is not defined, then expects ``DatabaseError`` with a message
    matching ``"missing entry"``.
    """
    toml_lines = (
        'schema_version = "1"\n',
        "[entries.foo]\n",
        'kind = "article"\n',
        'bibtex_key = "foo_2020"\n',
        'authors = ["A, B"]\n',
        'title = "T"\n',
        "\n",
        "[routes.basis_sets]\n",
        # The route below points at an entry that is never declared.
        '"bar" = ["nonexistent_entry"]\n',
    )
    broken_db = tmp_path / "broken.toml"
    broken_db.write_text("".join(toml_lines), encoding="utf-8")
    with pytest.raises(DatabaseError, match="missing entry"):
        load_database(broken_db)


# ---------------------------------------------------------------------------
# Assembly: software always first, expected entries fire
# ---------------------------------------------------------------------------


def _plan(
    tmp_path: Path, *, basis: str, functional: str | None = None, method: str = "rks"
) -> OutputPlan:
    """Build an ``OutputPlan`` whose output stem is ``tmp_path / "job"``.

    ``basis``, ``functional`` and ``method`` are forwarded unchanged to
    ``OutputPlan.from_run_job_kwargs``.
    """
    job_kwargs = {
        "output": tmp_path / "job",
        "method": method,
        "basis": basis,
        "functional": functional,
    }
    return OutputPlan.from_run_job_kwargs(**job_kwargs)


def test_software_citation_is_first(tmp_path: Path) -> None:
    """The first assembled citation is the ``vibeqc_software`` entry."""
    assembled = assemble(_plan(tmp_path, basis="sto-3g"))
    assert len(assembled) > 0
    leading = assembled.citations[0]
    assert leading.key == "vibeqc_software"


def test_libint_always_fires_after_software(tmp_path: Path) -> None:
    """``libint_valeev`` is cited and sits at index 1 of the assembly."""
    assembled = assemble(_plan(tmp_path, basis="sto-3g"))
    ordered_keys = [entry.key for entry in assembled.citations]
    assert "libint_valeev" in ordered_keys
    # Index 1 is the slot right after the first citation.
    position = ordered_keys.index("libint_valeev")
    assert position == 1


def test_pbe_run_pulls_libxc_and_pbe(tmp_path: Path) -> None:
    """A PBE run cites ``libxc_2018`` and ``pbe_1996`` with no warnings."""
    assembled = assemble(_plan(tmp_path, basis="sto-3g", functional="PBE"))
    cited = {entry.key for entry in assembled.citations}
    for required in ("libxc_2018", "pbe_1996"):
        assert required in cited
    assert assembled.warnings == ()


def test_pob_tzvp_rev2_pulls_both_basis_papers(tmp_path: Path) -> None:
    """``pob-tzvp-rev2`` cites both the 2013 and the 2019 pob entries."""
    assembled = assemble(_plan(tmp_path, basis="pob-tzvp-rev2"))
    cited = {entry.key for entry in assembled.citations}
    for paper in ("peintinger_pob_tzvp_2013", "vilela_oliveira_pob_rev2_2019"):
        assert paper in cited


def test_b3lyp_pulls_becke_lyp_and_stephens(tmp_path: Path) -> None:
    """A B3LYP run cites its three component entries plus ``libxc_2018``."""
    b3lyp_plan = _plan(tmp_path, basis="6-31g*", functional="B3LYP")
    cited = {entry.key for entry in assemble(b3lyp_plan).citations}
    expected_keys = (
        "becke_1993",
        "lee_yang_parr_1988",
        "stephens_b3lyp_1994",
        "libxc_2018",
    )
    for required in expected_keys:
        assert required in cited


def test_diis_default_fires(tmp_path: Path) -> None:
    """A default assembly cites both Pulay DIIS entries (1980 and 1982)."""
    assembled = assemble(_plan(tmp_path, basis="sto-3g"))
    cited = {entry.key for entry in assembled.citations}
    for diis_key in ("pulay_diis_1980", "pulay_diis_1982"):
        assert diis_key in cited


def test_unknown_basis_emits_warning_but_does_not_raise(
    tmp_path: Path,
) -> None:
    """An unrouted basis yields a warning while other routes still fire.

    The assembly must not raise, must mention the unknown basis name in
    at least one warning, and must still contain the software, libint
    and DIIS citations.
    """
    result = assemble(_plan(tmp_path, basis="totally-made-up-basis"))
    assert any("totally-made-up-basis" in w for w in result.warnings)
    # The software + libint + DIIS routes still fire. Assert all three
    # so the check covers everything this comment promises, not only
    # the software entry.
    keys = {c.key for c in result.citations}
    for always_on in ("vibeqc_software", "libint_valeev", "pulay_diis_1980"):
        assert always_on in keys, (
            f"{always_on!r} missing from assembly with an unknown basis"
        )


def test_unknown_functional_emits_warning_but_does_not_raise(
    tmp_path: Path,
) -> None:
    """An unrouted functional is named in at least one assembly warning."""
    unknown_plan = _plan(tmp_path, basis="sto-3g", functional="MADE-UP-XC")
    assembled = assemble(unknown_plan)
    mentions = ["MADE-UP-XC" in message for message in assembled.warnings]
    assert any(mentions)


def test_dispersion_d3bj_pulls_both_grimme_papers(tmp_path: Path) -> None:
    """``dispersion="d3bj"`` adds both Grimme entries (2010 and 2011)."""
    pbe_plan = _plan(tmp_path, basis="6-31g*", functional="PBE")
    database = load_default_database()
    assembled = database.assemble_from_plan(pbe_plan, dispersion="d3bj")
    cited = {entry.key for entry in assembled.citations}
    for grimme_key in ("grimme_d3_2010", "grimme_d3bj_2011"):
        assert grimme_key in cited


def test_periodic_run_pulls_spglib(tmp_path: Path) -> None:
    """``periodic=True`` adds the ``togo_tanaka_2018`` entry."""
    periodic_plan = _plan(tmp_path, basis="pob-tzvp", functional="PBE")
    database = load_default_database()
    assembled = database.assemble_from_plan(periodic_plan, periodic=True)
    cited = {entry.key for entry in assembled.citations}
    assert "togo_tanaka_2018" in cited


def test_uses_ecp_pulls_libecpint(tmp_path: Path) -> None:
    """``uses_ecp=True`` adds the ``shaw_gilbert_libecpint`` entry."""
    ecp_plan = _plan(tmp_path, basis="def2-tzvp", functional="PBE")
    database = load_default_database()
    assembled = database.assemble_from_plan(ecp_plan, uses_ecp=True)
    cited = {entry.key for entry in assembled.citations}
    assert "shaw_gilbert_libecpint" in cited


def test_uses_ase_pulls_ase_paper(tmp_path: Path) -> None:
    """``uses_ase=True`` adds the ASE entry under both of its keys.

    The entry is checked by its database key (``ase_2017``) and by its
    BibTeX key (``larsen_ase_2017``).
    """
    ase_plan = _plan(tmp_path, basis="6-31g*")
    database = load_default_database()
    assembled = database.assemble_from_plan(ase_plan, uses_ase=True)
    db_keys = {entry.key for entry in assembled.citations}
    tex_keys = {entry.bibtex_key for entry in assembled.citations}
    assert "ase_2017" in db_keys
    assert "larsen_ase_2017" in tex_keys


def test_direct_scf_pulls_almlof_and_haser(tmp_path: Path) -> None:
    """With ``direct_scf=True`` both direct-SCF references are cited:
    Almlöf 1982 and Häser-Ahlrichs 1989."""
    svp_plan = _plan(tmp_path, basis="def2-svp")
    database = load_default_database()
    assembled = database.assemble_from_plan(svp_plan, direct_scf=True)
    cited = {entry.key for entry in assembled.citations}
    for direct_key in ("almlof_direct_scf_1982", "haser_ahlrichs_schwarz_1989"):
        assert direct_key in cited


def test_direct_scf_false_does_not_pull_direct_refs(tmp_path: Path) -> None:
    """Neither direct-SCF reference is cited when ``direct_scf`` is not
    passed."""
    svp_plan = _plan(tmp_path, basis="def2-svp")
    database = load_default_database()
    # No direct_scf keyword: the call relies on its default (False).
    assembled = database.assemble_from_plan(svp_plan)
    cited = {entry.key for entry in assembled.citations}
    for direct_key in ("almlof_direct_scf_1982", "haser_ahlrichs_schwarz_1989"):
        assert direct_key not in cited


# ---------------------------------------------------------------------------
# Deduplication: each entry appears once
# ---------------------------------------------------------------------------


def test_assembled_list_has_no_duplicates(tmp_path: Path) -> None:
    """Every citation key appears at most once in a B3LYP assembly."""
    # b3lyp pulls VWN5 via routes.functionals; an explicit LDA route
    # (not exercised here) must not introduce a second copy.
    assembled = assemble(_plan(tmp_path, basis="6-31g*", functional="B3LYP"))
    seen = set()
    for entry in assembled.citations:
        assert entry.key not in seen
        seen.add(entry.key)


# ---------------------------------------------------------------------------
# Template substitution
# ---------------------------------------------------------------------------


def test_software_citation_resolves_version_template(
    tmp_path: Path,
) -> None:
    """The software citation carries the running package version.

    Also checks that its ``year`` is set and consists of digits only.
    """
    from vibeqc.banner import VIBEQC_VERSION

    assembled = assemble(_plan(tmp_path, basis="sto-3g"))
    software_entry = assembled.citations[0]
    assert software_entry.version == VIBEQC_VERSION
    # Best-effort year check: present and numeric, not compared to today.
    assert software_entry.year
    assert str(software_entry.year).isdigit()


# ---------------------------------------------------------------------------
# Coverage gate: every functional / basis used in tests is routed.
# This is the CI gate that enforces the dev-chat discipline — adding a
# functional without updating database.toml fails here.
# ---------------------------------------------------------------------------

# Hand-picked from the v0.8.0 test surface. Update these tuples when a
# new feature lands; the resulting test failure is the explicit
# reminder to add the matching route to database.toml.
_REQUIRED_FUNCTIONALS = (
    "LDA", "PBE", "PBE0", "B3LYP", "PW91", "B2PLYP",
    # meta-GGA + range-separated hybrids (v0.9.0 XC-library expansion).
    "TPSS", "r2scan", "wb97x", "wb97x-v",
)
# Every method `run_job` accepts that carries a method-specific
# citation route. Mean-field methods (rhf / uhf / rks / uks) are
# deliberately absent — their citations come from the integral
# library + the functional, not a method-specific paper. Post-SCF
# (CCSD / FCI) and composite-3c methods each have a defining paper.
_REQUIRED_METHODS = (
    "direct_scf", "ccsd", "ccsd(t)", "fci",
    "hf-3c", "pbeh-3c", "b97-3c", "b3lyp-3c",
    "r2scan-3c", "wb97x-3c", "hse-3c",
)
# Basis-set names that must assemble without a routing warning naming
# them; the basis gate below compares names case-insensitively.
_REQUIRED_BASIS_SETS = (
    "STO-3G",
    "6-31G*",
    "def2-svp",
    "def2-tzvp",
    "def2-svp-jk",
    "def2-svp-jkfit",
    "def2-tzvp-jk",
    "def2-universal-jkfit",
    "def2-universal-jfit",
    "def2-svp-rifit",
    "cc-pvdz",
    "cc-pvdz-ri",
    "pob-tzvp",
    "pob-dzvp-rev2",
    "pob-tzvp-rev2",
)


@pytest.mark.parametrize("functional", _REQUIRED_FUNCTIONALS)
def test_required_functional_has_a_route(
    tmp_path: Path,
    functional: str,
) -> None:
    """Each required functional assembles without a warning naming it.

    The functional name is matched against the warnings
    case-insensitively, the same way the basis-set gate does, so a
    warning that spells the name in a different case is still caught.
    """
    result = assemble(_plan(tmp_path, basis="sto-3g", functional=functional))
    # No warning that *this* functional was unrouted.
    needle = functional.lower()
    bad = [w for w in result.warnings if needle in w.lower()]
    assert not bad, f"functional {functional!r} has no citation route"


@pytest.mark.parametrize("basis", _REQUIRED_BASIS_SETS)
def test_required_basis_has_a_route(tmp_path: Path, basis: str) -> None:
    """Each required basis set assembles without a warning naming it.

    The basis name is compared against the warnings case-insensitively.
    """
    assembled = assemble(_plan(tmp_path, basis=basis))
    needle = basis.lower()
    bad = []
    for message in assembled.warnings:
        if needle in message.lower():
            bad.append(message)
    assert not bad, f"basis {basis!r} has no citation route"


@pytest.mark.parametrize("method_key", _REQUIRED_METHODS)
def test_required_method_route_is_present(method_key: str) -> None:
    """Each key in ``_REQUIRED_METHODS`` has a route under ``methods``
    in the default database, and every entry that route lists is a
    known entry."""
    database = load_default_database()
    method_routes = database._routes.get("methods", {})
    assert method_key in method_routes, (
        f"method route {method_key!r} missing from database.toml"
    )
    # Every entry the route names must exist in the entries table.
    routed_entries = method_routes[method_key]
    for entry_key in routed_entries:
        assert entry_key in database._entries, (
            f"route methods.{method_key!r} references unknown entry {entry_key!r}"
        )


@pytest.mark.parametrize("method_key", _REQUIRED_METHODS)
def test_required_method_route_actually_fires(
    tmp_path: Path, method_key: str,
) -> None:
    """Regression guard for the ``method=`` routing bug.

    Having a route in the database is not enough: ``assemble()`` has to
    walk ``routes.methods[method]`` and emit those entries. Before the
    fix, the CCSD / direct-SCF routes existed but never reached any
    job's ``.bibtex``.
    """
    database = load_default_database()
    expected = set(database._routes["methods"][method_key])
    assembled = database.assemble(method=method_key, basis="sto-3g")
    emitted = {entry.key for entry in assembled.citations}
    # Anything declared by the route but absent from the assembly.
    missing = expected.difference(emitted)
    assert not missing, (
        f"method {method_key!r} declares routes {sorted(expected)} "
        f"but assemble() did not emit {sorted(missing)}"
    )


def test_mean_field_methods_have_no_method_route_warning(
    tmp_path: Path,
) -> None:
    """RHF / UHF / RKS / UKS assemble without any warning mentioning
    "method".

    These methods have no method-specific route by design — the
    integral library and the functional cover them — so a missing
    route must not be reported.
    """
    database = load_default_database()
    mean_field_methods = ("rhf", "uhf", "rks", "uks")
    for m in mean_field_methods:
        assembled = database.assemble(method=m, basis="sto-3g")
        bad = []
        for message in assembled.warnings:
            if "method" in message.lower():
                bad.append(message)
        assert not bad, (
            f"mean-field method {m!r} should not warn about a "
            f"missing method route; got {bad}"
        )


# ---------------------------------------------------------------------------
# Writers
# ---------------------------------------------------------------------------


def test_write_bibtex_emits_one_entry_per_citation(tmp_path: Path) -> None:
    """``write_bibtex`` writes ``<stem>.bibtex`` holding the expected keys.

    The file must contain at least as many ``@`` markers as there are
    assembled citations, plus the BibTeX keys of the software, libxc
    and PBE entries.
    """
    stem = tmp_path / "job"
    assembled = assemble(_plan(tmp_path, basis="6-31g*", functional="PBE"))
    written = write_bibtex(stem, assembled)
    assert written == (tmp_path / "job").with_suffix(".bibtex")
    text = written.read_text(encoding="utf-8")
    # Each entry opens with "@<type>{", so "@" is used as the entry marker.
    n_entries = text.count("@")
    assert n_entries >= len(assembled.citations)
    # Software key first, then the libxc + PBE keys a PBE run must carry.
    expected_keys = (
        "peintinger_vibeqc",
        "lehtola_libxc_2018",
        "perdew_burke_ernzerhof_1996",
    )
    for bibtex_key in expected_keys:
        assert bibtex_key in text


def test_write_references_emits_numbered_list(tmp_path: Path) -> None:
    """``write_references`` writes ``<stem>.references`` as a numbered
    list naming the software and the libxc + PBE authors."""
    stem = tmp_path / "job"
    assembled = assemble(_plan(tmp_path, basis="6-31g*", functional="PBE"))
    written = write_references(stem, assembled)
    assert written == (tmp_path / "job").with_suffix(".references")
    text = written.read_text(encoding="utf-8")
    for marker in ("[1]", "[2]"):
        assert marker in text
    lowered = text.lower()
    assert "vibe-qc" in lowered or "vibeqc" in lowered
    # The libxc + PBE citations show up in the plain text as well.
    for author in ("Lehtola", "Perdew"):
        assert author in text


def test_format_references_block_starts_with_section_header() -> None:
    """The formatted block opens with ``## References`` and contains the
    citation's author."""
    sample = Citation(
        key="x",
        kind="article",
        bibtex_key="x_2020",
        authors=("Doe, Jane",),
        title="A study",
        year=2020,
        journal="J. Test",
        volume=1,
        pages="1--2",
    )
    rendered = format_references_block((sample,))
    assert rendered.startswith("## References")
    assert "Doe, Jane" in rendered


def test_format_references_empty_list_is_graceful() -> None:
    """An empty citation tuple still renders the header and a
    "no citations assembled" note."""
    rendered = format_references_block(())
    assert "## References" in rendered
    lowered = rendered.lower()
    assert "no citations assembled" in lowered


def test_format_references_includes_warnings_in_plain_file() -> None:
    """Warnings carried by the assembly appear in ``format_references``
    output."""
    from vibeqc.output.citations.registry import AssembledCitations

    only_warning = AssembledCitations(
        citations=(),
        warnings=("synthetic warning",),
    )
    rendered = format_references(only_warning)
    assert "synthetic warning" in rendered
