"""Fetch job outputs back from where the job ran.

Two functions, mirroring the local / remote split elsewhere:

* :func:`fetch_local` -- copy ``spec.cwd`` (the job's workspace) into a
  local destination directory.
* :func:`fetch_remote` -- ssh into the remote and stream
  ``<remote_vq> tar-workspace JOBID`` through Python's :mod:`tarfile`
  reader to extract into a local destination directory.

Both place the workspace contents under ``output_dir/<dest>/``, where
``<dest>`` is ``<job_name>-<jobid>`` for named jobs and ``<jobid>``
otherwise, so multiple fetches don't collide.

The internal ``vq tar-workspace`` verb (hidden from --help) is what
``fetch_remote`` invokes on the far side -- it knows the remote's state
dir layout via the remote vq's own ``paths`` module, which is why we
delegate rather than constructing a tar command from a hardcoded path.
"""

from __future__ import annotations

import contextlib
import logging
import shutil
import subprocess
import sys
import tarfile
from pathlib import Path

from vq import paths, transport
from vq.config import HostConfig
from vq.ownership import check_owner
from vq.spec import JobSpec, utcnow_iso

log = logging.getLogger(__name__)


def fetch_local(jobid: str, output_dir: Path, *, multi_user: bool = False) -> Path:
    """Materialize the local workspace of ``jobid`` as ``output_dir/<dest>/``.

    ``<dest>`` is ``spec.dest_dirname``: since v0.5.34 that is
    ``<job_name>-<jobid>`` when the spec carries a ``job_name`` and plain
    ``<jobid>`` otherwise, so a directory full of fetched workspaces is
    self-describing.

    The content comes from one of two places:

    * the job's archive tarball, when the spec is marked archived and
      records an ``archive_path`` (the workspace dir is gone after
      ``vq cleanup --archive``);
    * otherwise the live workspace directory ``spec.cwd``.

    ``output_dir`` is created if needed. With ``multi_user=True`` the spec
    is located through ``paths.resolve_spec_path``; otherwise through
    ``paths.spec_path``.

    Returns the destination path.

    Raises :class:`FileNotFoundError` when the job is not in the local
    queue, or when the source (archive file or workspace dir) is missing.
    Raises :class:`FileExistsError` when the destination already exists.
    """
    # Locate the spec. Only the single-user lookup does its own existence
    # check here; the multi-user resolver is trusted to report a miss.
    if not multi_user:
        spec_file = paths.spec_path(jobid)
        if not spec_file.exists():
            raise FileNotFoundError(f"no such job: {jobid}")
    else:
        spec_file = paths.resolve_spec_path(jobid, multi_user=True)
    job = JobSpec.read(spec_file)

    # v0.6.x: multi-user ownership check, before anything touches disk.
    check_owner(job)

    output_dir.mkdir(parents=True, exist_ok=True)
    target = output_dir / job.dest_dirname
    if target.exists():
        raise FileExistsError(f"destination already exists: {target} (remove it or pick another -o)")

    use_archive = job.is_archived and job.archive_path
    if not use_archive:
        workspace = Path(job.cwd)
        if not workspace.is_dir():
            raise FileNotFoundError(
                f"workspace for job {jobid} not found at {workspace} "
                "(was it cleaned up or never materialized?)"
            )
        shutil.copytree(workspace, target)
    else:
        tarball = Path(job.archive_path)
        if not tarball.is_file():
            raise FileNotFoundError(
                f"archive for job {jobid} not found at {tarball} "
                "(record exists in spec but file is gone)"
            )
        # The tarball's top-level directory is expected to be
        # ``<dest_dirname>/``, so unpacking under ``output_dir`` lands the
        # files at ``target`` computed above.
        with tarfile.open(tarball, mode="r:bz2") as bundle:
            bundle.extractall(output_dir, filter="data")

    # Record the fetch time, but only on terminal specs: non-terminal
    # specs are written by the daemon alone (same race-avoidance rule as
    # last_status_at). Failing to persist the stamp is not fatal.
    if job.is_terminal:
        job.last_fetched_at = utcnow_iso()
        with contextlib.suppress(OSError):
            job.write(spec_file)
    return target


def fetch_remote(host_cfg: HostConfig, jobid: str, output_dir: Path) -> Path:
    """Stream the remote workspace for ``jobid`` into ``output_dir/<dest>/``.

    Runs ``ssh <host_cfg.ssh> <host_cfg.remote_vq> tar-workspace <jobid>``
    and feeds the child's stdout (a tarball) to Python's tarfile reader,
    extracting each member under ``output_dir`` as it arrives.

    v0.5.34: ``<dest>`` is whatever the tarball's top-level directory is
    called (``<name>-<jobid>`` for named jobs, ``<jobid>`` otherwise). The
    receiver does not need to know the name in advance: it is read from
    the first tar member, checked for pre-existence, and only then does
    extraction start.

    The stream is opened with ``mode="r|*"`` (streaming with transparent
    decompression). Plain ``"r|"`` only reads uncompressed tar, which
    would break on archived jobs, whose bz2-compressed tarball the remote
    forwards unchanged.

    ``output_dir`` is created if needed. Returns the destination path.

    Raises :class:`FileExistsError` when ``output_dir/<dest>`` already
    exists. Raises ``transport.RemoteError`` (including the remote's
    stderr text) when the stream is not a readable tarball, when the
    tarball has no members, or when the remote command exits non-zero.

    On every exit path the ssh child is terminated if still running, its
    pipes are closed, and it is reaped.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    cmd = [
        "ssh",
        host_cfg.ssh,
        host_cfg.remote_vq,
        "tar-workspace",
        jobid,
    ]
    log.debug("fetch_remote: %s", " ".join(cmd))
    proc = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    assert proc.stdout is not None
    dst: Path | None = None
    try:
        try:
            # NOTE(review): stderr is a pipe that is only drained once
            # streaming is over; a remote that writes more than a pipe
            # buffer of stderr mid-stream would stall the transfer.
            with tarfile.open(fileobj=proc.stdout, mode="r|*") as tf:
                # A streaming tar cannot seek back, so every member has to
                # be extracted as it is iterated. The destination name is
                # inferred from the FIRST member (for the usual
                # ``tf.add(dir, arcname=NAME)`` shape that is the dir entry
                # ``NAME``) and the existence check runs before anything
                # is written.
                for member in tf:
                    if dst is None:
                        # Top-level is the part before the first '/'.
                        top_level = member.name.split("/", 1)[0]
                        dst = output_dir / top_level
                        if dst.exists():
                            raise FileExistsError(
                                f"destination already exists: {dst} (remove it or pick another -o)"
                            )
                    tf.extract(member, output_dir, filter="data")
        except tarfile.TarError as e:
            # Stop the transfer, then collect whatever the remote said.
            proc.kill()
            _, err = proc.communicate()
            raise transport.RemoteError(
                f"failed to extract remote workspace tarball: {e}; "
                f"remote stderr: {err.decode(errors='replace').strip() or '(empty)'}"
            ) from e

        # communicate() drains both pipes while waiting for exit; a bare
        # wait() can deadlock if the child still has output to write.
        _, err = proc.communicate()
        stderr_text = err.decode(errors="replace").strip() or "(empty)"
        if dst is None:
            raise transport.RemoteError(
                "remote tar-workspace produced an empty tarball "
                f"(job {jobid}); remote stderr: {stderr_text}"
            )
        rc = proc.returncode
        if rc != 0:
            raise transport.RemoteError(
                f"remote tar-workspace failed (exit {rc}) on {host_cfg.ssh}: "
                f"{stderr_text}"
            )
        return dst
    finally:
        # Covers exits that bypass the handlers above (FileExistsError,
        # OSError during extraction, ...): never leave ssh running or its
        # pipes open. All three steps are no-ops after a normal finish.
        if proc.poll() is None:
            proc.kill()
        for pipe in (proc.stdout, proc.stderr):
            if pipe is not None:
                pipe.close()
        proc.wait()


def emit_workspace_tar(jobid: str) -> None:
    """Write a tarball of the workspace for ``jobid`` to ``sys.stdout.buffer``.

    This backs the internal ``vq tar-workspace`` verb. It is kept apart
    from :func:`fetch_local` because that one produces a directory while
    this one produces a byte stream.

    Two sources, chosen from the spec:

    * Archived job (spec marked archived and ``archive_path`` recorded,
      v0.5.10.1+): the workspace dir is gone, so the archive file's bytes
      are copied to stdout verbatim. They are forwarded still compressed
      (``fetch_local`` reads the same file as bz2), so the receiver has to
      open the stream in a mode that handles compression.
    * Live job: ``spec.cwd`` is tarred, uncompressed, straight to stdout
      with ``spec.dest_dirname`` as the top-level directory name.

    Raises :class:`FileNotFoundError` when the job is unknown, or when the
    chosen source (archive file or workspace dir) does not exist.
    """
    record = paths.spec_path(jobid)
    if not record.exists():
        # Same wording as the kill / status errors, for consistency.
        raise FileNotFoundError(f"no such job: {jobid}")
    job = JobSpec.read(record)

    if not (job.is_archived and job.archive_path):
        workspace = Path(job.cwd)
        if not workspace.is_dir():
            raise FileNotFoundError(f"workspace for job {jobid} not found at {workspace}")
        # v0.5.34: the top-level directory in the stream is the spec's
        # ``dest_dirname`` (``<name>-<jobid>`` for named jobs, otherwise
        # ``<jobid>``); the receiver uses it as its destination dir name.
        with tarfile.open(fileobj=sys.stdout.buffer, mode="w|") as stream:
            stream.add(workspace, arcname=job.dest_dirname)
        return

    tarball = Path(job.archive_path)
    if not tarball.is_file():
        raise FileNotFoundError(
            f"archive for job {jobid} not found at {tarball} "
            "(record exists in spec but file is gone)"
        )
    # Pass the stored archive through untouched. It is presumably laid out
    # like the live-workspace stream above (it is written by the cleanup
    # code, which is not visible here -- confirm), so the receiver sees
    # the same tar structure either way.
    with tarball.open("rb") as blob:
        shutil.copyfileobj(blob, sys.stdout.buffer)
