Merge branch 'feature/6'

2026-06-16 20:15:22 +02:00 · 2026-06-16 20:15:22 +02:00 · d84b02bba3
commit d84b02bba3
parent 45120dea97 95ce275301
3 changed files with 106 additions and 1 deletions
--- a/2
+++ b/2
@ -95,7 +95,7 @@ Content-Type: application/issue
 ID: 6
 Type: feature
 Title: Test suite: unit and smoke
-Status: in-progress
+Status: done
 Priority: medium
 Created: 2026-06-16
 Module: sekft
--- a/tests/smoke/test_entrypoints.py
+++ b/tests/smoke/test_entrypoints.py
@ -0,0 +1,30 @@
+"""Smoke tests: the console entry points load and respond to --help without the
+GPU stack (torch is imported lazily inside the training/eval code paths)."""
+from __future__ import annotations
+
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[2]
+SRC = ROOT / "src"
+POSIX_SRC = ROOT.parent / "posix-sdc" / "src"
+
+
+def _help(module: str) -> "subprocess.CompletedProcess[str]":
+    argv = [sys.executable, "-m", module, "--help"]  # i.e. ``python -m <module> --help``
+    env = {**os.environ, "PYTHONPATH": os.pathsep.join(map(str, (SRC, POSIX_SRC)))}
+    return subprocess.run(argv, env=env, text=True, capture_output=True)  # stdout/stderr as str
+
+
+def test_train_help() -> None:
+    proc = _help("tiararodney.sekft.sft")  # sft module's ``--help`` in a child interpreter
+    assert proc.returncode == 0, proc.stderr  # on failure, show the child's stderr
+    assert "--data" in proc.stdout  # usage text must mention the --data option
+
+
+def test_eval_help() -> None:
+    proc = _help("tiararodney.sekft.eval")  # eval module's ``--help`` in a child interpreter
+    assert proc.returncode == 0, proc.stderr  # on failure, show the child's stderr
+    assert "--adapter" in proc.stdout  # usage text must mention the --adapter option
--- a/tests/unit/test_sft.py
+++ b/tests/unit/test_sft.py
@ -0,0 +1,75 @@
+"""Unit tests for the SFT render canonicalisation and assistant-only mask.
+
+These run anywhere: a fake additive tokenizer stands in for a real chat
+template, so no torch/transformers is needed."""
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+from tiararodney.sekft import sft
+
+
+class FakeTok:
+    """Tokenizer stub with an additive template: a turn renders to ``<role>``, its
+    whitespace-split content, then ``</e>``; a generation prompt adds ``<assistant>``."""
+
+    def apply_chat_template(self, msgs: list[dict], add_generation_prompt: bool = False,
+                            return_tensors: Any = None) -> list[str]:
+        # ``return_tensors`` is accepted but never read; the "tokens" are plain strings.
+        rendered: list[str] = []
+        for turn in msgs:
+            rendered.extend([f"<{turn['role']}>", *turn["content"].split(), "</e>"])
+        # The generation prompt is a bare ``<assistant>`` opener with no closing ``</e>``.
+        if add_generation_prompt:
+            rendered.append("<assistant>")
+        return rendered
+
+
+def test_normalize_folds_system_and_merges_consecutive() -> None:
+    """A system turn plus runs of same-role turns must come back as strict
+    user/assistant alternation, with the folded text joined by newlines."""
+    turns = [
+        ("system", "orient"), ("user", "login"), ("user", "prompt"), ("assistant", "cat f"),
+        ("user", "out"), ("user", "prompt"), ("assistant", "exit"),
+    ]
+    raw = [{"role": role, "content": content} for role, content in turns]
+    normalized = sft.normalize_for_template(raw)
+    roles = [turn["role"] for turn in normalized]
+    # Seven input turns collapse to four; the system text leads the first user turn.
+    assert roles == ["user", "assistant", "user", "assistant"]
+    assert normalized[0]["content"] == "orient\nlogin\nprompt"
+
+
+def test_normalize_leaves_clean_alternation_untouched() -> None:  # no-op on clean input
+    raw = [{"role": "user", "content": "a"}, {"role": "assistant", "content": "b"}]
+    assert sft.normalize_for_template([dict(m) for m in raw]) == raw  # copies catch in-place edits
+
+
+def test_mask_trains_assistant_turns_only() -> None:
+    """Labels other than -100 may sit only on assistant-turn tokens; the system
+    and user text must all be labelled -100."""
+    turns = [("system", "orient"), ("user", "login"), ("assistant", "cat f"),
+             ("user", "out"), ("assistant", "exit")]
+    raw = [{"role": role, "content": content} for role, content in turns]
+    example = sft.build_masked_example(raw, FakeTok())
+    trained, masked = [], []
+    for token, label in zip(example["input_ids"], example["labels"]):
+        bucket = masked if label == -100 else trained  # -100 is the "not trained" label
+        bucket.append(token)
+    assert set(trained).issubset({"<assistant>", "cat", "f", "exit", "</e>"})
+    assert "cat" in trained and "exit" in trained  # both commands are trained on
+    assert set(masked).issuperset({"orient", "login", "out"})  # environment text is masked
+
+
+def test_mask_raises_on_non_additive_template() -> None:
+    class BadTok:
+        def apply_chat_template(self, msgs: list[dict], add_generation_prompt: bool = False,
+                                return_tensors: Any = None) -> list[int]:
+            # n turns render as [n, ..., 1], so renders of different lengths share no prefix.
+            return [*range(len(msgs), 0, -1)]
+
+    raw = [{"role": "user", "content": "a"}, {"role": "assistant", "content": "b"}]
+    with pytest.raises(ValueError):
+        sft.build_masked_example(raw, BadTok())