Merge branch 'feature/6'
This commit is contained in:
commit
d84b02bba3
3 changed files with 106 additions and 1 deletions
2
TODO
2
TODO
|
|
@ -95,7 +95,7 @@ Content-Type: application/issue
|
|||
ID: 6
|
||||
Type: feature
|
||||
Title: Test suite: unit and smoke
|
||||
Status: in-progress
|
||||
Status: done
|
||||
Priority: medium
|
||||
Created: 2026-06-16
|
||||
Module: sekft
|
||||
|
|
|
|||
30
tests/smoke/test_entrypoints.py
Normal file
30
tests/smoke/test_entrypoints.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
"""Smoke tests: the console entry points load and respond to --help without the
|
||||
GPU stack (torch is imported lazily inside the training/eval code paths)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Repository root: this file sits in tests/smoke/, so the root is two levels
# above the directory that contains it.
ROOT = Path(__file__).resolve().parents[2]
|
||||
# In-tree package sources; placed on PYTHONPATH for the child interpreters.
SRC = ROOT / "src"
|
||||
# Sources of a "posix-sdc" tree located next to the repository root
# (presumably a sibling checkout -- confirm against the project layout).
POSIX_SRC = ROOT.parent / "posix-sdc" / "src"
|
||||
|
||||
|
||||
def _help(module: str) -> "subprocess.CompletedProcess[str]":
    """Run ``python -m <module> --help`` in a child interpreter.

    The child sees ``SRC`` and ``POSIX_SRC`` at the front of ``PYTHONPATH``.
    Any ``PYTHONPATH`` inherited from the calling environment is appended
    rather than discarded, so dependencies that are only reachable through it
    stay importable while the in-tree sources still take precedence.

    Args:
        module: Dotted module name handed to ``-m``.

    Returns:
        The completed process, with ``stdout`` and ``stderr`` captured as text.
        A non-zero exit status is reported through ``returncode`` and is not
        raised, so callers can assert on it together with ``stderr``.

    Raises:
        subprocess.TimeoutExpired: If the child does not exit within the time
            limit; this keeps a hung entry point from stalling the test run.
    """
    search_path = [str(SRC), str(POSIX_SRC)]
    inherited = os.environ.get("PYTHONPATH")
    if inherited:
        search_path.append(inherited)
    env = dict(os.environ, PYTHONPATH=os.pathsep.join(search_path))
    return subprocess.run(
        [sys.executable, "-m", module, "--help"],
        capture_output=True,
        text=True,
        env=env,
        check=False,  # callers inspect returncode themselves
        timeout=120,  # generous: --help should return almost immediately
    )
|
||||
|
||||
|
||||
def test_train_help() -> None:
    """The training entry point exits cleanly on --help and lists ``--data``."""
    result = _help("tiararodney.sekft.sft")
    # Surface the child's stderr in the failure message when it exits non-zero.
    assert result.returncode == 0, result.stderr
    usage = result.stdout
    assert "--data" in usage
|
||||
|
||||
|
||||
def test_eval_help() -> None:
    """The eval entry point exits cleanly on --help and lists ``--adapter``."""
    result = _help("tiararodney.sekft.eval")
    # Surface the child's stderr in the failure message when it exits non-zero.
    assert result.returncode == 0, result.stderr
    usage = result.stdout
    assert "--adapter" in usage
|
||||
75
tests/unit/test_sft.py
Normal file
75
tests/unit/test_sft.py
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
"""Unit tests for the SFT render canonicalisation and assistant-only mask.
|
||||
|
||||
These run anywhere: a fake additive tokenizer stands in for a real chat
|
||||
template, so no torch/transformers is needed."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from tiararodney.sekft import sft
|
||||
|
||||
|
||||
class FakeTok:
    """Stand-in tokenizer with an additive chat template.

    Every message renders as ``<role>``, then its whitespace-split content,
    then ``</e>``; rendering a longer conversation therefore only appends
    tokens to the rendering of any of its prefixes.
    """

    def apply_chat_template(self, msgs: list[dict], add_generation_prompt: bool = False,
                            return_tensors: Any = None) -> list[str]:
        """Render *msgs* to a flat list of string tokens.

        ``return_tensors`` is accepted but ignored. When
        ``add_generation_prompt`` is true, a trailing ``<assistant>`` token is
        appended after the last turn.
        """
        rendered = [
            piece
            for turn in msgs
            for piece in (f"<{turn['role']}>", *turn["content"].split(), "</e>")
        ]
        if add_generation_prompt:
            rendered.append("<assistant>")
        return rendered
|
||||
|
||||
|
||||
def test_normalize_folds_system_and_merges_consecutive() -> None:
    """A system turn and runs of same-role turns collapse into strict alternation.

    The expected first turn is the system text and the two following user
    texts joined by newlines.
    """
    turns = [
        ("system", "orient"),
        ("user", "login"),
        ("user", "prompt"),
        ("assistant", "cat f"),
        ("user", "out"),
        ("user", "prompt"),
        ("assistant", "exit"),
    ]
    raw = [{"role": role, "content": content} for role, content in turns]

    norm = sft.normalize_for_template(raw)

    roles = [m["role"] for m in norm]
    assert roles == ["user", "assistant", "user", "assistant"]
    first_content = norm[0]["content"]
    assert first_content == "orient\nlogin\nprompt"
|
||||
|
||||
|
||||
def test_normalize_leaves_clean_alternation_untouched() -> None:
    """An already alternating user/assistant exchange comes back equal to the input."""
    raw = [
        {"role": "user", "content": "a"},
        {"role": "assistant", "content": "b"},
    ]
    result = sft.normalize_for_template(raw)
    assert result == raw
|
||||
|
||||
|
||||
def test_mask_trains_assistant_turns_only() -> None:
    """Only assistant-turn tokens carry a label; environment text is masked.

    A label of ``-100`` marks a token as masked. Every other token counts as
    trained.
    """
    raw = [
        {"role": "system", "content": "orient"},
        {"role": "user", "content": "login"},
        {"role": "assistant", "content": "cat f"},
        {"role": "user", "content": "out"},
        {"role": "assistant", "content": "exit"},
    ]
    ex = sft.build_masked_example(raw, FakeTok())

    # Split the token stream by label in a single pass.
    trained: list = []
    masked: list = []
    for tok, label in zip(ex["input_ids"], ex["labels"]):
        (masked if label == -100 else trained).append(tok)

    allowed = {"<assistant>", "cat", "f", "exit", "</e>"}
    assert set(trained).issubset(allowed)
    assert "cat" in trained and "exit" in trained  # both commands present
    assert {"orient", "login", "out"}.issubset(set(masked))  # environment masked
|
||||
|
||||
|
||||
def test_mask_raises_on_non_additive_template() -> None:
    """Building a masked example must raise ``ValueError`` for a non-nesting template."""

    class BadTok:
        """Tokenizer stub whose render of a prefix is not a prefix of the full render."""

        def apply_chat_template(self, msgs: list[dict], add_generation_prompt: bool = False,
                                return_tensors: Any = None) -> list[int]:
            # Counts down from len(msgs) to 1, so a longer conversation starts
            # with a different token: prefixes do not nest.
            return [*range(len(msgs), 0, -1)]

    conversation = [
        {"role": "user", "content": "a"},
        {"role": "assistant", "content": "b"},
    ]
    with pytest.raises(ValueError):
        sft.build_masked_example(conversation, BadTok())
|
||||
Loading…
Add table
Add a link
Reference in a new issue