"""Unit tests for the trainer's three-source data loader (raw dir / curated jsonl). The Hub path delegates to posix-sdc and is covered there.""" from __future__ import annotations import json from pathlib import Path import pytest from tiararodney.sekft import sft def test_load_turns_from_raw_dir(tmp_path: Path) -> None: (tmp_path / "a.json").write_text(json.dumps( {"keep": True, "turns": [{"role": "assistant", "content": "ls"}]})) (tmp_path / "b.json").write_text(json.dumps( # not kept -> excluded {"keep": False, "turns": [{"role": "assistant", "content": "rm -rf /"}]})) got = list(sft.load_turns(tmp_path)) assert len(got) == 1 assert got[0][0]["content"] == "ls" def test_load_turns_from_jsonl(tmp_path: Path) -> None: f = tmp_path / "corpus.jsonl" f.write_text("\n".join(json.dumps({"turns": [{"role": "assistant", "content": c}]}) for c in ("ls", "cat x")) + "\n") got = list(sft.load_turns(f)) assert [t[0]["content"] for t in got] == ["ls", "cat x"] def test_load_turns_rejects_other_paths(tmp_path: Path) -> None: bad = tmp_path / "notes.txt" bad.write_text("hi") with pytest.raises(SystemExit): list(sft.load_turns(bad))