bugfix(16): operators must not feed a BatchEncoding to model.generate
The transformers 5.x return-type change behind #15 also breaks generation: apply_chat_template(add_generation_prompt=True, return_tensors="pt") now returns a BatchEncoding, and eval.py and resident.py passed it straight to model.generate, which evaluates inputs.shape[0] and raises AttributeError (the holdout eval crashed on scenario 1). #15 fixed only the trainer. This commit factors out a shared _input_ids helper and a render_prompt_ids function; both operators now call render_prompt_ids. Tests cover _input_ids for both return shapes, and render_prompt_ids.
This commit is contained in:
parent
d261919404
commit
1279bc8965
5 changed files with 58 additions and 18 deletions
|
|
@ -86,6 +86,21 @@ def test_mask_handles_batchencoding_return() -> None:
|
|||
== sft.build_masked_example(raw, FakeTok()))
|
||||
|
||||
|
||||
def test_input_ids_extracts_from_batchencoding_or_passthrough() -> None:
    """Check ``sft._input_ids`` on a mapping input and on a bare id list."""
    # transformers 5.x shape: a mapping carrying ``input_ids`` -> only the ids come back.
    # NOTE(review): a plain dict stands in for BatchEncoding here; the real class
    # is presumably not a dict subclass -- confirm _input_ids does not depend on
    # isinstance(..., dict).
    encoding_like = {"input_ids": [1, 2, 3], "attention_mask": [1, 1, 1]}
    assert sft._input_ids(encoding_like) == [1, 2, 3]

    # transformers 4.x shape: a bare list of ids is handed back as-is.
    bare_ids = [4, 5, 6]
    assert sft._input_ids(bare_ids) == [4, 5, 6]
|
||||
|
||||
|
||||
def test_render_prompt_ids_normalises_and_appends_generation_prompt() -> None:
    """Check what ``sft.render_prompt_ids`` returns for a system + user conversation.

    The generation operators rely on this function returning plain ids
    (not a BatchEncoding), so that model.generate is never handed a dict.
    """
    system_turn = {"role": "system", "content": "orient"}
    user_turn = {"role": "user", "content": "go"}

    rendered = sft.render_prompt_ids(FakeTok(), [system_turn, user_turn])

    # The generation prompt must be the final element.
    assert rendered[-1] == "<assistant>"

    # Both the system and the user content must survive into the rendered ids
    # (the system message is expected to be folded into the user turn).
    present = set(rendered)
    assert {"orient", "go"} <= present
|
||||
|
||||
|
||||
def test_mask_raises_on_non_additive_template() -> None:
|
||||
class BadTok:
|
||||
def apply_chat_template(self, msgs: list[dict[str, str]], add_generation_prompt: bool = False,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue