|
25 | 25 |
|
26 | 26 | _DEFAULT_SPECIAL_TOKENS = ["<|im_end|>", "<|endoftext|>", "<|eot_id|>", "<|end|>"] |
27 | 27 |
|
| 28 | +# Chat turn terminators eligible to be used as generation stop strings. This is a |
| 29 | +# deliberate allowlist of end-of-turn / end-of-text tokens -- NOT the tokenizer's |
| 30 | +# full special-token set. Structural/tool delimiters (e.g. <tool_call>) must reach |
| 31 | +# the tool parser, so they are intentionally excluded: using them as hard stops |
| 32 | +# would truncate a tool call before it is ever parsed. |
| 33 | +_TURN_TERMINATORS = ( |
| 34 | + "<|im_end|>", |
| 35 | + "<|endoftext|>", |
| 36 | + "<|eot_id|>", |
| 37 | + "<|end|>", |
| 38 | + "<|end_of_text|>", |
| 39 | + "<end_of_turn>", |
| 40 | + "</s>", |
| 41 | +) |
| 42 | + |
| 43 | + |
| 44 | +def _content_text(content) -> str: |
| 45 | + """Best-effort text for the ChatML fallback: a str as-is, or the concatenated |
| 46 | + text parts of an OpenAI list-content message (non-text parts dropped). Avoids |
| 47 | + rendering a Python repr of structured content. None -> empty string.""" |
| 48 | + if isinstance(content, str): |
| 49 | + return content |
| 50 | + if isinstance(content, list): |
| 51 | + out = [] |
| 52 | + for part in content: |
| 53 | + if isinstance(part, dict) and part.get("type") == "text": |
| 54 | + out.append(str(part.get("text", ""))) |
| 55 | + elif isinstance(part, str): |
| 56 | + out.append(part) |
| 57 | + return "".join(out) |
| 58 | + return str(content or "") |
| 59 | + |
28 | 60 |
|
29 | 61 | def _decode_tool_call_arguments(messages: list[dict[str, Any]]) -> None: |
30 | 62 | """In-place: parse each tool call's ``function.arguments`` from a JSON string |
@@ -120,25 +152,64 @@ def count_tokens(self, prompt: str) -> Optional[int]: |
120 | 152 | return len(self._hf.encode(prompt, add_special_tokens=False)) |
121 | 153 | return None |
122 | 154 |
|
123 | | - def special_tokens(self) -> list[str]: |
124 | | - """Special-token strings whose appearance ends the visible content. |
def turn_stop_sequences(self) -> list[str]:
    """Return the strings that should halt generation at the end of a turn.

    Only turn terminators are returned: the tokenizer's ``eos_token`` first,
    then each entry of ``_TURN_TERMINATORS`` that the tokenizer registers as
    a special token, in allowlist order and without duplicates. The full
    special-token set is deliberately NOT used: if a tokenizer registers
    structural/tool delimiters (e.g. <tool_call>) as special, stopping on
    them would cut a tool call short before the parser sees it.
    Whitespace-only tokens are ignored. Request-level `stop` strings from
    the user are handled elsewhere and are unaffected by this list.

    Without an HF tokenizer the default token list is returned. With one,
    the result may be ``[]`` when there is no usable ``eos_token`` and none
    of the known terminators is registered as special; end-of-turn detection
    then rests on the worker's EOS-by-token-id check (e.g. the Qwen engine
    adds <|im_end|> to eos_ids), making this string list only a backstop.
    """
    if self._hf is None:
        return list(_DEFAULT_SPECIAL_TOKENS)

    # Non-blank special-token strings the tokenizer actually registers.
    registered = set()
    for tok in getattr(self._hf, "all_special_tokens", []) or []:
        if isinstance(tok, str) and tok.strip():
            registered.add(tok)

    stops: list[str] = []
    eos_token = getattr(self._hf, "eos_token", None)
    if isinstance(eos_token, str) and eos_token.strip():
        stops.append(eos_token)

    for candidate in _TURN_TERMINATORS:
        if candidate not in registered:
            continue
        if candidate in stops:
            continue
        stops.append(candidate)
    return stops
125 | 187 |
|
126 | | - From the HF tokenizer when available (model-accurate), else a default set |
127 | | - covering common chat models. |
def special_tokens(self) -> list[str]:
    """Return every special-token string, for final content cleanup.

    This broad set is meant for scrubbing special tokens that leaked into
    already-parsed visible output. It is distinct from
    turn_stop_sequences() and must NOT be used as generation stops or for
    pre-parse truncation, since that would halt or cut a tool call at a
    structural delimiter. Non-string and whitespace-only entries are
    dropped so that a whitespace token cannot truncate content at ordinary
    spacing. Without an HF tokenizer the default token list is returned.
    """
    if self._hf is None:
        return list(_DEFAULT_SPECIAL_TOKENS)
    registered = getattr(self._hf, "all_special_tokens", []) or []
    return [tok for tok in list(registered) if isinstance(tok, str) and tok.strip()]
133 | 201 |
|
@staticmethod
def _fallback(messages: list[ChatMessage]) -> str:
    """Render ``messages`` as an approximate, text-only ChatML prompt.

    Each message becomes one ``<|im_start|>{role}`` ... ``<|im_end|>`` turn
    holding only its text content, followed by an open assistant turn; turns
    are joined with newlines. Assistant `tool_calls` and a tool-role
    `tool_call_id` are dropped, and reasoning controls such as
    enable_thinking cannot be reproduced, so this is NOT a correctness path
    for tool or multimodal conversations -- supply --hf-tokenizer for
    model-correct formatting.
    """
    turns = [
        f"<|im_start|>{m.role}\n{_content_text(m.content)}<|im_end|>"
        for m in messages
    ]
    # Leave the assistant turn open so generation continues from it.
    turns.append("<|im_start|>assistant\n")
    return "\n".join(turns)
0 commit comments