From fda018a324ee44606b96327e98c8da5e97940b0a Mon Sep 17 00:00:00 2001 From: Rufino Cabrera Date: Thu, 11 Jun 2026 10:25:45 +0200 Subject: [PATCH 1/7] Fix REPL y/N prompts showing ^M after Enter Restore cooked TTY state after prompt_toolkit exits so Rich console.input and other follow-up prompts (e.g. /compact confirmation) accept Enter normally. --- src/cai/repl/ui/prompt.py | 10 ++++++++++ tests/repl/test_multiline_prompt.py | 23 +++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/src/cai/repl/ui/prompt.py b/src/cai/repl/ui/prompt.py index daf5f9501..a3b351e6e 100644 --- a/src/cai/repl/ui/prompt.py +++ b/src/cai/repl/ui/prompt.py @@ -216,6 +216,7 @@ def _toolbar_with_separator(): return [sep_line] # Get user input with all features + result = "" try: result = prompt( [("class:prompt", "CAI> ")], @@ -244,6 +245,15 @@ def _toolbar_with_separator(): except (AttributeError, OSError): _REPL_STDIN_EXHAUSTED_PENDING = True return "" + finally: + # prompt_toolkit restores termios from the snapshot taken after we cleared + # ICRNL above; Rich console.input and plain input() then echo Enter as ^M. + try: + from cai.util.streaming import restore_terminal_state + + restore_terminal_state(emit_trailing_newline=False) + except Exception: + pass # Print bottom separator only when user submitted non-empty input, # so that empty Enter produces a single separator between prompts. diff --git a/tests/repl/test_multiline_prompt.py b/tests/repl/test_multiline_prompt.py index e49384989..4835055b0 100644 --- a/tests/repl/test_multiline_prompt.py +++ b/tests/repl/test_multiline_prompt.py @@ -33,6 +33,29 @@ def test_multiline_is_enabled_in_prompt_config(self): 'Without this, Enter may only insert newlines after long agent turns.' ) + def test_tty_restored_after_prompt(self): + """Cooked TTY must be restored after prompt() for follow-up console.input prompts. 
+ + We clear ICRNL before prompt_toolkit starts; on exit it restores that snapshot, + so /compact and other y/N confirmations see Enter as ^M unless we run + restore_terminal_state again in a finally block. + """ + from cai.repl.ui.prompt import get_user_input + import inspect + + source = inspect.getsource(get_user_input) + + assert 'finally:' in source, ( + 'REGRESSION: get_user_input must use finally to restore the TTY after ' + 'prompt_toolkit exits. Without this, Rich console.input shows ^M on Enter.' + ) + assert source.index('finally:') > source.index('prompt('), ( + 'REGRESSION: TTY restore must run after prompt(), not only before it.' + ) + assert 'restore_terminal_state' in source[source.index('finally:'):], ( + 'REGRESSION: finally block must call restore_terminal_state after prompt().' + ) + def test_icrnl_cleared_before_prompt(self): """ICRNL/INLCR/IGNCR must be cleared before prompt() to keep Enter as submit. From 937fbb5cc192291a522109d67c0f2119eb42d108 Mon Sep 17 00:00:00 2001 From: Rufino Cabrera Date: Thu, 11 Jun 2026 10:47:59 +0200 Subject: [PATCH 2/7] Fix /compact y/N prompt echoing ^M on Enter Force cooked canonical TTY mode after prompt_toolkit and route /compact confirmation through read_repl_yes_no, which restores the terminal and strips stray carriage returns from the answer. 
--- src/cai/repl/commands/compact.py | 10 +++--- src/cai/repl/ui/tty_input.py | 52 +++++++++++++++++++++++++++++ src/cai/util/streaming.py | 26 +++++++++++++++ tests/repl/test_multiline_prompt.py | 11 ++++++ tests/repl/test_tty_input.py | 48 ++++++++++++++++++++++++++ 5 files changed, 142 insertions(+), 5 deletions(-) create mode 100644 src/cai/repl/ui/tty_input.py create mode 100644 tests/repl/test_tty_input.py diff --git a/src/cai/repl/commands/compact.py b/src/cai/repl/commands/compact.py index 8dba1cfef..af2fc0e91 100644 --- a/src/cai/repl/commands/compact.py +++ b/src/cai/repl/commands/compact.py @@ -13,6 +13,7 @@ from rich.panel import Panel from cai.repl.commands.base import Command, register_command +from cai.repl.ui.tty_input import read_repl_yes_no from cai.sdk.agents.models.openai_chatcompletions import get_current_active_model from cai.repl.commands.model import ( get_all_predefined_models, @@ -572,11 +573,10 @@ def handle_cleanup(): f"\n[#9aa0a6][CAI] Compact current conversation? [/]" f"[bold white]({msg_count} messages)[/bold white]" ) - confirm = console.input( - "[#9aa0a6][CAI] Compact conversation? [/][bold #00ff9d](y/N): [/]" - ) - - if confirm.lower() == "y": + if read_repl_yes_no( + console, + "[#9aa0a6][CAI] Compact conversation? 
[/][bold #00ff9d](y/N): [/]", + ): # Pass the detected agent name to _perform_compaction return self._perform_compaction(None, None, agent_name=agent_name) else: diff --git a/src/cai/repl/ui/tty_input.py b/src/cai/repl/ui/tty_input.py new file mode 100644 index 000000000..6e9dc63e0 --- /dev/null +++ b/src/cai/repl/ui/tty_input.py @@ -0,0 +1,52 @@ +"""Line-oriented prompts after prompt_toolkit or Rich Live (y/N, confirmations).""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from rich.console import Console + + +def prepare_tty_for_line_input() -> None: + """Restore cooked TTY before ``input()`` / Rich ``console.input``.""" + from cai.util.streaming import ensure_cooked_tty, restore_terminal_state + + restore_terminal_state(emit_trailing_newline=False) + ensure_cooked_tty() + + +def normalize_repl_line(value: str) -> str: + """Strip carriage returns left over from non-canonical TTY reads.""" + return value.replace("\r", "").strip() + + +def read_repl_line( + console: Console, + prompt: str = "", + *, + markup: bool = True, +) -> str: + """Read one line with Rich prompt styling; safe after the main CAI> prompt.""" + prepare_tty_for_line_input() + if prompt: + console.print(prompt, markup=markup, emoji=False, end="") + try: + raw = input() + except EOFError: + return "" + return normalize_repl_line(raw) + + +def read_repl_yes_no( + console: Console, + prompt: str, + *, + default: bool = False, + markup: bool = True, +) -> bool: + """Return True when the user answers y/yes (default answer when they press Enter).""" + answer = read_repl_line(console, prompt, markup=markup) + if not answer: + return default + return answer.lower() in ("y", "yes") diff --git a/src/cai/util/streaming.py b/src/cai/util/streaming.py index ead368db2..628d1743a 100644 --- a/src/cai/util/streaming.py +++ b/src/cai/util/streaming.py @@ -1386,6 +1386,32 @@ def _reset_controlling_tty_sane() -> None: ) except Exception: pass + ensure_cooked_tty() 
+
+
+def ensure_cooked_tty() -> None:
+    """Force canonical line input so ``input()`` / Rich ``console.input`` accept Enter.
+
+    ``stty sane`` alone is not always enough after prompt_toolkit: it may restore a
+    snapshot taken while ICRNL was cleared, leaving Enter as raw ``\\r`` (shown as ^M).
+    """
+    if not sys.stdin.isatty():
+        return
+    try:
+        import termios
+
+        fd = sys.stdin.fileno()
+        attrs = termios.tcgetattr(fd)
+        iflag, oflag, cflag, lflag, ispeed, ospeed, cc = attrs  # tcgetattr() yields 7 fields
+        iflag |= termios.ICRNL | termios.BRKINT
+        iflag &= ~(termios.INLCR | termios.IGNCR)
+        lflag |= termios.ICANON | termios.ECHO | termios.ISIG
+        cc[termios.VMIN] = 1
+        cc[termios.VTIME] = 0
+        termios.tcsetattr(fd, termios.TCSADRAIN, [iflag, oflag, cflag, lflag, ispeed, ospeed, cc])
+        termios.tcflush(fd, termios.TCIFLUSH)
+    except Exception:
+        pass
 
 
 def restore_terminal_state(
diff --git a/tests/repl/test_multiline_prompt.py b/tests/repl/test_multiline_prompt.py
index 4835055b0..1fc004697 100644
--- a/tests/repl/test_multiline_prompt.py
+++ b/tests/repl/test_multiline_prompt.py
@@ -56,6 +56,17 @@ def test_tty_restored_after_prompt(self):
             'REGRESSION: finally block must call restore_terminal_state after prompt().'
         )
 
+    def test_ensure_cooked_tty_after_stty_sane(self):
+        """stty sane must be followed by explicit cooked termios for y/N prompts."""
+        from cai.util.streaming import _reset_controlling_tty_sane
+        import inspect
+
+        source = inspect.getsource(_reset_controlling_tty_sane)
+        assert 'ensure_cooked_tty' in source, (
+            'REGRESSION: _reset_controlling_tty_sane must call ensure_cooked_tty so '
+            'follow-up console.input accepts Enter instead of echoing ^M.'
+        )
+
     def test_icrnl_cleared_before_prompt(self):
         """ICRNL/INLCR/IGNCR must be cleared before prompt() to keep Enter as submit.
 
diff --git a/tests/repl/test_tty_input.py b/tests/repl/test_tty_input.py new file mode 100644 index 000000000..13af3e2c2 --- /dev/null +++ b/tests/repl/test_tty_input.py @@ -0,0 +1,48 @@ +"""Tests for REPL follow-up prompts (y/N after prompt_toolkit).""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +from cai.repl.ui.tty_input import ( + normalize_repl_line, + read_repl_line, + read_repl_yes_no, +) + + +class TestNormalizeReplLine: + def test_strips_carriage_return_from_enter(self): + assert normalize_repl_line("y\r") == "y" + + def test_strips_whitespace(self): + assert normalize_repl_line(" yes \r\n") == "yes" + + +class TestReadReplYesNo: + @patch("cai.repl.ui.tty_input.input", return_value="y\r") + @patch("cai.repl.ui.tty_input.prepare_tty_for_line_input") + def test_yes_with_carriage_return(self, _prepare, _input): + console = MagicMock() + assert read_repl_yes_no(console, "Continue? (y/N): ") is True + + @patch("cai.repl.ui.tty_input.input", return_value="") + @patch("cai.repl.ui.tty_input.prepare_tty_for_line_input") + def test_empty_defaults_to_no(self, _prepare, _input): + console = MagicMock() + assert read_repl_yes_no(console, "Continue? (y/N): ", default=False) is False + + @patch("cai.repl.ui.tty_input.input", return_value="n") + @patch("cai.repl.ui.tty_input.prepare_tty_for_line_input") + def test_no_answer(self, _prepare, _input): + console = MagicMock() + assert read_repl_yes_no(console, "Continue? 
(y/N): ") is False + + +class TestReadReplLine: + @patch("cai.repl.ui.tty_input.input", return_value="RESET\r") + @patch("cai.repl.ui.tty_input.prepare_tty_for_line_input") + def test_prepares_tty_before_read(self, prepare, _input): + console = MagicMock() + assert read_repl_line(console, "> ") == "RESET" + prepare.assert_called_once() From 95506508a28d027a4fa3549270ac4623bfe4bf82 Mon Sep 17 00:00:00 2001 From: Rufino Cabrera Date: Thu, 11 Jun 2026 11:17:53 +0200 Subject: [PATCH 3/7] Use prompt_toolkit for /compact y/N confirmation Rich console.input still echoed ^M after prompt_toolkit even with cooked TTY restore. Route compact confirmation through prompt_toolkit like the main CAI> prompt and other interactive agent flows. --- src/cai/repl/commands/compact.py | 5 +--- src/cai/repl/ui/tty_input.py | 51 ++++++++++++++++++++++++-------- tests/repl/test_tty_input.py | 34 ++++++++++----------- 3 files changed, 57 insertions(+), 33 deletions(-) diff --git a/src/cai/repl/commands/compact.py b/src/cai/repl/commands/compact.py index af2fc0e91..00df04085 100644 --- a/src/cai/repl/commands/compact.py +++ b/src/cai/repl/commands/compact.py @@ -573,10 +573,7 @@ def handle_cleanup(): f"\n[#9aa0a6][CAI] Compact current conversation? [/]" f"[bold white]({msg_count} messages)[/bold white]" ) - if read_repl_yes_no( - console, - "[#9aa0a6][CAI] Compact conversation? 
[/][bold #00ff9d](y/N): [/]", - ): + if read_repl_yes_no(console, "Compact conversation?"): # Pass the detected agent name to _perform_compaction return self._perform_compaction(None, None, agent_name=agent_name) else: diff --git a/src/cai/repl/ui/tty_input.py b/src/cai/repl/ui/tty_input.py index 6e9dc63e0..9f801635d 100644 --- a/src/cai/repl/ui/tty_input.py +++ b/src/cai/repl/ui/tty_input.py @@ -7,13 +7,14 @@ if TYPE_CHECKING: from rich.console import Console +_CAI_GREY = "#9aa0a6" +_CAI_GREEN = "#00ff9d" -def prepare_tty_for_line_input() -> None: - """Restore cooked TTY before ``input()`` / Rich ``console.input``.""" - from cai.util.streaming import ensure_cooked_tty, restore_terminal_state + +def _restore_tty_after_prompt() -> None: + from cai.util.streaming import restore_terminal_state restore_terminal_state(emit_trailing_newline=False) - ensure_cooked_tty() def normalize_repl_line(value: str) -> str: @@ -27,26 +28,52 @@ def read_repl_line( *, markup: bool = True, ) -> str: - """Read one line with Rich prompt styling; safe after the main CAI> prompt.""" - prepare_tty_for_line_input() - if prompt: - console.print(prompt, markup=markup, emoji=False, end="") + """Read one line using prompt_toolkit (same stack as the CAI> prompt).""" + from prompt_toolkit import prompt as ptk_prompt + from prompt_toolkit.formatted_text import HTML + + _restore_tty_after_prompt() try: - raw = input() - except EOFError: + if prompt and markup: + # Rich markup is for scrollback context only; ptk owns the input line. 
+ console.print(prompt, markup=markup, emoji=False, end="") + raw = ptk_prompt("") + elif prompt: + raw = ptk_prompt(prompt) + else: + raw = ptk_prompt("") + except (EOFError, KeyboardInterrupt): return "" + finally: + _restore_tty_after_prompt() return normalize_repl_line(raw) def read_repl_yes_no( console: Console, - prompt: str, + label: str, *, default: bool = False, markup: bool = True, ) -> bool: """Return True when the user answers y/yes (default answer when they press Enter).""" - answer = read_repl_line(console, prompt, markup=markup) + from prompt_toolkit import prompt as ptk_prompt + from prompt_toolkit.formatted_text import HTML + + _restore_tty_after_prompt() + suffix = "Y/n" if default else "y/N" + ptk_prompt_text = HTML( + f' {label} ' + f': ' + ) + try: + raw = ptk_prompt(ptk_prompt_text) + except (EOFError, KeyboardInterrupt): + return default + finally: + _restore_tty_after_prompt() + + answer = normalize_repl_line(raw) if not answer: return default return answer.lower() in ("y", "yes") diff --git a/tests/repl/test_tty_input.py b/tests/repl/test_tty_input.py index 13af3e2c2..0d44ec577 100644 --- a/tests/repl/test_tty_input.py +++ b/tests/repl/test_tty_input.py @@ -20,29 +20,29 @@ def test_strips_whitespace(self): class TestReadReplYesNo: - @patch("cai.repl.ui.tty_input.input", return_value="y\r") - @patch("cai.repl.ui.tty_input.prepare_tty_for_line_input") - def test_yes_with_carriage_return(self, _prepare, _input): + @patch("prompt_toolkit.prompt", return_value="y\r") + @patch("cai.repl.ui.tty_input._restore_tty_after_prompt") + def test_yes_with_carriage_return(self, _restore, _prompt): console = MagicMock() - assert read_repl_yes_no(console, "Continue? 
(y/N): ") is True + assert read_repl_yes_no(console, "Continue") is True - @patch("cai.repl.ui.tty_input.input", return_value="") - @patch("cai.repl.ui.tty_input.prepare_tty_for_line_input") - def test_empty_defaults_to_no(self, _prepare, _input): + @patch("prompt_toolkit.prompt", return_value="") + @patch("cai.repl.ui.tty_input._restore_tty_after_prompt") + def test_empty_defaults_to_no(self, _restore, _prompt): console = MagicMock() - assert read_repl_yes_no(console, "Continue? (y/N): ", default=False) is False + assert read_repl_yes_no(console, "Continue", default=False) is False - @patch("cai.repl.ui.tty_input.input", return_value="n") - @patch("cai.repl.ui.tty_input.prepare_tty_for_line_input") - def test_no_answer(self, _prepare, _input): + @patch("prompt_toolkit.prompt", return_value="n") + @patch("cai.repl.ui.tty_input._restore_tty_after_prompt") + def test_no_answer(self, _restore, _prompt): console = MagicMock() - assert read_repl_yes_no(console, "Continue? (y/N): ") is False + assert read_repl_yes_no(console, "Continue") is False class TestReadReplLine: - @patch("cai.repl.ui.tty_input.input", return_value="RESET\r") - @patch("cai.repl.ui.tty_input.prepare_tty_for_line_input") - def test_prepares_tty_before_read(self, prepare, _input): + @patch("prompt_toolkit.prompt", return_value="RESET\r") + @patch("cai.repl.ui.tty_input._restore_tty_after_prompt") + def test_restores_tty_around_prompt(self, restore, _prompt): console = MagicMock() - assert read_repl_line(console, "> ") == "RESET" - prepare.assert_called_once() + assert read_repl_line(console, "> ", markup=False) == "RESET" + assert restore.call_count == 2 From f454c41e94da4a76ac128b7af6f52d1abe4d99f7 Mon Sep 17 00:00:00 2001 From: Rufino Cabrera Date: Thu, 11 Jun 2026 11:26:40 +0200 Subject: [PATCH 4/7] Remove unused HTML import from read_repl_line --- src/cai/repl/ui/tty_input.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cai/repl/ui/tty_input.py b/src/cai/repl/ui/tty_input.py index 
9f801635d..2c78631a8 100644 --- a/src/cai/repl/ui/tty_input.py +++ b/src/cai/repl/ui/tty_input.py @@ -30,7 +30,6 @@ def read_repl_line( ) -> str: """Read one line using prompt_toolkit (same stack as the CAI> prompt).""" from prompt_toolkit import prompt as ptk_prompt - from prompt_toolkit.formatted_text import HTML _restore_tty_after_prompt() try: From 23739d06312536f5ad88a849ee948d929cc6a859 Mon Sep 17 00:00:00 2001 From: Rufino Cabrera Date: Thu, 11 Jun 2026 12:14:09 +0200 Subject: [PATCH 5/7] Hide LLM gateway URL from user-facing error messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace endpoint URLs in httpx retry exhaustion errors with the branded Alias Robotics® LLM servers label; URLs remain in debug logs only. --- .../models/chatcompletions/httpx_client.py | 40 ++++++++++++++----- tests/sdk/test_httpx_client_errors.py | 15 +++++++ 2 files changed, 44 insertions(+), 11 deletions(-) create mode 100644 tests/sdk/test_httpx_client_errors.py diff --git a/src/cai/sdk/agents/models/chatcompletions/httpx_client.py b/src/cai/sdk/agents/models/chatcompletions/httpx_client.py index 3d428299a..848c83971 100644 --- a/src/cai/sdk/agents/models/chatcompletions/httpx_client.py +++ b/src/cai/sdk/agents/models/chatcompletions/httpx_client.py @@ -43,6 +43,9 @@ # HTTP status codes that trigger automatic retry _RETRYABLE_STATUS = {429, 502, 503, 504, 529} +# User-facing label — never embed gateway URLs in exception text shown in the REPL. 
+_LLM_SERVER_LABEL = "Alias Robotics® LLM servers" + _LOG = logging.getLogger(__name__) @@ -197,11 +200,17 @@ async def _stream_gen() -> AsyncIterator: # Retries exhausted — raise typed error if resp.status_code == 429: raise LLMRateLimited( - f"Rate limited (429) after {_MAX_RETRIES} retries from {url}", + ( + f"Rate limited (429) after {_MAX_RETRIES} retries " + f"from {_LLM_SERVER_LABEL}" + ), retry_after=_extract_retry_after(resp), ) raise LLMProviderUnavailable( - f"Server error ({resp.status_code}) after {_MAX_RETRIES} retries from {url}" + ( + f"Server error ({resp.status_code}) after " + f"{_MAX_RETRIES} retries from {_LLM_SERVER_LABEL}" + ) ) # HTTP 413: request body exceeds gateway/proxy POST @@ -215,7 +224,7 @@ async def _stream_gen() -> AsyncIterator: pass _log_failed_completion_response(resp, url) raise LLMContextOverflow( - f"Request body too large (413) for {url}", + f"Request body too large (413) for {_LLM_SERVER_LABEL}", details=_build_413_details(url, body), ) @@ -312,11 +321,11 @@ async def _stream_gen() -> AsyncIterator: continue if e.response.status_code == 429: raise LLMRateLimited( - f"Rate limited (429) after retries from {url}" + f"Rate limited (429) after retries from {_LLM_SERVER_LABEL}" ) from e if e.response.status_code in (408, 504): raise LLMTimeout( - f"Timeout ({e.response.status_code}) from {url}" + f"Timeout ({e.response.status_code}) from {_LLM_SERVER_LABEL}" ) from e raise @@ -368,19 +377,28 @@ async def _stream_gen() -> AsyncIterator: # Retries exhausted — raise typed error if resp.status_code == 429: raise LLMRateLimited( - f"Rate limited (429) after {_MAX_RETRIES} retries from {url}", + ( + f"Rate limited (429) after {_MAX_RETRIES} retries " + f"from {_LLM_SERVER_LABEL}" + ), retry_after=_extract_retry_after(resp), ) if resp.status_code in (408, 504): raise LLMTimeout( - f"Timeout ({resp.status_code}) after {_MAX_RETRIES} retries from {url}" + ( + f"Timeout ({resp.status_code}) after {_MAX_RETRIES} " + f"retries from 
{_LLM_SERVER_LABEL}" + ) ) raise LLMProviderUnavailable( - f"Server error ({resp.status_code}) after {_MAX_RETRIES} retries from {url}" + ( + f"Server error ({resp.status_code}) after " + f"{_MAX_RETRIES} retries from {_LLM_SERVER_LABEL}" + ) ) if resp.status_code in (408,): - raise LLMTimeout(f"Timeout ({resp.status_code}) from {url}") + raise LLMTimeout(f"Timeout ({resp.status_code}) from {_LLM_SERVER_LABEL}") # HTTP 413: request body exceeds gateway/proxy POST size cap. # Not in _RETRYABLE_STATUS because resending the same body @@ -390,7 +408,7 @@ async def _stream_gen() -> AsyncIterator: if resp.status_code == 413: _log_failed_completion_response(resp, url) raise LLMContextOverflow( - f"Request body too large (413) for {url}", + f"Request body too large (413) for {_LLM_SERVER_LABEL}", details=_build_413_details(url, body), ) @@ -422,4 +440,4 @@ async def _stream_gen() -> AsyncIterator: # Should not reach here if last_error: raise last_error - raise LLMProviderUnavailable(f"All retries exhausted for {url}") + raise LLMProviderUnavailable(f"All retries exhausted for {_LLM_SERVER_LABEL}") diff --git a/tests/sdk/test_httpx_client_errors.py b/tests/sdk/test_httpx_client_errors.py new file mode 100644 index 000000000..396eadecd --- /dev/null +++ b/tests/sdk/test_httpx_client_errors.py @@ -0,0 +1,15 @@ +"""Regression: LLM error messages must not expose gateway URLs in the REPL.""" + +from __future__ import annotations + + +class TestLlmErrorMessagesHideGatewayUrl: + def test_user_facing_messages_use_branded_server_label(self): + from cai.sdk.agents.models.chatcompletions import httpx_client + import inspect + + source = inspect.getsource(httpx_client) + assert "_LLM_SERVER_LABEL" in source + assert httpx_client._LLM_SERVER_LABEL == "Alias Robotics® LLM servers" + assert "retries from {url}" not in source + assert "All retries exhausted for {url}" not in source From 967762950c33f2dba509079152d3169fe55f0ef9 Mon Sep 17 00:00:00 2001 From: Yahya Date: Mon, 8 Jun 2026 
09:58:27 +0200 Subject: [PATCH 6/7] fix(chatcompletions): repair truncated tool-call args before emission A streaming tool call cut off after the opening brace would leave state.function_calls[i].arguments = "{". CAI persisted that into conversation history verbatim, and on the next turn the upstream litellm proxy strict-parsed it as JSON and rejected the whole request with HTTP 400 ("unexpected end of data: line 1 column 2"), wedging the session. - openai_chatcompletions.py: at end-of-stream, validate every accumulated function_call.arguments string. If empty or not valid JSON, normalize to "{}" before emitting events. - message_builder.py: same guard when replaying assistant tool_calls out of memory, so a poisoned history loaded from disk also recovers. --- .../models/chatcompletions/message_builder.py | 8 ++++++++ src/cai/sdk/agents/models/openai_chatcompletions.py | 13 +++++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/cai/sdk/agents/models/chatcompletions/message_builder.py b/src/cai/sdk/agents/models/chatcompletions/message_builder.py index e75f32ef7..a1f8968f0 100644 --- a/src/cai/sdk/agents/models/chatcompletions/message_builder.py +++ b/src/cai/sdk/agents/models/chatcompletions/message_builder.py @@ -424,6 +424,14 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam: arguments = "{}" elif isinstance(arguments, dict): arguments = json.dumps(arguments) + else: + # Truncated/streamed-then-cut function call args (e.g. "{") + # are not valid JSON and the upstream proxy rejects the whole + # request with HTTP 400, wedging the conversation. 
+ try: + json.loads(arguments) + except (TypeError, ValueError): + arguments = "{}" tool_calls_param.append( ChatCompletionMessageToolCallParam( id=tc.get("id", "")[:40], diff --git a/src/cai/sdk/agents/models/openai_chatcompletions.py b/src/cai/sdk/agents/models/openai_chatcompletions.py index 47d871b0b..7156de4d0 100644 --- a/src/cai/sdk/agents/models/openai_chatcompletions.py +++ b/src/cai/sdk/agents/models/openai_chatcompletions.py @@ -2676,6 +2676,19 @@ def next_sequence_number() -> int: type="response.content_part.done", ) + # Repair partially-streamed function calls before emission so a + # truncated args string like "{" cannot poison conversation history + # and trigger HTTP 400 on the next request. + for _fc in state.function_calls.values(): + _args = _fc.arguments or "" + if not _args.strip(): + _fc.arguments = "{}" + else: + try: + json.loads(_args) + except (TypeError, ValueError): + _fc.arguments = "{}" + # Actually send events for the function calls for function_call in state.function_calls.values(): # First, a ResponseOutputItemAdded for the function call From cc82345bc0453c134b82ad888606d75b2eb375dc Mon Sep 17 00:00:00 2001 From: Yahya Date: Mon, 8 Jun 2026 10:27:46 +0200 Subject: [PATCH 7/7] fix(chatcompletions): only repair args that look like truncated JSON The previous repair pass rewrote any non-parseable arguments string to "{}", which clobbered legitimate accumulator contents in test_stream_response_yields_events_for_tool_call (concatenated raw deltas like "arg1arg2"). Narrow the guard: only when the stripped buffer starts with "{" or "[" *and* fails to parse do we replace it with "{}". Everything else is left untouched. 
--- .../models/chatcompletions/message_builder.py | 14 +++++++++----- .../sdk/agents/models/openai_chatcompletions.py | 10 +++++++--- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/cai/sdk/agents/models/chatcompletions/message_builder.py b/src/cai/sdk/agents/models/chatcompletions/message_builder.py index a1f8968f0..c2631f0cf 100644 --- a/src/cai/sdk/agents/models/chatcompletions/message_builder.py +++ b/src/cai/sdk/agents/models/chatcompletions/message_builder.py @@ -427,11 +427,15 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam: else: # Truncated/streamed-then-cut function call args (e.g. "{") # are not valid JSON and the upstream proxy rejects the whole - # request with HTTP 400, wedging the conversation. - try: - json.loads(arguments) - except (TypeError, ValueError): - arguments = "{}" + # request with HTTP 400, wedging the conversation. Only repair + # half-finished JSON object/array buffers; leave anything else + # (raw concatenated deltas, provider-specific blobs) alone. + stripped = arguments.lstrip() if isinstance(arguments, str) else "" + if stripped and stripped[0] in "{[": + try: + json.loads(arguments) + except (TypeError, ValueError): + arguments = "{}" tool_calls_param.append( ChatCompletionMessageToolCallParam( id=tc.get("id", "")[:40], diff --git a/src/cai/sdk/agents/models/openai_chatcompletions.py b/src/cai/sdk/agents/models/openai_chatcompletions.py index 7156de4d0..ff91cbf66 100644 --- a/src/cai/sdk/agents/models/openai_chatcompletions.py +++ b/src/cai/sdk/agents/models/openai_chatcompletions.py @@ -2678,12 +2678,16 @@ def next_sequence_number() -> int: # Repair partially-streamed function calls before emission so a # truncated args string like "{" cannot poison conversation history - # and trigger HTTP 400 on the next request. + # and trigger HTTP 400 on the next request. 
Only rewrite when the + # buffer is clearly a half-finished JSON object/array; leave other + # accumulator contents (raw concatenated deltas, provider quirks) + # untouched so the rest of the pipeline can decide. for _fc in state.function_calls.values(): _args = _fc.arguments or "" - if not _args.strip(): + _stripped = _args.lstrip() + if not _stripped: _fc.arguments = "{}" - else: + elif _stripped[0] in "{[": try: json.loads(_args) except (TypeError, ValueError):