From fda018a324ee44606b96327e98c8da5e97940b0a Mon Sep 17 00:00:00 2001
From: Rufino Cabrera <rufino@aliasrobotics.com>
Date: Thu, 11 Jun 2026 10:25:45 +0200
Subject: [PATCH 1/7] Fix REPL y/N prompts showing ^M after Enter

Restore cooked TTY state after prompt_toolkit exits so Rich console.input
and other follow-up prompts (e.g. /compact confirmation) accept Enter normally.
---
 src/cai/repl/ui/prompt.py           | 10 ++++++++++
 tests/repl/test_multiline_prompt.py | 23 +++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/src/cai/repl/ui/prompt.py b/src/cai/repl/ui/prompt.py
index daf5f9501..a3b351e6e 100644
--- a/src/cai/repl/ui/prompt.py
+++ b/src/cai/repl/ui/prompt.py
@@ -216,6 +216,7 @@ def _toolbar_with_separator():
         return [sep_line]
 
     # Get user input with all features
+    result = ""
     try:
         result = prompt(
             [("class:prompt", "CAI> ")],
@@ -244,6 +245,15 @@ def _toolbar_with_separator():
         except (AttributeError, OSError):
             _REPL_STDIN_EXHAUSTED_PENDING = True
         return ""
+    finally:
+        # prompt_toolkit restores termios from the snapshot taken after we cleared
+        # ICRNL above; Rich console.input and plain input() then echo Enter as ^M.
+        try:
+            from cai.util.streaming import restore_terminal_state
+
+            restore_terminal_state(emit_trailing_newline=False)
+        except Exception:
+            pass
 
     # Print bottom separator only when user submitted non-empty input,
     # so that empty Enter produces a single separator between prompts.
diff --git a/tests/repl/test_multiline_prompt.py b/tests/repl/test_multiline_prompt.py
index e49384989..4835055b0 100644
--- a/tests/repl/test_multiline_prompt.py
+++ b/tests/repl/test_multiline_prompt.py
@@ -33,6 +33,29 @@ def test_multiline_is_enabled_in_prompt_config(self):
             'Without this, Enter may only insert newlines after long agent turns.'
         )
 
+    def test_tty_restored_after_prompt(self):
+        """Cooked TTY must be restored after prompt() for follow-up console.input prompts.
+
+        We clear ICRNL before prompt_toolkit starts; on exit it restores that snapshot,
+        so /compact and other y/N confirmations see Enter as ^M unless we run
+        restore_terminal_state again in a finally block.
+        """
+        from cai.repl.ui.prompt import get_user_input
+        import inspect
+
+        source = inspect.getsource(get_user_input)
+
+        assert 'finally:' in source, (
+            'REGRESSION: get_user_input must use finally to restore the TTY after '
+            'prompt_toolkit exits. Without this, Rich console.input shows ^M on Enter.'
+        )
+        assert source.index('finally:') > source.index('prompt('), (
+            'REGRESSION: TTY restore must run after prompt(), not only before it.'
+        )
+        assert 'restore_terminal_state' in source[source.index('finally:'):], (
+            'REGRESSION: finally block must call restore_terminal_state after prompt().'
+        )
+
     def test_icrnl_cleared_before_prompt(self):
         """ICRNL/INLCR/IGNCR must be cleared before prompt() to keep Enter as submit.
 

From 937fbb5cc192291a522109d67c0f2119eb42d108 Mon Sep 17 00:00:00 2001
From: Rufino Cabrera <rufino@aliasrobotics.com>
Date: Thu, 11 Jun 2026 10:47:59 +0200
Subject: [PATCH 2/7] Fix /compact y/N prompt echoing ^M on Enter

Force cooked canonical TTY mode after prompt_toolkit and route /compact
confirmation through read_repl_yes_no, which restores the terminal and
strips stray carriage returns from the answer.
---
 src/cai/repl/commands/compact.py    | 10 +++---
 src/cai/repl/ui/tty_input.py        | 52 +++++++++++++++++++++++++++++
 src/cai/util/streaming.py           | 26 +++++++++++++++
 tests/repl/test_multiline_prompt.py | 11 ++++++
 tests/repl/test_tty_input.py        | 48 ++++++++++++++++++++++++++
 5 files changed, 142 insertions(+), 5 deletions(-)
 create mode 100644 src/cai/repl/ui/tty_input.py
 create mode 100644 tests/repl/test_tty_input.py

diff --git a/src/cai/repl/commands/compact.py b/src/cai/repl/commands/compact.py
index 8dba1cfef..af2fc0e91 100644
--- a/src/cai/repl/commands/compact.py
+++ b/src/cai/repl/commands/compact.py
@@ -13,6 +13,7 @@
 from rich.panel import Panel
 
 from cai.repl.commands.base import Command, register_command
+from cai.repl.ui.tty_input import read_repl_yes_no
 from cai.sdk.agents.models.openai_chatcompletions import get_current_active_model
 from cai.repl.commands.model import (
     get_all_predefined_models,
@@ -572,11 +573,10 @@ def handle_cleanup():
             f"\n[#9aa0a6][CAI] Compact current conversation? [/]"
             f"[bold white]({msg_count} messages)[/bold white]"
         )
-        confirm = console.input(
-            "[#9aa0a6][CAI] Compact conversation? [/][bold #00ff9d](y/N): [/]"
-        )
-
-        if confirm.lower() == "y":
+        if read_repl_yes_no(
+            console,
+            "[#9aa0a6][CAI] Compact conversation? [/][bold #00ff9d](y/N): [/]",
+        ):
             # Pass the detected agent name to _perform_compaction
             return self._perform_compaction(None, None, agent_name=agent_name)
         else:
diff --git a/src/cai/repl/ui/tty_input.py b/src/cai/repl/ui/tty_input.py
new file mode 100644
index 000000000..6e9dc63e0
--- /dev/null
+++ b/src/cai/repl/ui/tty_input.py
@@ -0,0 +1,52 @@
+"""Line-oriented prompts after prompt_toolkit or Rich Live (y/N, confirmations)."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from rich.console import Console
+
+
+def prepare_tty_for_line_input() -> None:
+    """Restore cooked TTY before ``input()`` / Rich ``console.input``."""
+    from cai.util.streaming import ensure_cooked_tty, restore_terminal_state
+
+    restore_terminal_state(emit_trailing_newline=False)
+    ensure_cooked_tty()
+
+
+def normalize_repl_line(value: str) -> str:
+    """Strip carriage returns left over from non-canonical TTY reads."""
+    return value.replace("\r", "").strip()
+
+
+def read_repl_line(
+    console: Console,
+    prompt: str = "",
+    *,
+    markup: bool = True,
+) -> str:
+    """Read one line with Rich prompt styling; safe after the main CAI> prompt."""
+    prepare_tty_for_line_input()
+    if prompt:
+        console.print(prompt, markup=markup, emoji=False, end="")
+    try:
+        raw = input()
+    except EOFError:
+        return ""
+    return normalize_repl_line(raw)
+
+
+def read_repl_yes_no(
+    console: Console,
+    prompt: str,
+    *,
+    default: bool = False,
+    markup: bool = True,
+) -> bool:
+    """Return True when the user answers y/yes (default answer when they press Enter)."""
+    answer = read_repl_line(console, prompt, markup=markup)
+    if not answer:
+        return default
+    return answer.lower() in ("y", "yes")
diff --git a/src/cai/util/streaming.py b/src/cai/util/streaming.py
index ead368db2..628d1743a 100644
--- a/src/cai/util/streaming.py
+++ b/src/cai/util/streaming.py
@@ -1386,6 +1386,32 @@ def _reset_controlling_tty_sane() -> None:
         )
     except Exception:
         pass
+    ensure_cooked_tty()
+
+
+def ensure_cooked_tty() -> None:
+    """Force canonical line input so ``input()`` / Rich ``console.input`` accept Enter.
+
+    ``stty sane`` alone is not always enough after prompt_toolkit: it may restore a
+    snapshot taken while ICRNL was cleared, leaving Enter as raw ``\\r`` (shown as ^M).
+    """
+    if not sys.stdin.isatty():
+        return
+    try:
+        import termios
+
+        fd = sys.stdin.fileno()
+        attrs = termios.tcgetattr(fd)
+        iflag, _, _, lflag, _, cc = attrs
+        iflag |= termios.ICRNL | termios.BRKINT
+        iflag &= ~(termios.INLCR | termios.IGNCR)
+        lflag |= termios.ICANON | termios.ECHO | termios.ISIG
+        cc[termios.VMIN] = 1
+        cc[termios.VTIME] = 0
+        termios.tcsetattr(fd, termios.TCSADRAIN, (iflag, attrs[1], attrs[2], lflag, attrs[4], cc))
+        termios.tcflush(fd, termios.TCIFLUSH)
+    except Exception:
+        pass
 
 
 def restore_terminal_state(
diff --git a/tests/repl/test_multiline_prompt.py b/tests/repl/test_multiline_prompt.py
index 4835055b0..1fc004697 100644
--- a/tests/repl/test_multiline_prompt.py
+++ b/tests/repl/test_multiline_prompt.py
@@ -56,6 +56,17 @@ def test_tty_restored_after_prompt(self):
             'REGRESSION: finally block must call restore_terminal_state after prompt().'
         )
 
+    def test_ensure_cooked_tty_after_stty_sane(self):
+        """stty sane must be followed by explicit cooked termios for y/N prompts."""
+        from cai.util.streaming import _reset_controlling_tty_sane
+        import inspect
+
+        source = inspect.getsource(_reset_controlling_tty_sane)
+        assert 'ensure_cooked_tty' in source, (
+            'REGRESSION: _reset_controlling_tty_sane must call ensure_cooked_tty so '
+            'follow-up console.input accepts Enter instead of echoing ^M.'
+        )
+
     def test_icrnl_cleared_before_prompt(self):
         """ICRNL/INLCR/IGNCR must be cleared before prompt() to keep Enter as submit.
 
diff --git a/tests/repl/test_tty_input.py b/tests/repl/test_tty_input.py
new file mode 100644
index 000000000..13af3e2c2
--- /dev/null
+++ b/tests/repl/test_tty_input.py
@@ -0,0 +1,48 @@
+"""Tests for REPL follow-up prompts (y/N after prompt_toolkit)."""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock, patch
+
+from cai.repl.ui.tty_input import (
+    normalize_repl_line,
+    read_repl_line,
+    read_repl_yes_no,
+)
+
+
+class TestNormalizeReplLine:
+    def test_strips_carriage_return_from_enter(self):
+        assert normalize_repl_line("y\r") == "y"
+
+    def test_strips_whitespace(self):
+        assert normalize_repl_line("  yes  \r\n") == "yes"
+
+
+class TestReadReplYesNo:
+    @patch("cai.repl.ui.tty_input.input", return_value="y\r")
+    @patch("cai.repl.ui.tty_input.prepare_tty_for_line_input")
+    def test_yes_with_carriage_return(self, _prepare, _input):
+        console = MagicMock()
+        assert read_repl_yes_no(console, "Continue? (y/N): ") is True
+
+    @patch("cai.repl.ui.tty_input.input", return_value="")
+    @patch("cai.repl.ui.tty_input.prepare_tty_for_line_input")
+    def test_empty_defaults_to_no(self, _prepare, _input):
+        console = MagicMock()
+        assert read_repl_yes_no(console, "Continue? (y/N): ", default=False) is False
+
+    @patch("cai.repl.ui.tty_input.input", return_value="n")
+    @patch("cai.repl.ui.tty_input.prepare_tty_for_line_input")
+    def test_no_answer(self, _prepare, _input):
+        console = MagicMock()
+        assert read_repl_yes_no(console, "Continue? (y/N): ") is False
+
+
+class TestReadReplLine:
+    @patch("cai.repl.ui.tty_input.input", return_value="RESET\r")
+    @patch("cai.repl.ui.tty_input.prepare_tty_for_line_input")
+    def test_prepares_tty_before_read(self, prepare, _input):
+        console = MagicMock()
+        assert read_repl_line(console, "> ") == "RESET"
+        prepare.assert_called_once()

From 95506508a28d027a4fa3549270ac4623bfe4bf82 Mon Sep 17 00:00:00 2001
From: Rufino Cabrera <rufino@aliasrobotics.com>
Date: Thu, 11 Jun 2026 11:17:53 +0200
Subject: [PATCH 3/7] Use prompt_toolkit for /compact y/N confirmation

Rich console.input still echoed ^M after prompt_toolkit even with cooked
TTY restore. Route compact confirmation through prompt_toolkit like the
main CAI> prompt and other interactive agent flows.
---
 src/cai/repl/commands/compact.py |  5 +---
 src/cai/repl/ui/tty_input.py     | 51 ++++++++++++++++++++++++--------
 tests/repl/test_tty_input.py     | 34 ++++++++++-----------
 3 files changed, 57 insertions(+), 33 deletions(-)

diff --git a/src/cai/repl/commands/compact.py b/src/cai/repl/commands/compact.py
index af2fc0e91..00df04085 100644
--- a/src/cai/repl/commands/compact.py
+++ b/src/cai/repl/commands/compact.py
@@ -573,10 +573,7 @@ def handle_cleanup():
             f"\n[#9aa0a6][CAI] Compact current conversation? [/]"
             f"[bold white]({msg_count} messages)[/bold white]"
         )
-        if read_repl_yes_no(
-            console,
-            "[#9aa0a6][CAI] Compact conversation? [/][bold #00ff9d](y/N): [/]",
-        ):
+        if read_repl_yes_no(console, "Compact conversation?"):
             # Pass the detected agent name to _perform_compaction
             return self._perform_compaction(None, None, agent_name=agent_name)
         else:
diff --git a/src/cai/repl/ui/tty_input.py b/src/cai/repl/ui/tty_input.py
index 6e9dc63e0..9f801635d 100644
--- a/src/cai/repl/ui/tty_input.py
+++ b/src/cai/repl/ui/tty_input.py
@@ -7,13 +7,14 @@
 if TYPE_CHECKING:
     from rich.console import Console
 
+_CAI_GREY = "#9aa0a6"
+_CAI_GREEN = "#00ff9d"
 
-def prepare_tty_for_line_input() -> None:
-    """Restore cooked TTY before ``input()`` / Rich ``console.input``."""
-    from cai.util.streaming import ensure_cooked_tty, restore_terminal_state
+
+def _restore_tty_after_prompt() -> None:
+    from cai.util.streaming import restore_terminal_state
 
     restore_terminal_state(emit_trailing_newline=False)
-    ensure_cooked_tty()
 
 
 def normalize_repl_line(value: str) -> str:
@@ -27,26 +28,52 @@ def read_repl_line(
     *,
     markup: bool = True,
 ) -> str:
-    """Read one line with Rich prompt styling; safe after the main CAI> prompt."""
-    prepare_tty_for_line_input()
-    if prompt:
-        console.print(prompt, markup=markup, emoji=False, end="")
+    """Read one line using prompt_toolkit (same stack as the CAI> prompt)."""
+    from prompt_toolkit import prompt as ptk_prompt
+    from prompt_toolkit.formatted_text import HTML
+
+    _restore_tty_after_prompt()
     try:
-        raw = input()
-    except EOFError:
+        if prompt and markup:
+            # Rich markup is for scrollback context only; ptk owns the input line.
+            console.print(prompt, markup=markup, emoji=False, end="")
+            raw = ptk_prompt("")
+        elif prompt:
+            raw = ptk_prompt(prompt)
+        else:
+            raw = ptk_prompt("")
+    except (EOFError, KeyboardInterrupt):
         return ""
+    finally:
+        _restore_tty_after_prompt()
     return normalize_repl_line(raw)
 
 
 def read_repl_yes_no(
     console: Console,
-    prompt: str,
+    label: str,
     *,
     default: bool = False,
     markup: bool = True,
 ) -> bool:
     """Return True when the user answers y/yes (default answer when they press Enter)."""
-    answer = read_repl_line(console, prompt, markup=markup)
+    from prompt_toolkit import prompt as ptk_prompt
+    from prompt_toolkit.formatted_text import HTML
+
+    _restore_tty_after_prompt()
+    suffix = "Y/n" if default else "y/N"
+    ptk_prompt_text = HTML(
+        f'<style fg="{_CAI_GREY}">[CAI]</style> {label} '
+        f'<style fg="{_CAI_GREEN}"><b>({suffix})</b></style>: '
+    )
+    try:
+        raw = ptk_prompt(ptk_prompt_text)
+    except (EOFError, KeyboardInterrupt):
+        return default
+    finally:
+        _restore_tty_after_prompt()
+
+    answer = normalize_repl_line(raw)
     if not answer:
         return default
     return answer.lower() in ("y", "yes")
diff --git a/tests/repl/test_tty_input.py b/tests/repl/test_tty_input.py
index 13af3e2c2..0d44ec577 100644
--- a/tests/repl/test_tty_input.py
+++ b/tests/repl/test_tty_input.py
@@ -20,29 +20,29 @@ def test_strips_whitespace(self):
 
 
 class TestReadReplYesNo:
-    @patch("cai.repl.ui.tty_input.input", return_value="y\r")
-    @patch("cai.repl.ui.tty_input.prepare_tty_for_line_input")
-    def test_yes_with_carriage_return(self, _prepare, _input):
+    @patch("prompt_toolkit.prompt", return_value="y\r")
+    @patch("cai.repl.ui.tty_input._restore_tty_after_prompt")
+    def test_yes_with_carriage_return(self, _restore, _prompt):
         console = MagicMock()
-        assert read_repl_yes_no(console, "Continue? (y/N): ") is True
+        assert read_repl_yes_no(console, "Continue") is True
 
-    @patch("cai.repl.ui.tty_input.input", return_value="")
-    @patch("cai.repl.ui.tty_input.prepare_tty_for_line_input")
-    def test_empty_defaults_to_no(self, _prepare, _input):
+    @patch("prompt_toolkit.prompt", return_value="")
+    @patch("cai.repl.ui.tty_input._restore_tty_after_prompt")
+    def test_empty_defaults_to_no(self, _restore, _prompt):
         console = MagicMock()
-        assert read_repl_yes_no(console, "Continue? (y/N): ", default=False) is False
+        assert read_repl_yes_no(console, "Continue", default=False) is False
 
-    @patch("cai.repl.ui.tty_input.input", return_value="n")
-    @patch("cai.repl.ui.tty_input.prepare_tty_for_line_input")
-    def test_no_answer(self, _prepare, _input):
+    @patch("prompt_toolkit.prompt", return_value="n")
+    @patch("cai.repl.ui.tty_input._restore_tty_after_prompt")
+    def test_no_answer(self, _restore, _prompt):
         console = MagicMock()
-        assert read_repl_yes_no(console, "Continue? (y/N): ") is False
+        assert read_repl_yes_no(console, "Continue") is False
 
 
 class TestReadReplLine:
-    @patch("cai.repl.ui.tty_input.input", return_value="RESET\r")
-    @patch("cai.repl.ui.tty_input.prepare_tty_for_line_input")
-    def test_prepares_tty_before_read(self, prepare, _input):
+    @patch("prompt_toolkit.prompt", return_value="RESET\r")
+    @patch("cai.repl.ui.tty_input._restore_tty_after_prompt")
+    def test_restores_tty_around_prompt(self, restore, _prompt):
         console = MagicMock()
-        assert read_repl_line(console, "> ") == "RESET"
-        prepare.assert_called_once()
+        assert read_repl_line(console, "> ", markup=False) == "RESET"
+        assert restore.call_count == 2

From f454c41e94da4a76ac128b7af6f52d1abe4d99f7 Mon Sep 17 00:00:00 2001
From: Rufino Cabrera <rufino@aliasrobotics.com>
Date: Thu, 11 Jun 2026 11:26:40 +0200
Subject: [PATCH 4/7] Remove unused HTML import from read_repl_line

---
 src/cai/repl/ui/tty_input.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/cai/repl/ui/tty_input.py b/src/cai/repl/ui/tty_input.py
index 9f801635d..2c78631a8 100644
--- a/src/cai/repl/ui/tty_input.py
+++ b/src/cai/repl/ui/tty_input.py
@@ -30,7 +30,6 @@ def read_repl_line(
 ) -> str:
     """Read one line using prompt_toolkit (same stack as the CAI> prompt)."""
     from prompt_toolkit import prompt as ptk_prompt
-    from prompt_toolkit.formatted_text import HTML
 
     _restore_tty_after_prompt()
     try:

From 23739d06312536f5ad88a849ee948d929cc6a859 Mon Sep 17 00:00:00 2001
From: Rufino Cabrera <rufino@aliasrobotics.com>
Date: Thu, 11 Jun 2026 12:14:09 +0200
Subject: [PATCH 5/7] Hide LLM gateway URL from user-facing error messages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

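Replace endpoint URLs in the httpx client's user-facing exception text
(rate-limit, timeout, server-error, retry-exhaustion and 413 messages)
with the branded Alias Robotics® LLM servers label. The URL is no longer
interpolated into the exception message itself; it is still passed to
_log_failed_completion_response() for logging and to
_build_413_details() for the 413 details payload.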
---
 .../models/chatcompletions/httpx_client.py    | 40 ++++++++++++++-----
 tests/sdk/test_httpx_client_errors.py         | 15 +++++++
 2 files changed, 44 insertions(+), 11 deletions(-)
 create mode 100644 tests/sdk/test_httpx_client_errors.py

diff --git a/src/cai/sdk/agents/models/chatcompletions/httpx_client.py b/src/cai/sdk/agents/models/chatcompletions/httpx_client.py
index 3d428299a..848c83971 100644
--- a/src/cai/sdk/agents/models/chatcompletions/httpx_client.py
+++ b/src/cai/sdk/agents/models/chatcompletions/httpx_client.py
@@ -43,6 +43,9 @@
 # HTTP status codes that trigger automatic retry
 _RETRYABLE_STATUS = {429, 502, 503, 504, 529}
 
+# User-facing label — never embed gateway URLs in exception text shown in the REPL.
+_LLM_SERVER_LABEL = "Alias Robotics® LLM servers"
+
 _LOG = logging.getLogger(__name__)
 
 
@@ -197,11 +200,17 @@ async def _stream_gen() -> AsyncIterator:
                                 # Retries exhausted — raise typed error
                                 if resp.status_code == 429:
                                     raise LLMRateLimited(
-                                        f"Rate limited (429) after {_MAX_RETRIES} retries from {url}",
+                                        (
+                                            f"Rate limited (429) after {_MAX_RETRIES} retries "
+                                            f"from {_LLM_SERVER_LABEL}"
+                                        ),
                                         retry_after=_extract_retry_after(resp),
                                     )
                                 raise LLMProviderUnavailable(
-                                    f"Server error ({resp.status_code}) after {_MAX_RETRIES} retries from {url}"
+                                    (
+                                        f"Server error ({resp.status_code}) after "
+                                        f"{_MAX_RETRIES} retries from {_LLM_SERVER_LABEL}"
+                                    )
                                 )
 
                             # HTTP 413: request body exceeds gateway/proxy POST
@@ -215,7 +224,7 @@ async def _stream_gen() -> AsyncIterator:
                                     pass
                                 _log_failed_completion_response(resp, url)
                                 raise LLMContextOverflow(
-                                    f"Request body too large (413) for {url}",
+                                    f"Request body too large (413) for {_LLM_SERVER_LABEL}",
                                     details=_build_413_details(url, body),
                                 )
 
@@ -312,11 +321,11 @@ async def _stream_gen() -> AsyncIterator:
                             continue
                         if e.response.status_code == 429:
                             raise LLMRateLimited(
-                                f"Rate limited (429) after retries from {url}"
+                                f"Rate limited (429) after retries from {_LLM_SERVER_LABEL}"
                             ) from e
                         if e.response.status_code in (408, 504):
                             raise LLMTimeout(
-                                f"Timeout ({e.response.status_code}) from {url}"
+                                f"Timeout ({e.response.status_code}) from {_LLM_SERVER_LABEL}"
                             ) from e
                         raise
 
@@ -368,19 +377,28 @@ async def _stream_gen() -> AsyncIterator:
                         # Retries exhausted — raise typed error
                         if resp.status_code == 429:
                             raise LLMRateLimited(
-                                f"Rate limited (429) after {_MAX_RETRIES} retries from {url}",
+                                (
+                                    f"Rate limited (429) after {_MAX_RETRIES} retries "
+                                    f"from {_LLM_SERVER_LABEL}"
+                                ),
                                 retry_after=_extract_retry_after(resp),
                             )
                         if resp.status_code in (408, 504):
                             raise LLMTimeout(
-                                f"Timeout ({resp.status_code}) after {_MAX_RETRIES} retries from {url}"
+                                (
+                                    f"Timeout ({resp.status_code}) after {_MAX_RETRIES} "
+                                    f"retries from {_LLM_SERVER_LABEL}"
+                                )
                             )
                         raise LLMProviderUnavailable(
-                            f"Server error ({resp.status_code}) after {_MAX_RETRIES} retries from {url}"
+                            (
+                                f"Server error ({resp.status_code}) after "
+                                f"{_MAX_RETRIES} retries from {_LLM_SERVER_LABEL}"
+                            )
                         )
 
                     if resp.status_code in (408,):
-                        raise LLMTimeout(f"Timeout ({resp.status_code}) from {url}")
+                        raise LLMTimeout(f"Timeout ({resp.status_code}) from {_LLM_SERVER_LABEL}")
 
                     # HTTP 413: request body exceeds gateway/proxy POST size cap.
                     # Not in _RETRYABLE_STATUS because resending the same body
@@ -390,7 +408,7 @@ async def _stream_gen() -> AsyncIterator:
                     if resp.status_code == 413:
                         _log_failed_completion_response(resp, url)
                         raise LLMContextOverflow(
-                            f"Request body too large (413) for {url}",
+                            f"Request body too large (413) for {_LLM_SERVER_LABEL}",
                             details=_build_413_details(url, body),
                         )
 
@@ -422,4 +440,4 @@ async def _stream_gen() -> AsyncIterator:
             # Should not reach here
             if last_error:
                 raise last_error
-            raise LLMProviderUnavailable(f"All retries exhausted for {url}")
+            raise LLMProviderUnavailable(f"All retries exhausted for {_LLM_SERVER_LABEL}")
diff --git a/tests/sdk/test_httpx_client_errors.py b/tests/sdk/test_httpx_client_errors.py
new file mode 100644
index 000000000..396eadecd
--- /dev/null
+++ b/tests/sdk/test_httpx_client_errors.py
@@ -0,0 +1,15 @@
+"""Regression: LLM error messages must not expose gateway URLs in the REPL."""
+
+from __future__ import annotations
+
+
+class TestLlmErrorMessagesHideGatewayUrl:
+    def test_user_facing_messages_use_branded_server_label(self):
+        from cai.sdk.agents.models.chatcompletions import httpx_client
+        import inspect
+
+        source = inspect.getsource(httpx_client)
+        assert "_LLM_SERVER_LABEL" in source
+        assert httpx_client._LLM_SERVER_LABEL == "Alias Robotics® LLM servers"
+        assert "retries from {url}" not in source
+        assert "All retries exhausted for {url}" not in source

From 967762950c33f2dba509079152d3169fe55f0ef9 Mon Sep 17 00:00:00 2001
From: Yahya <yahya@aliasrobotics.com>
Date: Mon, 8 Jun 2026 09:58:27 +0200
Subject: [PATCH 6/7] fix(chatcompletions): repair truncated tool-call args
 before emission

A streaming tool call cut off after the opening brace would leave
state.function_calls[i].arguments = "{". CAI persisted that into
conversation history verbatim, and on the next turn the upstream
litellm proxy strict-parsed it as JSON and rejected the whole
request with HTTP 400 ("unexpected end of data: line 1 column 2"),
wedging the session.

- openai_chatcompletions.py: at end-of-stream, validate every
  accumulated function_call.arguments string. If empty or not
  valid JSON, normalize to "{}" before emitting events.
- message_builder.py: same guard when replaying assistant tool_calls
  out of memory, so a poisoned history loaded from disk also recovers.
---
 .../models/chatcompletions/message_builder.py       |  8 ++++++++
 src/cai/sdk/agents/models/openai_chatcompletions.py | 13 +++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/src/cai/sdk/agents/models/chatcompletions/message_builder.py b/src/cai/sdk/agents/models/chatcompletions/message_builder.py
index e75f32ef7..a1f8968f0 100644
--- a/src/cai/sdk/agents/models/chatcompletions/message_builder.py
+++ b/src/cai/sdk/agents/models/chatcompletions/message_builder.py
@@ -424,6 +424,14 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
                         arguments = "{}"
                     elif isinstance(arguments, dict):
                         arguments = json.dumps(arguments)
+                    else:
+                        # Truncated/streamed-then-cut function call args (e.g. "{")
+                        # are not valid JSON and the upstream proxy rejects the whole
+                        # request with HTTP 400, wedging the conversation.
+                        try:
+                            json.loads(arguments)
+                        except (TypeError, ValueError):
+                            arguments = "{}"
                     tool_calls_param.append(
                         ChatCompletionMessageToolCallParam(
                             id=tc.get("id", "")[:40],
diff --git a/src/cai/sdk/agents/models/openai_chatcompletions.py b/src/cai/sdk/agents/models/openai_chatcompletions.py
index 47d871b0b..7156de4d0 100644
--- a/src/cai/sdk/agents/models/openai_chatcompletions.py
+++ b/src/cai/sdk/agents/models/openai_chatcompletions.py
@@ -2676,6 +2676,19 @@ def next_sequence_number() -> int:
                         type="response.content_part.done",
                     )
 
+                # Repair partially-streamed function calls before emission so a
+                # truncated args string like "{" cannot poison conversation history
+                # and trigger HTTP 400 on the next request.
+                for _fc in state.function_calls.values():
+                    _args = _fc.arguments or ""
+                    if not _args.strip():
+                        _fc.arguments = "{}"
+                    else:
+                        try:
+                            json.loads(_args)
+                        except (TypeError, ValueError):
+                            _fc.arguments = "{}"
+
                 # Actually send events for the function calls
                 for function_call in state.function_calls.values():
                     # First, a ResponseOutputItemAdded for the function call

From cc82345bc0453c134b82ad888606d75b2eb375dc Mon Sep 17 00:00:00 2001
From: Yahya <yahya@aliasrobotics.com>
Date: Mon, 8 Jun 2026 10:27:46 +0200
Subject: [PATCH 7/7] fix(chatcompletions): only repair args that look like
 truncated JSON

The previous repair pass rewrote any non-parseable arguments string to
"{}", which clobbered legitimate accumulator contents in
test_stream_response_yields_events_for_tool_call (concatenated raw
deltas like "arg1arg2"). Narrow the guard: a non-empty buffer is
replaced with "{}" only when, after stripping leading whitespace, it
starts with "{" or "[" *and* fails to parse. Other non-empty buffers
are left untouched. Empty or whitespace-only buffers are still
normalized to "{}" in the streaming path (openai_chatcompletions.py).
---
 .../models/chatcompletions/message_builder.py      | 14 +++++++++-----
 .../sdk/agents/models/openai_chatcompletions.py    | 10 +++++++---
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/src/cai/sdk/agents/models/chatcompletions/message_builder.py b/src/cai/sdk/agents/models/chatcompletions/message_builder.py
index a1f8968f0..c2631f0cf 100644
--- a/src/cai/sdk/agents/models/chatcompletions/message_builder.py
+++ b/src/cai/sdk/agents/models/chatcompletions/message_builder.py
@@ -427,11 +427,15 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
                     else:
                         # Truncated/streamed-then-cut function call args (e.g. "{")
                         # are not valid JSON and the upstream proxy rejects the whole
-                        # request with HTTP 400, wedging the conversation.
-                        try:
-                            json.loads(arguments)
-                        except (TypeError, ValueError):
-                            arguments = "{}"
+                        # request with HTTP 400, wedging the conversation. Only repair
+                        # half-finished JSON object/array buffers; leave anything else
+                        # (raw concatenated deltas, provider-specific blobs) alone.
+                        stripped = arguments.lstrip() if isinstance(arguments, str) else ""
+                        if stripped and stripped[0] in "{[":
+                            try:
+                                json.loads(arguments)
+                            except (TypeError, ValueError):
+                                arguments = "{}"
                     tool_calls_param.append(
                         ChatCompletionMessageToolCallParam(
                             id=tc.get("id", "")[:40],
diff --git a/src/cai/sdk/agents/models/openai_chatcompletions.py b/src/cai/sdk/agents/models/openai_chatcompletions.py
index 7156de4d0..ff91cbf66 100644
--- a/src/cai/sdk/agents/models/openai_chatcompletions.py
+++ b/src/cai/sdk/agents/models/openai_chatcompletions.py
@@ -2678,12 +2678,16 @@ def next_sequence_number() -> int:
 
                 # Repair partially-streamed function calls before emission so a
                 # truncated args string like "{" cannot poison conversation history
-                # and trigger HTTP 400 on the next request.
+                # and trigger HTTP 400 on the next request. Only rewrite when the
+                # buffer is clearly a half-finished JSON object/array; leave other
+                # accumulator contents (raw concatenated deltas, provider quirks)
+                # untouched so the rest of the pipeline can decide.
                 for _fc in state.function_calls.values():
                     _args = _fc.arguments or ""
-                    if not _args.strip():
+                    _stripped = _args.lstrip()
+                    if not _stripped:
                         _fc.arguments = "{}"
-                    else:
+                    elif _stripped[0] in "{[":
                         try:
                             json.loads(_args)
                         except (TypeError, ValueError):