diff --git a/src/cai/repl/commands/compact.py b/src/cai/repl/commands/compact.py index 8dba1cfef..00df04085 100644 --- a/src/cai/repl/commands/compact.py +++ b/src/cai/repl/commands/compact.py @@ -13,6 +13,7 @@ from rich.panel import Panel from cai.repl.commands.base import Command, register_command +from cai.repl.ui.tty_input import read_repl_yes_no from cai.sdk.agents.models.openai_chatcompletions import get_current_active_model from cai.repl.commands.model import ( get_all_predefined_models, @@ -572,11 +573,7 @@ def handle_cleanup(): f"\n[#9aa0a6][CAI] Compact current conversation? [/]" f"[bold white]({msg_count} messages)[/bold white]" ) - confirm = console.input( - "[#9aa0a6][CAI] Compact conversation? [/][bold #00ff9d](y/N): [/]" - ) - - if confirm.lower() == "y": + if read_repl_yes_no(console, "Compact conversation?"): # Pass the detected agent name to _perform_compaction return self._perform_compaction(None, None, agent_name=agent_name) else: diff --git a/src/cai/repl/ui/prompt.py b/src/cai/repl/ui/prompt.py index daf5f9501..a3b351e6e 100644 --- a/src/cai/repl/ui/prompt.py +++ b/src/cai/repl/ui/prompt.py @@ -216,6 +216,7 @@ def _toolbar_with_separator(): return [sep_line] # Get user input with all features + result = "" try: result = prompt( [("class:prompt", "CAI> ")], @@ -244,6 +245,15 @@ def _toolbar_with_separator(): except (AttributeError, OSError): _REPL_STDIN_EXHAUSTED_PENDING = True return "" + finally: + # prompt_toolkit restores termios from the snapshot taken after we cleared + # ICRNL above; Rich console.input and plain input() then echo Enter as ^M. + try: + from cai.util.streaming import restore_terminal_state + + restore_terminal_state(emit_trailing_newline=False) + except Exception: + pass # Print bottom separator only when user submitted non-empty input, # so that empty Enter produces a single separator between prompts. diff --git a/src/cai/repl/ui/tty_input.py b/src/cai/repl/ui/tty_input.py new file mode 100644 index 000000000..2c78631a8 --- /dev/null +++ b/src/cai/repl/ui/tty_input.py @@ -0,0 +1,78 @@ +"""Line-oriented prompts after prompt_toolkit or Rich Live (y/N, confirmations).""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from rich.console import Console + +_CAI_GREY = "#9aa0a6" +_CAI_GREEN = "#00ff9d" + + +def _restore_tty_after_prompt() -> None: + from cai.util.streaming import restore_terminal_state + + restore_terminal_state(emit_trailing_newline=False) + + +def normalize_repl_line(value: str) -> str: + """Strip carriage returns left over from non-canonical TTY reads.""" + return value.replace("\r", "").strip() + + +def read_repl_line( + console: Console, + prompt: str = "", + *, + markup: bool = True, +) -> str: + """Read one line using prompt_toolkit (same stack as the CAI> prompt).""" + from prompt_toolkit import prompt as ptk_prompt + + _restore_tty_after_prompt() + try: + if prompt and markup: + # Rich markup is for scrollback context only; ptk owns the input line. + console.print(prompt, markup=markup, emoji=False, end="") + raw = ptk_prompt("") + elif prompt: + raw = ptk_prompt(prompt) + else: + raw = ptk_prompt("") + except (EOFError, KeyboardInterrupt): + return "" + finally: + _restore_tty_after_prompt() + return normalize_repl_line(raw) + + +def read_repl_yes_no( + console: Console, + label: str, + *, + default: bool = False, + markup: bool = True, +) -> bool: + """Return True when the user answers y/yes (default answer when they press Enter).""" + from prompt_toolkit import prompt as ptk_prompt + from prompt_toolkit.formatted_text import HTML + + _restore_tty_after_prompt() + suffix = "Y/n" if default else "y/N" + ptk_prompt_text = HTML( + f' {label} ' + f': ' + ) + try: + raw = ptk_prompt(ptk_prompt_text) + except (EOFError, KeyboardInterrupt): + return default + finally: + _restore_tty_after_prompt() + + answer = normalize_repl_line(raw) + if not answer: + return default + return answer.lower() in ("y", "yes") diff --git a/src/cai/sdk/agents/models/chatcompletions/httpx_client.py b/src/cai/sdk/agents/models/chatcompletions/httpx_client.py index 3d428299a..848c83971 100644 --- a/src/cai/sdk/agents/models/chatcompletions/httpx_client.py +++ b/src/cai/sdk/agents/models/chatcompletions/httpx_client.py @@ -43,6 +43,9 @@ # HTTP status codes that trigger automatic retry _RETRYABLE_STATUS = {429, 502, 503, 504, 529} +# User-facing label — never embed gateway URLs in exception text shown in the REPL. +_LLM_SERVER_LABEL = "Alias Robotics® LLM servers" + _LOG = logging.getLogger(__name__) @@ -197,11 +200,17 @@ async def _stream_gen() -> AsyncIterator: # Retries exhausted — raise typed error if resp.status_code == 429: raise LLMRateLimited( - f"Rate limited (429) after {_MAX_RETRIES} retries from {url}", + ( + f"Rate limited (429) after {_MAX_RETRIES} retries " + f"from {_LLM_SERVER_LABEL}" + ), retry_after=_extract_retry_after(resp), ) raise LLMProviderUnavailable( - f"Server error ({resp.status_code}) after {_MAX_RETRIES} retries from {url}" + ( + f"Server error ({resp.status_code}) after " + f"{_MAX_RETRIES} retries from {_LLM_SERVER_LABEL}" + ) ) # HTTP 413: request body exceeds gateway/proxy POST @@ -215,7 +224,7 @@ async def _stream_gen() -> AsyncIterator: pass _log_failed_completion_response(resp, url) raise LLMContextOverflow( - f"Request body too large (413) for {url}", + f"Request body too large (413) for {_LLM_SERVER_LABEL}", details=_build_413_details(url, body), ) @@ -312,11 +321,11 @@ async def _stream_gen() -> AsyncIterator: continue if e.response.status_code == 429: raise LLMRateLimited( - f"Rate limited (429) after retries from {url}" + f"Rate limited (429) after retries from {_LLM_SERVER_LABEL}" ) from e if e.response.status_code in (408, 504): raise LLMTimeout( - f"Timeout ({e.response.status_code}) from {url}" + f"Timeout ({e.response.status_code}) from {_LLM_SERVER_LABEL}" ) from e raise @@ -368,19 +377,28 @@ async def _stream_gen() -> AsyncIterator: # Retries exhausted — raise typed error if resp.status_code == 429: raise LLMRateLimited( - f"Rate limited (429) after {_MAX_RETRIES} retries from {url}", + ( + f"Rate limited (429) after {_MAX_RETRIES} retries " + f"from {_LLM_SERVER_LABEL}" + ), retry_after=_extract_retry_after(resp), ) if resp.status_code in (408, 504): raise LLMTimeout( - f"Timeout ({resp.status_code}) after {_MAX_RETRIES} retries from {url}" + ( + f"Timeout ({resp.status_code}) after {_MAX_RETRIES} " + f"retries from {_LLM_SERVER_LABEL}" + ) ) raise LLMProviderUnavailable( - f"Server error ({resp.status_code}) after {_MAX_RETRIES} retries from {url}" + ( + f"Server error ({resp.status_code}) after " + f"{_MAX_RETRIES} retries from {_LLM_SERVER_LABEL}" + ) ) if resp.status_code in (408,): - raise LLMTimeout(f"Timeout ({resp.status_code}) from {url}") + raise LLMTimeout(f"Timeout ({resp.status_code}) from {_LLM_SERVER_LABEL}") # HTTP 413: request body exceeds gateway/proxy POST size cap. # Not in _RETRYABLE_STATUS because resending the same body @@ -390,7 +408,7 @@ async def _stream_gen() -> AsyncIterator: if resp.status_code == 413: _log_failed_completion_response(resp, url) raise LLMContextOverflow( - f"Request body too large (413) for {url}", + f"Request body too large (413) for {_LLM_SERVER_LABEL}", details=_build_413_details(url, body), ) @@ -422,4 +440,4 @@ async def _stream_gen() -> AsyncIterator: # Should not reach here if last_error: raise last_error - raise LLMProviderUnavailable(f"All retries exhausted for {url}") + raise LLMProviderUnavailable(f"All retries exhausted for {_LLM_SERVER_LABEL}") diff --git a/src/cai/sdk/agents/models/chatcompletions/message_builder.py b/src/cai/sdk/agents/models/chatcompletions/message_builder.py index e75f32ef7..c2631f0cf 100644 --- a/src/cai/sdk/agents/models/chatcompletions/message_builder.py +++ b/src/cai/sdk/agents/models/chatcompletions/message_builder.py @@ -424,6 +424,18 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam: arguments = "{}" elif isinstance(arguments, dict): arguments = json.dumps(arguments) + else: + # Truncated/streamed-then-cut function call args (e.g. "{") + # are not valid JSON and the upstream proxy rejects the whole + # request with HTTP 400, wedging the conversation. Only repair + # half-finished JSON object/array buffers; leave anything else + # (raw concatenated deltas, provider-specific blobs) alone. + stripped = arguments.lstrip() if isinstance(arguments, str) else "" + if stripped and stripped[0] in "{[": + try: + json.loads(arguments) + except (TypeError, ValueError): + arguments = "{}" tool_calls_param.append( ChatCompletionMessageToolCallParam( id=tc.get("id", "")[:40], diff --git a/src/cai/sdk/agents/models/openai_chatcompletions.py b/src/cai/sdk/agents/models/openai_chatcompletions.py index 47d871b0b..ff91cbf66 100644 --- a/src/cai/sdk/agents/models/openai_chatcompletions.py +++ b/src/cai/sdk/agents/models/openai_chatcompletions.py @@ -2676,6 +2676,23 @@ def next_sequence_number() -> int: type="response.content_part.done", ) + # Repair partially-streamed function calls before emission so a + # truncated args string like "{" cannot poison conversation history + # and trigger HTTP 400 on the next request. Only rewrite when the + # buffer is clearly a half-finished JSON object/array; leave other + # accumulator contents (raw concatenated deltas, provider quirks) + # untouched so the rest of the pipeline can decide. + for _fc in state.function_calls.values(): + _args = _fc.arguments or "" + _stripped = _args.lstrip() + if not _stripped: + _fc.arguments = "{}" + elif _stripped[0] in "{[": + try: + json.loads(_args) + except (TypeError, ValueError): + _fc.arguments = "{}" + # Actually send events for the function calls for function_call in state.function_calls.values(): # First, a ResponseOutputItemAdded for the function call diff --git a/src/cai/util/streaming.py b/src/cai/util/streaming.py index ead368db2..628d1743a 100644 --- a/src/cai/util/streaming.py +++ b/src/cai/util/streaming.py @@ -1386,6 +1386,32 @@ def _reset_controlling_tty_sane() -> None: ) except Exception: pass + ensure_cooked_tty() + + +def ensure_cooked_tty() -> None: + """Force canonical line input so ``input()`` / Rich ``console.input`` accept Enter. + + ``stty sane`` alone is not always enough after prompt_toolkit: it may restore a + snapshot taken while ICRNL was cleared, leaving Enter as raw ``\\r`` (shown as ^M). + """ + if not sys.stdin.isatty(): + return + try: + import termios + + fd = sys.stdin.fileno() + attrs = termios.tcgetattr(fd) + iflag, _, _, lflag, _, cc = attrs + iflag |= termios.ICRNL | termios.BRKINT + iflag &= ~(termios.INLCR | termios.IGNCR) + lflag |= termios.ICANON | termios.ECHO | termios.ISIG + cc[termios.VMIN] = 1 + cc[termios.VTIME] = 0 + termios.tcsetattr(fd, termios.TCSADRAIN, (iflag, attrs[1], attrs[2], lflag, attrs[4], cc)) + termios.tcflush(fd, termios.TCIFLUSH) + except Exception: + pass def restore_terminal_state( diff --git a/tests/repl/test_multiline_prompt.py b/tests/repl/test_multiline_prompt.py index e49384989..1fc004697 100644 --- a/tests/repl/test_multiline_prompt.py +++ b/tests/repl/test_multiline_prompt.py @@ -33,6 +33,40 @@ def test_multiline_is_enabled_in_prompt_config(self): 'Without this, Enter may only insert newlines after long agent turns.' ) + def test_tty_restored_after_prompt(self): + """Cooked TTY must be restored after prompt() for follow-up console.input prompts. + + We clear ICRNL before prompt_toolkit starts; on exit it restores that snapshot, + so /compact and other y/N confirmations see Enter as ^M unless we run + restore_terminal_state again in a finally block. + """ + from cai.repl.ui.prompt import get_user_input + import inspect + + source = inspect.getsource(get_user_input) + + assert 'finally:' in source, ( + 'REGRESSION: get_user_input must use finally to restore the TTY after ' + 'prompt_toolkit exits. Without this, Rich console.input shows ^M on Enter.' + ) + assert source.index('finally:') > source.index('prompt('), ( + 'REGRESSION: TTY restore must run after prompt(), not only before it.' + ) + assert 'restore_terminal_state' in source[source.index('finally:'):], ( + 'REGRESSION: finally block must call restore_terminal_state after prompt().' + ) + + def test_ensure_cooked_tty_after_stty_sane(self): + """stty sane must be followed by explicit cooked termios for y/N prompts.""" + from cai.util.streaming import _reset_controlling_tty_sane + import inspect + + source = inspect.getsource(_reset_controlling_tty_sane) + assert 'ensure_cooked_tty' in source, ( + 'REGRESSION: _reset_controlling_tty_sane must call ensure_cooked_tty so ' + 'follow-up console.input accepts Enter instead of echoing ^M.' + ) + def test_icrnl_cleared_before_prompt(self): """ICRNL/INLCR/IGNCR must be cleared before prompt() to keep Enter as submit. diff --git a/tests/repl/test_tty_input.py b/tests/repl/test_tty_input.py new file mode 100644 index 000000000..0d44ec577 --- /dev/null +++ b/tests/repl/test_tty_input.py @@ -0,0 +1,48 @@ +"""Tests for REPL follow-up prompts (y/N after prompt_toolkit).""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +from cai.repl.ui.tty_input import ( + normalize_repl_line, + read_repl_line, + read_repl_yes_no, +) + + +class TestNormalizeReplLine: + def test_strips_carriage_return_from_enter(self): + assert normalize_repl_line("y\r") == "y" + + def test_strips_whitespace(self): + assert normalize_repl_line(" yes \r\n") == "yes" + + +class TestReadReplYesNo: + @patch("prompt_toolkit.prompt", return_value="y\r") + @patch("cai.repl.ui.tty_input._restore_tty_after_prompt") + def test_yes_with_carriage_return(self, _restore, _prompt): + console = MagicMock() + assert read_repl_yes_no(console, "Continue") is True + + @patch("prompt_toolkit.prompt", return_value="") + @patch("cai.repl.ui.tty_input._restore_tty_after_prompt") + def test_empty_defaults_to_no(self, _restore, _prompt): + console = MagicMock() + assert read_repl_yes_no(console, "Continue", default=False) is False + + @patch("prompt_toolkit.prompt", return_value="n") + @patch("cai.repl.ui.tty_input._restore_tty_after_prompt") + def test_no_answer(self, _restore, _prompt): + console = MagicMock() + assert read_repl_yes_no(console, "Continue") is False + + +class TestReadReplLine: + @patch("prompt_toolkit.prompt", return_value="RESET\r") + @patch("cai.repl.ui.tty_input._restore_tty_after_prompt") + def test_restores_tty_around_prompt(self, restore, _prompt): + console = MagicMock() + assert read_repl_line(console, "> ", markup=False) == "RESET" + assert restore.call_count == 2 diff --git a/tests/sdk/test_httpx_client_errors.py b/tests/sdk/test_httpx_client_errors.py new file mode 100644 index 000000000..396eadecd --- /dev/null +++ b/tests/sdk/test_httpx_client_errors.py @@ -0,0 +1,15 @@ +"""Regression: LLM error messages must not expose gateway URLs in the REPL.""" + +from __future__ import annotations + + +class TestLlmErrorMessagesHideGatewayUrl: + def test_user_facing_messages_use_branded_server_label(self): + from cai.sdk.agents.models.chatcompletions import httpx_client + import inspect + + source = inspect.getsource(httpx_client) + assert "_LLM_SERVER_LABEL" in source + assert httpx_client._LLM_SERVER_LABEL == "Alias Robotics® LLM servers" + assert "retries from {url}" not in source + assert "All retries exhausted for {url}" not in source