coding-proxy/src/coding/proxy/routing/executor.py at master · ThreeFish-AI/coding-proxy · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
"""路由执行器 — 统一的 tier 迭代门控引擎.

封装 ``route_stream`` / ``route_message`` 共享的 tier 循环、
门控判断与错误处理逻辑，消除两个路由方法间的重复代码。
"""

from __future__ import annotations

import logging
import time
from collections.abc import AsyncIterator
from typing import Any

import httpx

from ..vendors.base import (
    NoCompatibleVendorError,
    RequestCapabilities,
    VendorResponse,
)
from ..vendors.token_manager import TokenAcquireError, TokenErrorKind
from .error_classifier import (
    build_request_capabilities,
    extract_error_payload_from_http_status,
    is_semantic_rejection,
    is_structural_validation_error,
)
from .rate_limit import (
    compute_effective_retry_seconds,
    compute_rate_limit_deadline,
    parse_rate_limit_headers,
)
from .session_manager import RouteSessionManager
from .session_policy import SessionPolicyResolver
from .tier import VendorTier
from .usage_parser import (
    build_usage_evidence_records,
    has_missing_input_usage_signals,
    parse_usage_from_chunk,
)
from .usage_recorder import UsageRecorder

# 向后兼容别名
BackendResponse = VendorResponse
NoCompatibleBackendError = NoCompatibleVendorError
from ..compat.canonical import CompatibilityStatus, build_canonical_request

logger = logging.getLogger(__name__)


def _log_http_error_detail(
    tier_name: str,
    exc: Exception,
    *,
    is_stream: bool = False,
    tier: VendorTier | None = None,
) -> None:
    """Log a multi-line diagnostic record for a failed tier request.

    Emits a single WARNING containing the exception type and, for
    ``httpx.HTTPStatusError``, the status code, a response-body preview and
    the ``error.type`` / ``error.message`` fields of a JSON error payload.
    For any other exception the (truncated) exception text is logged.
    When ``tier`` has a circuit breaker, a snapshot of its state is appended.

    This helper runs inside exception handlers, so it must never raise
    itself: body access is guarded and the error message is coerced to
    ``str`` before slicing.

    Args:
        tier_name: Name of the tier whose request failed.
        exc: The exception raised by the vendor call.
        is_stream: Whether the failing request was a streaming request
            (only affects the wording of the log header).
        tier: Optional tier object; used only to read ``circuit_breaker``.
    """
    detail_parts = [f"Tier {tier_name} {'stream' if is_stream else 'message'} failed:"]
    detail_parts.append(f"  exc_type={type(exc).__name__}")
    if isinstance(exc, httpx.HTTPStatusError) and exc.response is not None:
        resp = exc.response
        detail_parts.append(f"  status={resp.status_code}")
        try:
            body_preview = (
                (resp.text[:300] if resp.text else "(empty)")
                if resp.content
                else "(no content)"
            )
        except httpx.StreamError:
            # A streamed response whose body was never read raises
            # ResponseNotRead (a StreamError) on .content/.text access.
            body_preview = "(body not read)"
        detail_parts.append(f"  response_body={body_preview}")
        # Best effort: pull error type / message out of a JSON error payload.
        try:
            payload = resp.json() if resp.content else None
        except Exception:
            payload = None
        if isinstance(payload, dict):
            err = payload.get("error", {})
            if isinstance(err, dict):
                detail_parts.append(f"  error_type={err.get('type', 'N/A')}")
                # The message may be null or a non-string; never slice it raw.
                raw_message = err.get("message", "N/A")
                message_text = "N/A" if raw_message is None else str(raw_message)
                detail_parts.append(f"  error_msg={message_text[:200]}")
    else:
        detail_parts.append(f"  message={str(exc)[:300]}")
    # Circuit breaker state snapshot.
    if tier and tier.circuit_breaker:
        cb = tier.circuit_breaker
        cb_info = cb.get_info()
        detail_parts.append(
            f"  circuit_breaker={cb_info['state']} "
            f"(failures={cb_info['failure_count']}/{cb._failure_threshold})"
        )
    logger.warning("\n".join(detail_parts))


def _has_tool_results(body: dict[str, Any]) -> bool:
    """检测请求体是否包含 tool_result 内容块.

    用于诊断日志中标记「当前请求是否处于工具执行循环」，
    帮助快速定位 vendor 对 tool_result 处理不兼容的问题（如 Zhipu 500）.
    """
    for msg in body.get("messages", []):
        content = msg.get("content")
        if not isinstance(content, list):
            continue
        if any(isinstance(b, dict) and b.get("type") == "tool_result" for b in content):
            return True
    return False


def _is_likely_request_format_error(
    status_code: int,
    error_body_text: str | None,
    body: dict[str, Any],
) -> bool:
    """判断 HTTP 错误是否可能由请求格式不兼容导致（而非供应商故障）.

    当请求包含 tool_result 且供应商返回 400 时，极大概率是消息格式转换
    问题（如 tool_result 错位、字段缺失等），此类错误不应计入熔断器，
    因为重试同一格式的请求必然再次失败。

    此函数是 :func:`is_semantic_rejection` 的补充——后者依赖结构化 error body
    （JSON），而部分供应商（如 Copilot）的 400 响应可能是纯文本 ``Bad Request``。
    """
    if status_code != 400:
        return False
    if not _has_tool_results(body):
        return False
    # 400 + 有 tool_result + 无法解析为结构化错误 → 高概率格式问题
    if error_body_text is not None:
        trimmed = error_body_text.strip().lower()
        # 纯文本 400 响应（Copilot 等）或无意义的错误体
        if trimmed in ("bad request", "bad request\n", ""):
            return True
        # 非结构化响应体（非 JSON）
        if not trimmed.startswith("{") and len(trimmed) < 200:
            return True
        # 结构化 JSON 400 但含 tool_call 格式错误码 → 格式不兼容
        # （如 Copilot 返回 {"error":{"code":"invalid_tool_call_format",...}}）
        if "invalid_tool_call_format" in trimmed:
            return True
    return False


def _log_vendor_response_error(
    tier_name: str,
    resp: VendorResponse,
    body: dict[str, Any],
    *,
    is_stream: bool = False,
) -> None:
    """Log the details of an error ``VendorResponse`` as one WARNING record.

    Counterpart of :func:`_log_http_error_detail` for the case where the
    vendor call returns a response object with an error status instead of
    raising an httpx exception.

    The record contains the status, error type / message, request context
    (model, whether tools and tool results are present) and, when the
    response has a raw body, a preview of it.

    Args:
        tier_name: Name of the tier that produced the response.
        resp: The vendor response to describe.
        body: The request body that was sent.
        is_stream: Selects "stream" or "message" in the log header.
    """
    lines = [
        f"Tier {tier_name} {'stream' if is_stream else 'message'} vendor error response:",
        f"  status={resp.status_code}",
        f"  error_type={resp.error_type or 'N/A'}",
        f"  error_msg={(resp.error_message or 'N/A')[:300]}",
        # Request context: model / tools / tool results.
        f"  model={body.get('model', 'unknown')}",
        f"  has_tools={bool(body.get('tools'))}",
        f"  has_tool_results={_has_tool_results(body)}",
    ]
    # Response body preview.
    if resp.raw_body:
        try:
            preview = resp.raw_body.decode("utf-8", errors="replace")[:500]
        except (AttributeError, UnicodeDecodeError):
            preview = f"(binary, {len(resp.raw_body)} bytes)"
        lines.append(f"  response_body_preview={preview}")
    logger.warning("\n".join(lines))


# tier.name → 上游 Vendor 协议标签映射（用于 token 用量日志标注）
_VENDOR_PROTOCOL_LABEL_MAP: dict[str, str] = {
    "anthropic": "Anthropic",
    "zhipu": "Anthropic",
    "minimax": "Anthropic",
    "kimi": "Anthropic",
    "doubao": "Anthropic",
    "xiaomi": "Anthropic",
    "alibaba": "Anthropic",
    "copilot": "OpenAI",
    "antigravity": "Gemini",
}


class _RouteExecutor:
    """统一的 tier 迭代门控引擎.

    职责：
    - 按优先级遍历 tiers，执行能力门控与健康检查
    - 委托具体的流式/非流式执行给调用方回调
    - 统一处理 TokenAcquireError / HTTP 错误 / 语义拒绝
    - 成功后委托 UsageRecorder 记录用量
    """

    def __init__(
        self,
        router: Any,  # RequestRouter 引用，用于写入活跃供应商状态
        tiers: list[VendorTier],
        usage_recorder: UsageRecorder,
        session_manager: RouteSessionManager,
        reauth_coordinator: Any | None = None,
        session_policy_resolver: SessionPolicyResolver | None = None,
    ) -> None:
        self._router = router
        self._tiers = tiers
        self._recorder = usage_recorder
        self._session_mgr = session_manager
        self._reauth_coordinator = reauth_coordinator
        self._policy_resolver = session_policy_resolver or SessionPolicyResolver()

        # Tier 名称 → OAuth provider 名称的映射
        self._tier_provider_map: dict[str, str] = {
            "copilot": "github",
            "antigravity": "google",
        }

    # ── 公开执行入口 ──────────────────────────────────────

    def _resolve_effective_tiers(self, session_key: str) -> list[VendorTier]:
        """根据 Session Policy 解析生效的 tier 顺序.

        策略指定的 vendor 按其顺序排列在头部，未提及的保持在末尾。
        无策略时返回全局默认顺序。
        """
        policy = self._policy_resolver.resolve(session_key)
        if not policy or not policy.tiers:
            return self._tiers

        name_to_tier = {t.name: t for t in self._tiers}
        ordered: list[VendorTier] = []
        seen: set[str] = set()
        for name in policy.tiers:
            tier = name_to_tier.get(name)
            if tier and name not in seen:
                ordered.append(tier)
                seen.add(name)
        for tier in self._tiers:
            if tier.name not in seen:
                ordered.append(tier)
                seen.add(tier.name)
        return ordered

    def _prepare_body_for_tier(
        self,
        body: dict[str, Any],
        tier: VendorTier,
        source_vendor: str | None = None,
    ) -> dict[str, Any]:
        """Build the request body to send to ``tier``.

        Looks up the transition channel registered for the
        ``source_vendor`` → ``tier.name`` pair and applies it. The body is
        returned unchanged when ``source_vendor`` is ``None`` or no channel
        is registered for the pair.

        Args:
            body: The original request body.
            tier: The tier about to receive the request.
            source_vendor: Vendor the body content originates from, if known.

        Returns:
            The channel's prepared body, or ``body`` itself.
        """
        from ..convert.vendor_channels import get_transition_channel

        channel_fn = (
            get_transition_channel(source_vendor, tier.name)
            if source_vendor is not None
            else None
        )
        if channel_fn is None:
            return body

        # The channel returns the converted body plus a list of adaptation notes.
        prepared, adaptations = channel_fn(body)
        if adaptations:
            logger.debug(
                "Applied transition channel %s → %s: %s",
                source_vendor,
                tier.name,
                ", ".join(adaptations),
            )
        return prepared

    @staticmethod
    def _determine_source_vendor(
        target_name: str,
        failed_tier_name: str | None,
        session_record: Any,
        body: dict[str, Any] | None = None,
    ) -> str | None:
        """Pick the source vendor for a cross-vendor body conversion.

        Candidates are tried in this order; a candidate only counts when a
        transition channel towards ``target_name`` is registered for it:

        1. ``failed_tier_name`` — the tier that just failed in this request.
        2. A vendor recorded in ``session_record.provider_state`` (other than
           the target itself).
        3. A vendor inferred from the body content (other than the target).

        Returns:
            The chosen source vendor name, or ``None`` when none qualifies.
        """
        from ..convert.vendor_channels import (
            get_transition_channel,
            infer_source_vendor_from_body,
        )

        # 1) In-request failover. The failed tier is used only if it has a
        #    registered channel; otherwise lookup falls through to 2) / 3).
        if failed_tier_name:
            if get_transition_channel(failed_tier_name, target_name) is not None:
                return failed_tier_name

        # 2) Cross-request: scan the session history for a convertible source.
        if session_record is not None and session_record.provider_state:
            for source in session_record.provider_state:
                if source == target_name:
                    continue
                if get_transition_channel(source, target_name):
                    return source

        # 3) Fallback (e.g. first request with no session state): infer from body.
        if body is None:
            return None
        inferred = infer_source_vendor_from_body(body)
        if not inferred or inferred == target_name:
            return None
        if get_transition_channel(inferred, target_name):
            return inferred
        return None

    async def execute_stream(
        self,
        body: dict[str, Any],
        headers: dict[str, str],
    ) -> AsyncIterator[tuple[bytes, str]]:
        """Route a streaming request, trying each tier in priority order.

        For every tier that passes gating, the (possibly converted) body is
        streamed through the tier's vendor and each chunk is yielded together
        with the tier name. After a stream completes, usage is recorded, the
        session is persisted and the router's active vendor is updated.

        Args:
            body: The request body.
            headers: The request headers.

        Yields:
            ``(chunk, tier_name)`` pairs for the tier serving the request.

        Raises:
            TokenAcquireError: Credential failure on the last tier.
            httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError,
            httpx.ReadError: Transport/HTTP failure on the last tier, or a
                structural validation error on any tier.
            Exception: Any unexpected error raised by the last tier; or the
                last recorded tier error when the loop finishes without
                a successful stream.
            NoCompatibleVendorError: No tier was attempted successfully and
                no error was recorded.
        """
        last_exc: Exception | None = None
        failed_tier_name: str | None = None
        request_caps = build_request_capabilities(body)
        canonical_request = build_canonical_request(body, headers)
        session_record = await self._session_mgr.get_or_create_record(
            canonical_request.session_key,
            canonical_request.trace_id,
        )
        incompatible_reasons: list[str] = []
        effective_tiers = self._resolve_effective_tiers(canonical_request.session_key)
        last_idx = len(effective_tiers) - 1

        for i, tier in enumerate(effective_tiers):
            is_last = i == last_idx

            gate = await self._try_gate_tier(
                tier,
                is_last,
                request_caps,
                canonical_request,
                session_record,
                incompatible_reasons,
            )
            if gate == "skip":
                continue

            # Timing and usage accumulation are per tier attempt.
            start = time.monotonic()
            usage: dict[str, Any] = {}

            try:
                source_vendor = _RouteExecutor._determine_source_vendor(
                    tier.name, failed_tier_name, session_record, body
                )
                body_for_tier = self._prepare_body_for_tier(body, tier, source_vendor)
                async for chunk in tier.vendor.send_message_stream(
                    body_for_tier, headers
                ):
                    # Accumulates usage fields from the chunk into ``usage``.
                    parse_usage_from_chunk(
                        chunk,
                        usage,
                        vendor_label=_VENDOR_PROTOCOL_LABEL_MAP.get(tier.name),
                    )
                    yield chunk, tier.name

                info = self._recorder.build_usage_info(usage)
                if has_missing_input_usage_signals(info):
                    logger.warning(
                        "Stream completed with missing input usage signals: output_tokens=%d, "
                        "cache_creation_tokens=%d, cache_read_tokens=%d, tier=%s, usage_data=%r",
                        info.output_tokens,
                        info.cache_creation_tokens,
                        info.cache_read_tokens,
                        tier.name,
                        usage,
                    )
                tier.record_success(
                    info.input_tokens
                    + info.output_tokens
                    + info.cache_creation_tokens
                    + info.cache_read_tokens
                )
                duration = int((time.monotonic() - start) * 1000)
                model = body.get("model", "unknown")
                model_served = usage.get("model_served") or tier.vendor.map_model(model)
                if failed_tier_name is not None:
                    logger.info(
                        "Tier %s stream succeeded (took over from failed tier: %s)",
                        tier.name,
                        failed_tier_name,
                    )
                self._recorder.log_model_call(
                    vendor=tier.name,
                    model_requested=model,
                    model_served=model_served,
                    duration_ms=duration,
                    usage=info,
                )
                await self._session_mgr.persist_session(
                    tier.vendor.get_compat_trace(), session_record
                )
                await self._recorder.record(
                    tier.name,
                    model,
                    model_served,
                    info,
                    duration,
                    True,
                    failed_tier_name is not None,
                    failed_tier_name,
                    evidence_records=build_usage_evidence_records(
                        usage,
                        vendor=tier.name,
                        model_served=model_served,
                        request_id=info.request_id,
                    ),
                    session_key=canonical_request.session_key,
                )
                self._router._active_vendor_name = tier.name  # update the active vendor
                return

            except TokenAcquireError as exc:
                failed_tier_name, last_exc = await self._handle_token_error(
                    tier, exc, is_last, failed_tier_name
                )
                if is_last and last_exc is exc:
                    raise
                # Not the last tier: fall through to the next loop iteration.

            except (
                httpx.HTTPStatusError,
                httpx.TimeoutException,
                httpx.ConnectError,
                httpx.ReadError,
            ) as exc:
                _log_http_error_detail(tier.name, exc, is_stream=True, tier=tier)
                (
                    should_continue,
                    failed_tier_name,
                    last_exc,
                ) = await self._handle_http_error(
                    tier,
                    exc,
                    is_last,
                    failed_tier_name,
                    last_exc,
                    is_stream=True,
                    request_body=body,
                )
                if should_continue:
                    # ``i`` indexes ``effective_tiers`` (which may be reordered
                    # by the session policy), so that list must be passed here.
                    self._log_failover_transition(tier, exc, effective_tiers, i)
                    continue
                if is_last:
                    raise
                # Structural validation errors (e.g. a misplaced tool_result
                # role) must not cascade to the next tier: forwarding the same
                # malformed request to another vendor would only fail again.
                if isinstance(exc, httpx.HTTPStatusError) and exc.response is not None:
                    if is_structural_validation_error(
                        status_code=exc.response.status_code,
                        error_message=self._extract_error_message_from_http_status(exc),
                    ):
                        logger.info(
                            "Tier %s structural validation error, stopping failover",
                            tier.name,
                        )
                        raise
                # Otherwise fall through to the next loop iteration.
            except Exception as exc:
                logger.error(
                    "Tier %s stream unexpected error: %s: %s",
                    tier.name,
                    type(exc).__name__,
                    exc,
                    exc_info=True,
                )
                tier.record_failure()
                failed_tier_name = tier.name
                if not is_last:
                    continue
                raise

        if last_exc:
            raise last_exc
        raise NoCompatibleVendorError(
            "当前请求包含仅客户端/MCP 可安全承接的能力，未找到兼容供应商",
            reasons=incompatible_reasons,
        )

    async def execute_message(
        self,
        body: dict[str, Any],
        headers: dict[str, str],
    ) -> VendorResponse:
        """Route a non-streaming request, trying each tier in priority order.

        For every tier that passes gating, the (possibly converted) body is
        sent through the tier's vendor. A response with status < 400 is
        recorded and returned. An error response either triggers failover to
        the next tier (semantic rejection or vendor-declared failover) or,
        on the last tier / for non-failover errors, is recorded and returned
        as is.

        Args:
            body: The request body.
            headers: The request headers.

        Returns:
            The successful vendor response, or the error response of the tier
            at which failover stopped.

        Raises:
            TokenAcquireError: Credential failure on the last tier.
            httpx.TimeoutException, httpx.ConnectError, httpx.ReadError:
                Transport failure on the last tier.
            Exception: Any unexpected error raised by the last tier.
            NoCompatibleVendorError: The loop ended without a response and at
                least one tier was skipped as incompatible.
            RuntimeError: The loop ended without a response for any other
                reason.
        """
        # Duration is measured across all tier attempts of this request.
        start = time.monotonic()
        failed_tier_name: str | None = None
        request_caps = build_request_capabilities(body)
        canonical_request = build_canonical_request(body, headers)
        session_record = await self._session_mgr.get_or_create_record(
            canonical_request.session_key,
            canonical_request.trace_id,
        )
        incompatible_reasons: list[str] = []
        effective_tiers = self._resolve_effective_tiers(canonical_request.session_key)
        last_idx = len(effective_tiers) - 1

        for i, tier in enumerate(effective_tiers):
            is_last = i == last_idx

            gate = await self._try_gate_tier(
                tier,
                is_last,
                request_caps,
                canonical_request,
                session_record,
                incompatible_reasons,
            )
            if gate == "skip":
                continue

            try:
                source_vendor = _RouteExecutor._determine_source_vendor(
                    tier.name, failed_tier_name, session_record, body
                )
                body_for_tier = self._prepare_body_for_tier(body, tier, source_vendor)
                resp = await tier.vendor.send_message(body_for_tier, headers)

                if resp.status_code < 400:
                    duration = int((time.monotonic() - start) * 1000)
                    model = body.get("model", "unknown")
                    model_served = resp.model_served or tier.vendor.map_model(model)
                    if failed_tier_name is not None:
                        logger.info(
                            "Tier %s message succeeded (took over from failed tier: %s)",
                            tier.name,
                            failed_tier_name,
                        )
                    self._recorder.log_model_call(
                        vendor=tier.name,
                        model_requested=model,
                        model_served=model_served,
                        duration_ms=duration,
                        usage=resp.usage,
                    )
                    await self._session_mgr.persist_session(
                        tier.vendor.get_compat_trace(), session_record
                    )
                    await self._recorder.record(
                        tier.name,
                        model,
                        model_served,
                        resp.usage,
                        duration,
                        True,
                        failed_tier_name is not None,
                        failed_tier_name,
                        evidence_records=self._recorder.build_nonstream_evidence_records(
                            vendor=tier.name,
                            model_served=model_served,
                            usage=resp.usage,
                        ),
                        session_key=canonical_request.session_key,
                    )
                    self._router._active_vendor_name = tier.name  # update the active vendor
                    return resp

                # Non-streaming path: semantic-rejection and failover decisions
                # are derived from the response object rather than an exception.
                is_semantic = is_semantic_rejection(
                    status_code=resp.status_code,
                    error_type=resp.error_type,
                    error_message=resp.error_message,
                )
                # Supplementary check: 400 + tool_result + no structured error
                # body → format incompatibility (covers vendors that answer
                # with a plain-text "Bad Request").
                if not is_semantic and _is_likely_request_format_error(
                    status_code=resp.status_code,
                    error_body_text=(resp.error_message or "")[:500],
                    body=body,
                ):
                    is_semantic = True
                    logger.warning(
                        "Tier %s likely format incompatibility (400 + tool_results), "
                        "trying next tier without recording failure",
                        tier.name,
                    )

                if not is_last and is_semantic:
                    logger.warning(
                        "Tier %s semantic rejection (%s), trying next tier without recording failure",
                        tier.name,
                        resp.error_type or resp.status_code,
                    )
                    failed_tier_name = tier.name
                    continue

                if tier.vendor.should_trigger_failover(
                    resp.status_code,
                    {"error": {"type": resp.error_type, "message": resp.error_message}},
                ):
                    rl_info = parse_rate_limit_headers(
                        resp.response_headers, resp.status_code, resp.error_message
                    )
                    tier.record_failure(
                        is_cap_error=self._is_cap_error(resp) or rl_info.is_cap_error,
                        retry_after_seconds=compute_effective_retry_seconds(rl_info),
                        rate_limit_deadline=compute_rate_limit_deadline(rl_info),
                    )
                    if not is_last:
                        # ``i`` indexes ``effective_tiers`` (which may be
                        # reordered by the session policy), so the next tier
                        # must be looked up in that list.
                        next_tier = (
                            effective_tiers[i + 1]
                            if i + 1 < len(effective_tiers)
                            else None
                        )
                        next_info = f" → next: {next_tier.name}" if next_tier else ""
                        logger.warning(
                            "Tier %s error %d, failing over%s",
                            tier.name,
                            resp.status_code,
                            next_info,
                        )
                        failed_tier_name = tier.name
                        continue

                # Last tier, or an error that does not trigger failover:
                # record it and return the original response.
                _log_vendor_response_error(tier.name, resp, body, is_stream=False)
                duration = int((time.monotonic() - start) * 1000)
                model = body.get("model", "unknown")
                model_served = resp.model_served or tier.vendor.map_model(model)
                self._recorder.log_model_call(
                    vendor=tier.name,
                    model_requested=model,
                    model_served=model_served,
                    duration_ms=duration,
                    usage=resp.usage,
                )
                await self._recorder.record(
                    tier.name,
                    model,
                    model_served,
                    resp.usage,
                    duration,
                    resp.status_code < 400,
                    failed_tier_name is not None,
                    failed_tier_name,
                    evidence_records=self._recorder.build_nonstream_evidence_records(
                        vendor=tier.name, model_served=model_served, usage=resp.usage
                    ),
                    session_key=canonical_request.session_key,
                )
                return resp

            except TokenAcquireError as exc:
                failed_tier_name, last_exc = await self._handle_token_error(
                    tier, exc, is_last, failed_tier_name
                )
                if is_last:
                    raise
                continue

            except (httpx.TimeoutException, httpx.ConnectError, httpx.ReadError) as exc:
                _log_http_error_detail(tier.name, exc, is_stream=False, tier=tier)
                tier.record_failure()
                failed_tier_name = tier.name
                if is_last:
                    raise
                continue
            except Exception as exc:
                logger.error(
                    "Tier %s message unexpected error: %s: %s",
                    tier.name,
                    type(exc).__name__,
                    exc,
                    exc_info=True,
                )
                tier.record_failure()
                failed_tier_name = tier.name
                if not is_last:
                    continue
                raise

        if incompatible_reasons:
            raise NoCompatibleVendorError(
                "当前请求包含仅客户端/MCP 可安全承接的能力，未找到兼容供应商",
                reasons=incompatible_reasons,
            )
        raise RuntimeError("无可用供应商层级")

    # ── 门控与错误处理 ──────────────────────────────────────

    async def _try_gate_tier(
        self,
        tier: VendorTier,
        is_last: bool,
        request_caps: RequestCapabilities,
        canonical_request: Any,
        session_record: Any,
        incompatible_reasons: list[str],
    ) -> str:
        """Run capability gating and compatibility checks for one tier.

        A tier is skipped when its vendor does not support the request
        capabilities, when the vendor's compatibility decision is ``UNSAFE``,
        or when it currently cannot execute. For the first two cases a
        ``"<tier>:<reasons>"`` entry is appended to ``incompatible_reasons``.

        Returns:
            ``"eligible"`` if every gate passed and the request may be sent,
            ``"skip"`` otherwise.
        """
        vendor = tier.vendor

        # Gate 1: request capabilities.
        supported, reasons = vendor.supports_request(request_caps)
        if not supported:
            reason_text = ",".join(sorted({reason.value for reason in reasons}))
            incompatible_reasons.append(f"{tier.name}:{reason_text}")
            logger.info(
                "Tier %s skipped due to incompatible capabilities: %s",
                tier.name,
                reason_text,
            )
            return "skip"

        # Gate 2: vendor compatibility decision.
        decision = vendor.make_compatibility_decision(canonical_request)
        if decision.status is CompatibilityStatus.UNSAFE:
            reason_text = ",".join(sorted(decision.unsupported_semantics))
            incompatible_reasons.append(f"{tier.name}:{reason_text}")
            logger.info(
                "Tier %s skipped due to compatibility decision: %s",
                tier.name,
                reason_text,
            )
            return "skip"

        self._session_mgr.apply_compat_context(
            tier=tier,
            canonical_request=canonical_request,
            decision=decision,
            session_record=session_record,
        )

        # Gate 3: the terminal tier only checks can_execute; every other tier
        # goes through the health-check gate.
        if is_last:
            runnable = tier.can_execute()
        else:
            runnable = await tier.can_execute_with_health_check()
        return "eligible" if runnable else "skip"

    async def _handle_token_error(
        self,
        tier: VendorTier,
        exc: TokenAcquireError,
        is_last: bool,
        failed_tier_name: str | None,
    ) -> tuple[str | None, Exception]:
        """Shared handling for a ``TokenAcquireError`` raised by a tier.

        - ``INSUFFICIENT_SCOPE`` / ``INVALID_CREDENTIALS`` are permanent
          credential problems for which retrying is pointless, so no circuit
          breaker failure is recorded for them.
        - Every other kind is recorded as a tier failure.

        When the error requires re-authentication and a coordinator is
        configured, re-auth is requested for the OAuth provider mapped to the
        tier (if any).

        Returns:
            ``(tier.name, exc)`` — the new failed-tier name and last exception.
        """
        logger.warning("Tier %s credential error: %s", tier.name, exc)

        permanent_kinds = (
            TokenErrorKind.INSUFFICIENT_SCOPE,
            TokenErrorKind.INVALID_CREDENTIALS,
        )
        if exc.kind in permanent_kinds:
            logger.info(
                "Tier %s permanent credential issue (%s), "
                "skipping circuit breaker failure recording",
                tier.name,
                exc.kind.value,
            )
        else:
            tier.record_failure()

        if exc.needs_reauth and self._reauth_coordinator:
            if provider := self._tier_provider_map.get(tier.name):
                await self._reauth_coordinator.request_reauth(provider)
        return tier.name, exc

    async def _handle_http_error(
        self,
        tier: VendorTier,
        exc: Exception,
        is_last: bool,
        failed_tier_name: str | None,
        last_exc: Exception | None,
        *,
        is_stream: bool = False,
        request_body: dict[str, Any] | None = None,
    ) -> tuple[bool, str | None, Exception | None]:
        """Shared handling for HTTP errors (streaming path).

        An ``httpx.HTTPStatusError`` carrying a response is first checked
        for a semantic rejection. If it is one and ``tier`` is not the last
        tier, the caller is told to continue and no failure is recorded.
        Otherwise the failure is recorded on ``tier`` together with the
        rate-limit details parsed from the response. Any other exception is
        recorded as a plain failure.

        Args:
            tier: The vendor tier that produced the error.
            exc: The exception being handled.
            is_last: Whether ``tier`` is the last tier.
            failed_tier_name: Not read by this method.
            last_exc: Not read by this method.
            is_stream: Not read by this method.
            request_body: The request body, used only for the supplementary
                format-incompatibility check; ``None`` disables that check.

        Returns:
            ``(should_continue, failed_tier_name, last_exc)``, where the
            last two are always ``tier.name`` and ``exc``.
        """
        # Anything other than an HTTP status error with a response is simply
        # recorded as a failure.
        if not isinstance(exc, httpx.HTTPStatusError) or exc.response is None:
            tier.record_failure()
            return False, tier.name, exc

        response = exc.response

        def _body_excerpt():
            # First 500 characters of the body text, or None when it is empty.
            # Kept lazy so the text is only read where it is actually needed.
            body_text = response.text
            return body_text[:500] if body_text else None

        payload = extract_error_payload_from_http_status(exc)
        error = payload.get("error", {}) if isinstance(payload, dict) else {}
        if isinstance(error, dict):
            error_type = error.get("type")
            error_message = error.get("message")
        else:
            error_type = error_message = None

        rejected = is_semantic_rejection(
            status_code=response.status_code,
            error_type=error_type,
            error_message=error_message,
        )

        # Supplementary detection: 400 + tool_result present + unstructured
        # error body -> format incompatibility (e.g. Copilot answering with
        # the plain text "Bad Request\n"). Such errors should not count
        # toward the circuit breaker, because retrying the same request is
        # bound to fail again.
        if (
            not rejected
            and request_body is not None
            and _is_likely_request_format_error(
                status_code=response.status_code,
                error_body_text=_body_excerpt(),
                body=request_body,
            )
        ):
            rejected = True
            logger.warning(
                "Tier %s likely format incompatibility (400 + tool_results), "
                "trying next tier without recording failure",
                tier.name,
            )

        if rejected and not is_last:
            return True, tier.name, exc

        rl_info = parse_rate_limit_headers(
            response.headers,
            response.status_code,
            _body_excerpt(),
        )
        tier.record_failure(
            is_cap_error=rl_info.is_cap_error,
            retry_after_seconds=compute_effective_retry_seconds(rl_info),
            rate_limit_deadline=compute_rate_limit_deadline(rl_info),
        )
        return False, tier.name, exc

    @staticmethod
    def _log_failover_transition(
        current_tier: VendorTier,
        exc: Exception,
        tiers: list[VendorTier],
        current_index: int,
    ) -> None:
        """记录 vendor 轮转摘要日志（谁 → 谁，原因）."""
        next_tier = tiers[current_index + 1] if current_index + 1 < len(tiers) else None
        if next_tier is None:
            return

        # 提取错误摘要
        reason = type(exc).__name__
        if isinstance(exc, httpx.HTTPStatusError) and exc.response is not None:
            reason = f"HTTP {exc.response.status_code}"

        logger.info(
            "Failover: %s → %s (reason: %s)",
            current_tier.name,
            next_tier.name,
            reason,
        )

    @staticmethod
    def _is_cap_error(resp: VendorResponse) -> bool:
        """判断是否为订阅用量上限错误."""
        if resp.status_code not in (429, 403):
            return False
        msg = (resp.error_message or "").lower()
        return any(p in msg for p in ("usage cap", "quota", "limit exceeded"))

    @staticmethod
    def _extract_error_message_from_http_status(
        exc: httpx.HTTPStatusError,
    ) -> str | None:
        """从 HTTPStatusError 中提取错误消息文本."""
        if exc.response is None or not exc.response.content:
            return None
        try:
            payload = exc.response.json()
        except Exception:
            return None
        if not isinstance(payload, dict):
            return None
        error = payload.get("error", {})
        if isinstance(error, dict):
            return error.get("message")
        return None