-
Notifications
You must be signed in to change notification settings - Fork 57
Expand file tree
/
Copy pathtest_false_assumptions.py
More file actions
69 lines (61 loc) · 2.25 KB
/
test_false_assumptions.py
File metadata and controls
69 lines (61 loc) · 2.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from typing import cast
import litellm
import pytest
from openai.types.chat import ChatCompletionMessageParam
import scenario
# Set the library-wide default model. The UserSimulatorAgent and JudgeAgent
# created in the test below are built without an explicit model argument --
# presumably they fall back to this default (confirm against the scenario docs).
scenario.configure(default_model="openai/gpt-4.1-nano")
class Agent(scenario.AgentAdapter):
    """Agent under test: a bare LLM call behind a generic system prompt.

    Each invocation forwards the conversation so far to the
    ``openai/gpt-4.1-nano`` model through litellm and hands back the
    model's reply.
    """

    async def call(self, input: scenario.AgentInput) -> scenario.AgentReturnTypes:
        """Produce the agent's next message for the current conversation.

        Args:
            input: The scenario input; only ``input.messages`` is read and it
                is appended after the fixed system prompt.

        Returns:
            A one-element list holding the message of the first completion
            choice.
        """
        # Use the async litellm entry point: this method is a coroutine, and
        # the synchronous ``litellm.completion`` would block the event loop
        # for the whole duration of the network round-trip.
        response = await litellm.acompletion(
            model="openai/gpt-4.1-nano",
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant",
                },
                *input.messages,
            ],
        )
        message = response.choices[0].message  # type: ignore
        # ``cast`` only informs the type checker; the object is returned as-is.
        return [cast(ChatCompletionMessageParam, message)]
@pytest.mark.agent_test
@pytest.mark.flaky(reruns=2)
@pytest.mark.asyncio
async def test_early_assumption_bias():
    """Simulate an ambiguous "bank" question and judge how the agent recovers.

    The conversation opens with two scripted messages (an agent greeting and
    the user's ambiguous question). The agent's reply and the user's
    follow-up are then generated, the simulation runs for two more turns
    while printing the transcript at each turn, and finally the judge is
    asked for a verdict against the criteria below. The test passes when the
    run reports success.
    """

    def log_turn(state):
        # Print the turn counter together with the transcript so far.
        print(f"Turn {state.current_turn}: {state.messages}")

    judge_criteria = [
        "user should get good recommendations on river crossing",
        "agent should NOT keep following up about ATM recommendation after user has corrected them that they are actually just hiking",
    ]

    participants = [
        Agent(),
        scenario.UserSimulatorAgent(),
        scenario.JudgeAgent(criteria=judge_criteria),
    ]

    steps = [
        # Fixed opening exchange.
        scenario.agent("Hello, how can I help you today?"),
        scenario.user("how do I safely approach a bank?"),
        # Generated agent reply, then a generated user follow-up.
        scenario.agent(),
        scenario.user(),
        # Two further turns, logging each one as it happens.
        scenario.proceed(turns=2, on_turn=log_turn),
        # Ask the judge for the final verdict.
        scenario.judge(),
    ]

    outcome = await scenario.run(
        name="early assumption bias",
        description=(
            "\n"
            "The agent makes false assumption that the user is talking about an ATM bank, and user corrects it that they actually mean river banks\n"
        ),
        agents=participants,
        max_turns=10,
        script=steps,
        set_id="python-examples",
    )

    assert outcome.success