web2code-sandbox/agent.py at main · sk413025/web2code-sandbox · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
"""Minimal Ollama-based research agent using web_search, web_fetch, and Docker sandbox execution.

The agent follows the loop described in intro.md:
Plan -> Search -> Fetch -> Write Python -> Execute -> Synthesize.
"""

import argparse
import json
import os
import shutil
import subprocess
import tempfile
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Iterable, List, Mapping, Optional

import ollama

# Resolve `chat`, `web_search` and `web_fetch` as module-level names.
# HAS_WEB_TOOLS records whether real web tool implementations were found, so
# the rest of the module can decide whether to expose them to the model.
try:
    from ollama import chat, web_fetch, web_search  # type: ignore
    HAS_WEB_TOOLS = True
except ImportError:  # pragma: no cover - defensive fallback for older ollama versions
    # The direct import failed: fall back to attribute lookups on the package,
    # tolerating the web helpers being absent.
    chat = ollama.chat
    web_search = getattr(ollama, "web_search", None)
    web_fetch = getattr(ollama, "web_fetch", None)
    # Both helpers must be callable for the web tools to count as available.
    HAS_WEB_TOOLS = callable(web_search) and callable(web_fetch)

    if not HAS_WEB_TOOLS:
        def _missing_web_search(*_: object, **__: object) -> Dict[str, Any]:
            """Stand-in for web_search that always raises RuntimeError."""
            raise RuntimeError(
                "ollama.web_search is unavailable. Upgrade ollama>=0.6.0 or provide"
                " a custom search tool."
            )

        def _missing_web_fetch(*_: object, **__: object) -> Dict[str, Any]:
            """Stand-in for web_fetch that always raises RuntimeError."""
            raise RuntimeError(
                "ollama.web_fetch is unavailable. Upgrade ollama>=0.6.0 or provide"
                " a custom fetch tool."
            )

        # Bind the stubs so the names always exist; calling either one fails
        # with an explanatory error instead of a NameError/TypeError.
        web_search = _missing_web_search  # type: ignore[assignment]
        web_fetch = _missing_web_fetch  # type: ignore[assignment]


# Cap applied to each serialized tool result before it is sent back to the model.
MAX_TOOL_MESSAGE_BYTES = 8_000
# Cap (in bytes) applied separately to captured sandbox stdout and stderr.
MAX_EXEC_OUTPUT_BYTES = 64 * 1024

# Runtime configuration, overridable through OLLAMA_AGENT_* environment variables.
DEFAULT_MODEL = os.environ.get("OLLAMA_AGENT_MODEL", "gpt-oss:20b")
DEFAULT_TIMEOUT_SECS = int(os.environ.get("OLLAMA_AGENT_SANDBOX_TIMEOUT", "60"))
DEFAULT_MAX_TURNS = int(os.environ.get("OLLAMA_AGENT_MAX_TURNS", "12"))
SANDBOX_IMAGE = os.environ.get("OLLAMA_AGENT_SANDBOX_IMAGE", "py-sandbox:latest")


def _save_execution_artifacts(workdir: Path, code: str) -> Dict[str, str]:
    """Copy the generated code and sandbox output files into a fresh artifacts directory.

    A directory named ``artifacts/execution_<YYYYmmdd_HHMMSS>`` is created
    relative to the current working directory. If that name is already taken
    (two executions within the same second), a numeric suffix ``_1``, ``_2``,
    ... is appended so earlier artifacts are never overwritten.

    Args:
        workdir: Host directory that was mounted into the sandbox.
        code: Source code that was executed; stored as ``generated_code.py``.

    Returns:
        Mapping from a label to the absolute path of each saved file. The
        label is ``"code"`` for the generated source, the file name for files
        found directly in ``workdir`` (``main.py`` is skipped), and
        ``"output_<name>"`` for files found in ``workdir/output``.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    artifacts_root = Path("artifacts")
    artifacts_root.mkdir(parents=True, exist_ok=True)

    # mkdir() without exist_ok doubles as an atomic "claim this name" step.
    artifacts_dir = artifacts_root / f"execution_{timestamp}"
    attempt = 0
    while True:
        try:
            artifacts_dir.mkdir()
            break
        except FileExistsError:
            attempt += 1
            artifacts_dir = artifacts_root / f"execution_{timestamp}_{attempt}"

    saved_files: Dict[str, str] = {}

    # Save the generated code
    code_file = artifacts_dir / "generated_code.py"
    code_file.write_text(code, encoding="utf-8")
    saved_files["code"] = str(code_file.absolute())

    # Copy files the program wrote next to main.py (images, data files, etc.)
    for item in workdir.iterdir():
        if item.is_file() and item.name != "main.py":
            dest_file = artifacts_dir / item.name
            shutil.copy2(item, dest_file)
            saved_files[item.name] = str(dest_file.absolute())

    # Copy files from the output subdirectory, prefixed to keep them distinguishable
    output_dir = workdir / "output"
    if output_dir.exists():
        for item in output_dir.iterdir():
            if item.is_file():
                label = f"output_{item.name}"
                dest_file = artifacts_dir / label
                shutil.copy2(item, dest_file)
                saved_files[label] = str(dest_file.absolute())

    return saved_files


def run_python(
    *,
    code: str,
    files: Optional[Mapping[str, str]] = None,
    requirements: Optional[Iterable[str]] = None,
    timeout_sec: int = DEFAULT_TIMEOUT_SECS,
    save_artifacts: bool = True,
) -> Dict[str, Any]:
    """Execute model-generated Python code inside a Docker sandbox.

    The code is written to ``main.py`` in a temporary directory that is
    mounted read-write at ``/workspace`` in a container started from
    ``SANDBOX_IMAGE`` with networking disabled and CPU, memory and PID limits.

    Args:
        code: Python source to run.
        files: Optional extra text files to place next to ``main.py``, keyed
            by bare file name (no directory components).
        requirements: Accepted for interface compatibility and ignored;
            packages are expected to be pre-installed in the sandbox image.
        timeout_sec: Wall-clock limit for the ``docker run`` invocation.
        save_artifacts: When true and the run exits with code 0, copy the
            code and produced files into an artifacts directory.

    Returns:
        A dict with ``stdout``, ``stderr``, ``exit_code`` and ``truncated``.
        ``artifacts_saved`` is added when artifacts were stored. Exit code 127
        means the docker executable was not found, 124 means timeout.

    Raises:
        ValueError: If a name in ``files`` has path components or collides
            with a reserved workspace entry.
    """
    del requirements  # Packages should be pre-installed in the sandbox image for security.

    # Names that would overwrite the entry point, collide with the output
    # directory created below, or resolve to a directory instead of a file.
    reserved_names = {"", ".", "..", "main.py", "output"}

    with tempfile.TemporaryDirectory(prefix="ollama_agent_") as tempdir:
        workdir = Path(tempdir)
        main_py = workdir / "main.py"
        main_py.write_text(code, encoding="utf-8")

        for name, content in (files or {}).items():
            # Path(name).name != name rejects anything with a directory part.
            if Path(name).name != name or name in reserved_names:
                raise ValueError(f"Unsupported sandbox filename: {name}")
            (workdir / name).write_text(content, encoding="utf-8")

        # Create output directory for artifacts
        output_dir = workdir / "output"
        output_dir.mkdir(exist_ok=True)

        # The temp directory's basename is unique per call, so it doubles as a
        # container name that lets us remove the container after a timeout.
        container_name = workdir.name

        cmd = [
            "docker",
            "run",
            "--rm",
            "--name",
            container_name,
            "--network",
            "none",
            "--cpus",
            "1.0",
            "--memory",
            "1g",
            "--pids-limit",
            "256",
            "-v",
            f"{workdir}:/workspace",  # Mounted read-write so the program can write results.
            SANDBOX_IMAGE,
        ]
        try:
            proc = subprocess.run(
                cmd,
                capture_output=True,
                timeout=timeout_sec,
                check=False,
            )
        except FileNotFoundError:
            return {
                "stdout": "",
                "stderr": "docker executable not found. Build the sandbox image or adjust SANDBOX_IMAGE.",
                "exit_code": 127,
                "truncated": False,
            }
        except subprocess.TimeoutExpired:
            # subprocess.run only kills the docker CLI process on timeout; the
            # container may keep running, so force-remove it (best effort).
            try:
                subprocess.run(
                    ["docker", "rm", "-f", container_name],
                    capture_output=True,
                    timeout=30,
                    check=False,
                )
            except (OSError, subprocess.SubprocessError):
                pass  # Cleanup failure must not mask the timeout result.
            return {"stdout": "", "stderr": "TIMEOUT", "exit_code": 124, "truncated": False}

        # Truncate on raw bytes first; errors="ignore" drops a multi-byte
        # character that the cut may have split.
        stdout = proc.stdout[:MAX_EXEC_OUTPUT_BYTES].decode("utf-8", errors="ignore")
        stderr = proc.stderr[:MAX_EXEC_OUTPUT_BYTES].decode("utf-8", errors="ignore")

        result: Dict[str, Any] = {
            "stdout": stdout,
            "stderr": stderr,
            "exit_code": proc.returncode,
            "truncated": len(proc.stdout) > MAX_EXEC_OUTPUT_BYTES
            or len(proc.stderr) > MAX_EXEC_OUTPUT_BYTES,
        }

        # Save artifacts if requested and execution was successful
        if save_artifacts and proc.returncode == 0:
            artifacts_saved = _save_execution_artifacts(workdir, code)
            if artifacts_saved:
                result["artifacts_saved"] = artifacts_saved

        return result


def _normalize_message(obj: Any) -> Dict[str, Any]:
    """Reduce a chat response or message object to a plain message dict.

    Handles objects exposing ``model_dump()`` or ``dict()``, objects carrying
    a ``message`` attribute, and dicts that wrap the message under a
    ``"message"`` key without themselves having both ``role`` and ``content``.

    Raises:
        TypeError: If the payload cannot be reduced to a dict.
    """
    payload = obj

    # Object-style payloads: convert or unwrap until a dict is reached.
    if hasattr(payload, "model_dump"):  # Pydantic style
        payload = payload.model_dump()
    if not isinstance(payload, dict) and hasattr(payload, "dict"):
        payload = payload.dict()
    if not isinstance(payload, dict) and hasattr(payload, "message"):
        payload = payload.message

    # Dict-style envelope: unwrap only when the outer dict is not a message itself.
    if isinstance(payload, dict) and "message" in payload:
        looks_like_message = {"role", "content"}.issubset(payload.keys())
        if not looks_like_message:
            payload = payload["message"]

    if isinstance(payload, dict):
        return payload
    raise TypeError(f"Unexpected message payload: {type(payload)!r}")


def _extract_tool_calls(message: Mapping[str, Any]) -> List[Dict[str, Any]]:
    """Return the tool calls attached to a message as a list of plain dicts.

    Looks under ``tool_calls`` first and falls back to ``toolCalls``. Entries
    that are not already dicts are converted with ``model_dump()`` when
    available, otherwise with ``dict()``.

    Args:
        message: Normalized assistant message.

    Returns:
        A list of tool-call dicts; empty when the message carries none.

    Raises:
        TypeError: If the tool-call payload is present but is not a list.
    """
    tool_calls = message.get("tool_calls") or message.get("toolCalls")
    if tool_calls is None:
        return []
    if not isinstance(tool_calls, list):
        raise TypeError("tool_calls payload must be a list")

    normalized: List[Dict[str, Any]] = []
    for call in tool_calls:
        if isinstance(call, dict):
            normalized.append(call)
        elif hasattr(call, "model_dump"):
            # Prefer model_dump so objects without a dict() method still work.
            normalized.append(call.model_dump())
        else:
            normalized.append(call.dict())  # type: ignore[attr-defined]
    return normalized


# System message placed first in every conversation started by agent_query.
SYSTEM_PROMPT = """You are an autonomous research and analysis agent.
Follow the loop: Plan -> Search -> Fetch -> Write Python -> Execute -> Synthesize.
Use the available tools when they add value.

Constraints:
- Keep responses concise and focus on evidence-backed conclusions.
- Only read from /workspace files when running Python.
- Prefer JSON outputs for final reports when possible.
- Stop once you have sufficient evidence or after two loops with no new findings.
"""


# Registry mapping tool names (as requested by the model) to their callables.
# The sandbox runner is always present; web tools are added only when usable.
AVAILABLE_TOOLS = {"run_python": run_python}
if HAS_WEB_TOOLS:
    AVAILABLE_TOOLS["web_search"] = web_search
    AVAILABLE_TOOLS["web_fetch"] = web_fetch


def _serialize_tool_result(tool_result: Any) -> str:
    """Render a tool result as text, capped at MAX_TOOL_MESSAGE_BYTES characters."""
    # Response objects exposing model_dump() are converted to plain data first.
    if hasattr(tool_result, "model_dump"):
        tool_result = tool_result.model_dump()
    try:
        # default=str is only consulted for values json cannot encode natively,
        # so already-serializable results are rendered exactly as before.
        serialized = json.dumps(tool_result, ensure_ascii=False, default=str)
    except (TypeError, ValueError):
        # e.g. unsupported dict keys or circular references.
        serialized = str(tool_result)
    return serialized[:MAX_TOOL_MESSAGE_BYTES]


def _run_tool_call(tool_call: Mapping[str, Any]) -> Dict[str, Any]:
    """Execute one requested tool call and build the ``tool`` message for it."""
    fn_payload = tool_call.get("function") or {}
    fn_name = fn_payload.get("name")
    raw_args = fn_payload.get("arguments") or {}

    tool_fn = AVAILABLE_TOOLS.get(fn_name)
    if not tool_fn:
        tool_result: Any = {"error": f"Unknown tool: {fn_name}"}
    else:
        try:
            # Arguments may arrive as a JSON string; decode inside the try so
            # malformed JSON is reported to the model instead of crashing.
            if isinstance(raw_args, str):
                raw_args = json.loads(raw_args or "{}")
            tool_result = tool_fn(**raw_args)
        except Exception as exc:  # noqa: BLE001 - surfaced to the model
            tool_result = {"error": str(exc)}

    return {
        "role": "tool",
        "tool_name": fn_name,
        "content": _serialize_tool_result(tool_result),
    }


def agent_query(
    user_query: str,
    *,
    model: str = DEFAULT_MODEL,
    think: bool = True,
    max_turns: int = DEFAULT_MAX_TURNS,
) -> None:
    """Run the chat/tool loop for one user query and print the final answer.

    Each turn sends the conversation to the model. If the reply requests tool
    calls, every call is executed and its result appended as a ``tool``
    message before the next turn; otherwise the reply content is printed and
    the function returns.

    Args:
        user_query: The user's instruction.
        model: Ollama model name.
        think: When true, pass ``think=True`` to the chat call.
        max_turns: Maximum number of model turns before giving up.
    """
    messages: List[Dict[str, Any]] = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_query},
    ]

    # The tool list does not change between turns, so build it once.
    tool_order = ("web_search", "web_fetch", "run_python")
    tool_functions = [AVAILABLE_TOOLS[name] for name in tool_order if name in AVAILABLE_TOOLS]

    for _ in range(max_turns):
        kwargs: Dict[str, Any] = {
            "model": model,
            "messages": messages,
        }
        if tool_functions:
            kwargs["tools"] = tool_functions
        if think:
            kwargs["think"] = True
        try:
            response = chat(**kwargs)
        except AttributeError:
            # Retry once without tools. NOTE(review): presumably covers clients
            # that cannot handle callables as tools - confirm.
            kwargs.pop("tools", None)
            response = chat(**kwargs)
        message = _normalize_message(getattr(response, "message", response))
        messages.append(message)

        tool_calls = _extract_tool_calls(message)
        if tool_calls:
            messages.extend(_run_tool_call(tool_call) for tool_call in tool_calls)
            continue

        content = message.get("content")
        if content:
            print(content)
        else:
            print("(No content returned by the model.)")
        return

    print("Reached maximum number of turns without a final response.")


def main() -> None:
    """Parse command-line options, print configuration warnings, and run the agent."""
    cli = argparse.ArgumentParser(description="Run the Ollama research agent loop.")
    cli.add_argument(
        "prompt",
        nargs="?",
        help="User instruction for the agent. Defaults to the validation task from intro.md.",
    )
    cli.add_argument("--model", default=DEFAULT_MODEL, help="Ollama model name.")
    cli.add_argument(
        "--max-turns",
        type=int,
        default=DEFAULT_MAX_TURNS,
        help="Safety cap for agent loops.",
    )
    cli.add_argument(
        "--no-think",
        action="store_true",
        help="Disable Ollama think traces if unsupported.",
    )
    options = cli.parse_args()

    # Built-in task used when no prompt is given on the command line.
    fallback_prompt = (
        "找 3 篇 gpt-oss 官方或媒體介紹文，摘要 license 與 tool use 能力，然後寫 Python 程式分析"
        " context.json 中的正文是否包含 'Apache 2.0', 'tool use', 'web_search', 'python'，"
        " 最後輸出 JSON 報表。"
    )
    prompt = options.prompt if options.prompt else fallback_prompt

    api_key = os.getenv("OLLAMA_API_KEY")
    if not api_key:
        print("Warning: OLLAMA_API_KEY is not set. Set it before making web_search calls.")
    if not HAS_WEB_TOOLS:
        print(
            "Warning: Installed ollama package lacks web_search/web_fetch. Upgrade to ollama>=0.6.0"
            " or provide custom tool implementations. Agent will proceed without web tools."
        )

    use_think = not options.no_think
    agent_query(
        prompt,
        model=options.model,
        think=use_think,
        max_turns=options.max_turns,
    )


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()