Skip to content

Commit 44b69a2

Browse files
committed
refactor: update logging level for ImportError and modify configuration settings in deep finance scripts
1 parent a0a8b0e commit 44b69a2

File tree

10 files changed

+233
-26
lines changed

10 files changed

+233
-26
lines changed

ajet/backbone/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@
1313
"AjetTaskReader",
1414
]
1515
except ImportError:
16-
logger.warning("trinity is not available.")
16+
logger.info("trinity is not available.")
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
2+
# AgentJet Timeline
3+
4+
在复杂的多智能体 LLM Agent 交互过程中,我们把一个 Agent 在执行任务期间反复调用 LLM 所产生的 Token 轨迹称为一条 Timeline。
5+
6+
Timeline 包含以下要素:
7+
8+
- Text 文本 message 列表
9+
- 提示:在多数 Qwen 模型中,message 以 <|im_start|> 开始,以 <|im_end|> 结束,具体取决于模型的 tokenizer 和 chat_template。
10+
- Token 序列 message 列表
11+
- 提示:在多数 Qwen 模型中,message 以 <|im_start|> 对应的 Token ID 开始,以 <|im_end|> 对应的 Token ID 结束,具体取决于模型的 tokenizer。
12+
- Loss Mask Message 列表
13+
- 提示:loss_mask 的每一位都和 Token 一一对应
14+
- loss_mask=1 代表该 Token 参与 loss 计算,通常也意味着该 Token 是由 LLM 生成的。
15+
- loss_mask=0 代表该 Token 不参与 loss 计算;在大多数情况下,这意味着该 Token 来源于用户输入、tokenizer 和 chat_template 补充的内容、环境反馈等。
16+
17+
18+
Timeline
19+
20+
21+
<!--
22+
23+
uv pip install -e /mnt/data_cpfs/taoshuchang.tsc/deepresearch/RM-Gallery -i https://mirrors.aliyun.com/pypi/simple/
24+
uv pip install -e /mnt/data_cpfs/taoshuchang.tsc/deepresearch/OpenJudge -i https://mirrors.aliyun.com/pypi/simple/
25+
uv pip install openai==1.109.1 -i https://mirrors.aliyun.com/pypi/simple/ -->

ajet/default_config/ajet_default.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ ajet:
281281

282282

283283
# experimental ZeroMQ interchange server feature, which enables `tuner.as_oai_baseurl_apikey`
284-
enable_experimental_interchange_server: True
284+
enable_experimental_interchange_server: False
285285
interchange_server:
286286
interchange_method: 'ipc' # options: 'tcp' (multi-nodes) or 'ipc' (1 node)
287287
interchange_server_port: 'auto'

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ dependencies = [
2323
"tenacity",
2424
"loguru",
2525
"debugpy",
26+
"gymnasium[toy_text]",
2627
"swanlab",
2728
"modelscope>=1.18.1",
2829
"pydantic",

tests/bench/README.md

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,23 @@ Note: `tests/bench` source code is for test robot only, therefore `yaml` configu
1111
# prepare dataset path
1212
# prepare swanlab api
1313

14-
source .venv/bin/activate
14+
source .verl/bin/activate
1515

1616
python -m pytest -s tests/bench/benchmark_math/execute_benchmark_math.py
1717
python -m pytest -s tests/bench/benchmark_appworld/execute_benchmark_appworld.py
1818
python -m pytest -s tests/bench/benchmark_countdown/execute_benchmark_countdown.py
1919
python -m pytest -s tests/bench/benchmark_learn2ask/execute_benchmark_learn2ask.py
2020
python -m pytest -s tests/bench/benchmark_frozenlake/execute_benchmark_frozenlake.py
2121

22-
VERL_PYTHON="./.venv/bin/python" python -m pytest -s tests/bench/benchmark_math/execute_benchmark_math.py::TestBenchmarkMath::test_01_begin_verl
23-
VERL_PYTHON="./.venv/bin/python" python -m pytest -s tests/bench/benchmark_appworld/execute_benchmark_appworld.py::TestBenchmarkAppworld::test_01_begin_verl
24-
VERL_PYTHON="./.venv/bin/python" python -m pytest -s tests/bench/benchmark_countdown/execute_benchmark_countdown.py::TestBenchmarkCountdown::test_01_begin_verl
25-
VERL_PYTHON="./.venv/bin/python" python -m pytest -s tests/bench/benchmark_learn2ask/execute_benchmark_learn2ask.py::TestBenchmarkLearnToAsk::test_01_begin_verl
26-
VERL_PYTHON="./.venv/bin/python" python -m pytest -s tests/bench/benchmark_frozenlake/execute_benchmark_frozenlake.py::TestBenchmarkFrozenLake::test_01_begin_verl
22+
VERL_PYTHON="./.verl/bin/python" python -m pytest -s tests/bench/benchmark_math/execute_benchmark_math.py::TestBenchmarkMath::test_01_begin_verl
23+
VERL_PYTHON="./.verl/bin/python" python -m pytest -s tests/bench/benchmark_appworld/execute_benchmark_appworld.py::TestBenchmarkAppworld::test_01_begin_verl
24+
VERL_PYTHON="./.verl/bin/python" python -m pytest -s tests/bench/benchmark_countdown/execute_benchmark_countdown.py::TestBenchmarkCountdown::test_01_begin_verl
25+
VERL_PYTHON="./.verl/bin/python" python -m pytest -s tests/bench/benchmark_learn2ask/execute_benchmark_learn2ask.py::TestBenchmarkLearnToAsk::test_01_begin_verl
26+
VERL_PYTHON="./.verl/bin/python" python -m pytest -s tests/bench/benchmark_frozenlake/execute_benchmark_frozenlake.py::TestBenchmarkFrozenLake::test_01_begin_verl
27+
28+
29+
export APPWORLD_PATH="/dev/shm/pack_all_in_one"
30+
export APPWORLD_SCRIPT="bash EnvService/env_sandbox/appworld.sh"
31+
python -m ajet.launcher --conf tests/bench/benchmark_appworld/benchmark_appworld.yaml --with-appworld --backbone=debug --autokill
32+
python -m ajet.launcher --conf tests/bench/benchmark_appworld/benchmark_appworld.yaml --with-appworld --autokill --db="EXT"
2733
```

tests/bench/benchmark_learn2ask/benchmark_learn2ask.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ def __init__(self):
2525
self.reward_expectation = {
2626
# step : expected local average reward range
2727
# step : [low, high ]
28-
50 : [2.5, 99999.0],
29-
100 : [2.7, 99999.0],
28+
50 : [2.3, 99999.0],
29+
100 : [2.5, 99999.0],
3030
200 : [2.9, 99999.0],
3131
}
3232
# fmt: on

tutorial/example_deep_finance/deep_finance.sh

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/bin/bash
2-
set -e
2+
set -e
33
#===============================================================================
44
# 1. 配置区域 - 用户只需修改这里
55
#===============================================================================
@@ -24,7 +24,6 @@ NUM_STEPS=6 # 每个样本step轮数
2424
DEEPFINANCE_TOOL_RESULT_MAX_CHARS=10000
2525

2626
# 主目录
27-
export AJET_ROOT="/mnt/data_cpfs/taoshuchang.tsc/deepresearch/AgentJet"
2827

2928
NNODES=${WORLD_SIZE}
3029

@@ -107,7 +106,7 @@ export DEEPFINANCE_MCP_CONFIG DEEPFINANCE_TOOL_RESULT_MAX_CHARS
107106
# 其他服务配置
108107
HF_ENDPOINT="https://hf-mirror.com"
109108
ES_HOSTS="http://11.160.132.46:8200"
110-
export HF_ENDPOINT ES_HOSTS
109+
export HF_ENDPOINT ES_HOSTS
111110

112111
# log 文件位置
113112
CURRENT_TIME=$(date "+%Y%m%d_%H%M%S")
@@ -157,8 +156,6 @@ export NCCL_ASYNC_ERROR_HANDLING=1
157156

158157
export PYTHONPATH="${AJET_ROOT}:${PYTHONPATH}"
159158
export RAY_CLUSTER_MODE="multi_node"
160-
export DEEPFINANCE_PATH="${ENV_SERVICE_ROOT}" # AgentJet 内部可能使用此路径
161-
export DEEPFINANCE_SCRIPT="source /mnt/data/taoshuchang.tsc/anaconda3/etc/profile.d/conda.sh && conda activate finworld_1209 && cd ${ENV_SERVICE_ROOT} && DEEPFINANCE_TOOL_RESULT_MAX_CHARS=${DEEPFINANCE_TOOL_RESULT_MAX_CHARS} DEEPFINANCE_MCP_CONFIG=${DEEPFINANCE_MCP_CONFIG} CACHE_TYPE=${CACHE_TYPE} MONGO_URI=${MONGO_URI} MONGO_DB_NAME=${MONGO_DB_NAME} MONGO_COLLECTION_NAME=${MONGO_COLLECTION_NAME} python -m env_service.env_service --env finworld --portal 0.0.0.0 --port 8080"
162159

163160

164161
#===============================================================================
@@ -205,12 +202,11 @@ if [[ $HOSTNAME == *"-master-"* ]]; then
205202

206203
# 启动训练任务(最核心)
207204
python ajet/launcher.py \
208-
--with-deepfinance \
209205
--conf ${CONFIG_FILE} \
210206
--backbone="verl" \
211207
--prefix=${SUFFIX} \
212208
2>&1 | tee ${TRAIN_LOG}
213-
209+
214210

215211
#===============================================================================
216212
# 6.2 Worker 节点启动流程
@@ -222,4 +218,4 @@ else
222218
ray stop || true
223219
ray start --address $MASTER_ADDR:6379 --num-gpus 8
224220
while true; do sleep 60; done
225-
fi
221+
fi

tutorial/example_deep_finance/deep_finance.yaml

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,13 @@ ajet:
4747
interchange_server:
4848
interchange_method: 'tcp' # options: 'tcp' (multi-nodes) or 'ipc' (1 node)
4949
debug:
50-
debug_max_parallel: 64 # 增加并行任务数,充分利用GPU
50+
debug_max_parallel: 1 # maximum number of parallel tasks in debug mode (1 = no parallelism)
5151
debug_first_n_tasks: 100 # 增加处理的任务数
5252
data:
5353
train_batch_size: 32
5454
max_prompt_length: 8000
5555
max_response_length: 41000
56-
56+
5757
task_reader:
5858
type: deep_finance # 数据从 JSON 加载并组装 init_messages,工具调用走 env_service
5959
deep_finance:
@@ -64,11 +64,13 @@ ajet:
6464
# env_service 仍需配置(用于工具调用)
6565
env_service:
6666
env_type: "finworld"
67-
env_url: "http://127.0.0.1:8080"
67+
env_url: {{ENV_SERVICE_URL}}
6868
env_action_preference: code
69+
70+
6971
trainer:
7072
default_local_dir: {{CKPT_SAVE_PATH}}
71-
# resume_mode: disable # 禁用自动恢复,从头开始训练
73+
# resume_mode: disable # 禁用自动恢复,从头开始训练
7274
actor_rollout_ref:
7375
rollout:
7476
tensor_model_parallel_size: 8
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
#!/bin/bash
2+
set -e
3+
#===============================================================================
4+
# 1. 配置区域 - 用户只需修改这里
5+
#===============================================================================
6+
SUFFIX="ajet_deep_finance" # 实验后缀,影响所有日志和实验名称
7+
PREFIX="open" # 实验前缀,影响日志和实验所在文件夹
8+
9+
# OpenJudge 模型配置
10+
OPENJUDGE_LLM='qwen-flash' # OpenJudge 评分模型
11+
RM_LLM='qwen-max' # RM Gallery 评分模型
12+
JUDGE_CONCURRENCY=10
13+
14+
# 奖励权重配置
15+
RM_WEIGHT=0.4
16+
CITATION_AUDIT_WEIGHT=0.2
17+
REPORT_RESOLUTION_WEIGHT=0.2
18+
TRAJECTORY_FAITHFULNESS_WEIGHT=0.2
19+
20+
# 训练参数配置
21+
NUM_REPEAT=4 # group size,每个query rollout NUM_REPEAT次
22+
TRAIN_BATCH_SIZE=32 # 训练batchsize
23+
NUM_STEPS=6 # 每个样本step轮数
24+
DEEPFINANCE_TOOL_RESULT_MAX_CHARS=10000
25+
26+
# 主目录
27+
28+
NNODES=${WORLD_SIZE}
29+
30+
# 涉密的配置(API_KEY以及模型、数据位置)从.env读取
31+
cd ${AJET_ROOT}
32+
source .venv/bin/activate
33+
34+
# API密钥配置 - 从 .env 文件加载
35+
ENV_FILE="${AJET_ROOT}/.env"
36+
if [ -f "$ENV_FILE" ]; then
37+
set -a
38+
source "$ENV_FILE"
39+
set +a
40+
echo -e "\033[32m已从 $ENV_FILE 加载环境变量\033[0m"
41+
else
42+
echo -e "\033[31m警告: 找不到 .env 文件: $ENV_FILE\033[0m"
43+
fi
44+
45+
#===============================================================================
46+
# 2. 动态生成配置文件 (从yaml template生成yaml)
47+
#===============================================================================
48+
# 修改:配置文件生成路径,现在动态生成到 yaml 目录下
49+
CONFIG_TEMPLATE="tutorial/example_deep_finance/yaml_template/deep_finance_template.yaml"
50+
CONFIG_FILE="${AJET_ROOT}/tutorial/example_deep_finance/yaml/${SUFFIX}.yaml"
51+
mkdir -p $(dirname ${CONFIG_FILE})
52+
53+
sed -e "s|{{SUFFIX}}|${SUFFIX}|g" \
54+
-e "s|{{PREFIX}}|${PREFIX}|g" \
55+
-e "s|{{MODEL_PATH}}|${MODEL_PATH}|g" \
56+
-e "s|{{NNODES}}|${NNODES}|g" \
57+
-e "s|{{RM_WEIGHT}}|${RM_WEIGHT}|g" \
58+
-e "s|{{CITATION_AUDIT_WEIGHT}}|${CITATION_AUDIT_WEIGHT}|g" \
59+
-e "s|{{OPENJUDGE_LLM}}|${OPENJUDGE_LLM}|g" \
60+
-e "s|{{RM_LLM}}|${RM_LLM}|g" \
61+
-e "s|{{JUDGE_CONCURRENCY}}|${JUDGE_CONCURRENCY}|g" \
62+
-e "s|{{REPORT_RESOLUTION_WEIGHT}}|${REPORT_RESOLUTION_WEIGHT}|g" \
63+
-e "s|{{TRAJECTORY_FAITHFULNESS_WEIGHT}}|${TRAJECTORY_FAITHFULNESS_WEIGHT}|g" \
64+
-e "s|{{NUM_REPEAT}}|${NUM_REPEAT}|g" \
65+
-e "s|{{NUM_STEPS}}|${NUM_STEPS}|g" \
66+
-e "s|{{TRAIN_BATCH_SIZE}}|${TRAIN_BATCH_SIZE}|g" \
67+
-e "s|{{TRAIN_DATA_PATH}}|${TRAIN_DATA_PATH}|g" \
68+
-e "s|{{VAL_DATA_PATH}}|${VAL_DATA_PATH}|g" \
69+
-e "s|{{ENV_SERVICE_URL}}|${ENV_SERVICE_URL}|g" \
70+
-e "s|{{TRAIN_REF_ANS_PATH}}|${TRAIN_REF_ANS_PATH}|g" \
71+
-e "s|{{VAL_REF_ANS_PATH}}|${VAL_REF_ANS_PATH}|g" \
72+
-e "s|{{CKPT_SAVE_PATH}}|${CKPT_SAVE_PATH}|g" \
73+
${AJET_ROOT}/${CONFIG_TEMPLATE} > ${CONFIG_FILE}
74+
75+
echo "配置文件已生成: ${CONFIG_FILE}"
76+
echo "参数确认: RM=${RM_WEIGHT}, Citation=${CITATION_AUDIT_WEIGHT}, OpenJudge=${OPENJUDGE_LLM}, RM_LLM=${RM_LLM}"
77+
78+
#===============================================================================
79+
# 3. 环境配置
80+
#===============================================================================
81+
# MongoDB 缓存配置
82+
CACHE_TYPE="mongodb"
83+
MONGO_URI="mongodb://${ADDR}:27117/"
84+
MONGO_DB_NAME="finworld_cache"
85+
MONGO_COLLECTION_NAME="tool_cache"
86+
export CACHE_TYPE MONGO_URI MONGO_DB_NAME MONGO_COLLECTION_NAME
87+
88+
# DeepFinance MCP 配置
89+
DEEPFINANCE_MCP_CONFIG="${AJET_ROOT}/tutorial/example_deep_finance/config/mcp_finance_tool_generated.json"
90+
91+
# 动态生成 MCP 配置文件
92+
mkdir -p $(dirname ${DEEPFINANCE_MCP_CONFIG})
93+
cat > ${DEEPFINANCE_MCP_CONFIG} << EOF
94+
{
95+
"mcpServers": {
96+
"flowllm": {
97+
"transport": "sse",
98+
"url": "http://${ADDR}:${MCP_PORT}/sse",
99+
"timeout": 600,
100+
"sse_read_timeout": 1200
101+
}
102+
}
103+
}
104+
EOF
105+
export DEEPFINANCE_MCP_CONFIG DEEPFINANCE_TOOL_RESULT_MAX_CHARS
106+
107+
# 其他服务配置
108+
HF_ENDPOINT="https://hf-mirror.com"
109+
ES_HOSTS="http://11.160.132.46:8200"
110+
export HF_ENDPOINT ES_HOSTS
111+
112+
# log 文件位置
113+
CURRENT_TIME=$(date "+%Y%m%d_%H%M%S")
114+
LOG_DIR="${AJET_ROOT}/logs/${PREFIX}"
115+
MASTER_IP_FILE="${LOG_DIR}/master-ip_${SUFFIX}.log"
116+
ENV_SERVICE_LOG="${LOG_DIR}/env_service_${SUFFIX}_${CURRENT_TIME}.log"
117+
TRAIN_LOG="${LOG_DIR}/train_${SUFFIX}_${CURRENT_TIME}.log"
118+
119+
# 多机训练参数配置
120+
GPUS_PER_NODE=8
121+
EXPECTED_WORKERS=$WORLD_SIZE
122+
123+
124+
#===============================================================================
125+
# 4. 工具函数 以及 NCCL 配置(固定)
126+
#===============================================================================
127+
print_green() {
128+
echo -e "\033[32m$1\033[0m"
129+
}
130+
131+
log() {
132+
echo -e "\033[0;32m[$(date '+%Y-%m-%d %H:%M:%S')]\033[0m \033[0;34m[INFO]\033[0m $1"
133+
}
134+
135+
check_workers() {
136+
local status_output=$(ray status 2>/dev/null)
137+
if [ -z "$status_output" ]; then echo 0; return; fi
138+
local node_count=$(echo "$status_output" | grep -E "^[[:space:]]*1[[:space:]]+node_" | wc -l)
139+
if [ "$node_count" -gt 0 ]; then echo $node_count; return; fi
140+
echo $(echo "$status_output" | grep -o "node_[0-9a-f]\+" | sort -u | wc -l)
141+
}
142+
143+
check_gpu_resources() {
144+
gpu_count=$(ray status 2>/dev/null | grep -A 10 "Resources" | grep "GPU" | awk '{print $1}' | cut -d'/' -f2)
145+
if [ -z "$gpu_count" ]; then echo 0; else printf "%.0f" "$gpu_count"; fi
146+
}
147+
148+
149+
export NCCL_TIMEOUT=1800
150+
export NCCL_DEBUG=WARN
151+
export NCCL_IB_TIMEOUT=23
152+
export NCCL_ASYNC_ERROR_HANDLING=1
153+
154+
#===============================================================================
155+
# 5. 工具envservice 环境变量
156+
#===============================================================================
157+
158+
export PYTHONPATH="${AJET_ROOT}:${PYTHONPATH}"
159+
export RAY_CLUSTER_MODE="multi_node"
160+
161+
162+
#===============================================================================
163+
# 6. 主流程
164+
#===============================================================================
165+
log "开始多机多卡训练: ${SUFFIX}"
166+
log "节点数: ${NNODES}, 每节点GPU数: ${GPUS_PER_NODE}"
167+
mkdir -p ${LOG_DIR}
168+
mkdir -p $(dirname ${CONFIG_FILE})
169+
170+
#===============================================================================
171+
# 6.1 Master 节点启动流程
172+
#===============================================================================
173+
# 启动训练任务(最核心)
174+
python ajet/launcher.py \
175+
--conf ${CONFIG_FILE} \
176+
--backbone="debug" \
177+
2>&1 | tee ${TRAIN_LOG}

tutorial/example_deep_finance/yaml_template/deep_finance_template.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,13 @@ ajet:
4747
interchange_server:
4848
interchange_method: 'tcp' # options: 'tcp' (multi-nodes) or 'ipc' (1 node)
4949
debug:
50-
debug_max_parallel: 64 # 增加并行任务数,充分利用GPU
50+
debug_max_parallel: 1 # maximum number of parallel tasks in debug mode (1 = no parallelism)
5151
debug_first_n_tasks: 100 # 增加处理的任务数
5252
data:
5353
train_batch_size: {{TRAIN_BATCH_SIZE}}
5454
max_prompt_length: 8000
5555
max_response_length: 41000
56-
56+
5757
task_reader:
5858
type: deep_finance # 数据从 JSON 加载并组装 init_messages,工具调用走 env_service
5959
deep_finance:
@@ -64,11 +64,11 @@ ajet:
6464
# env_service 仍需配置(用于工具调用)
6565
env_service:
6666
env_type: "finworld"
67-
env_url: "http://127.0.0.1:8080"
67+
env_url: {{ENV_SERVICE_URL}}
6868
env_action_preference: code
6969
trainer:
7070
default_local_dir: "{{CKPT_SAVE_PATH}}/{{PREFIX}}/{{SUFFIX}}"
71-
# resume_mode: disable # 禁用自动恢复,从头开始训练
71+
# resume_mode: disable # 禁用自动恢复,从头开始训练
7272
actor_rollout_ref:
7373
rollout:
7474
tensor_model_parallel_size: 8

0 commit comments

Comments
 (0)