Skip to content

Commit 4538f5a

Browse files
committed
chore(config): update experiment suffix, prefix and reward weights in deep_finance.sh
1 parent d9cbdc0 commit 4538f5a

File tree

1 file changed

+11 -13 lines changed

tutorial/example_deep_finance/deep_finance.sh

Lines changed: 11 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -3,19 +3,18 @@ set -e
 #===============================================================================
 # 1. 配置区域 - 用户只需修改这里
 #===============================================================================
-SUFFIX="deep_finance" # 实验后缀,影响所有日志和实验名称
-PREFIX="open" # 实验前缀,影响日志和实验所在文件夹
+SUFFIX="newjudge" # 实验后缀,影响所有日志和实验名称
+PREFIX="ajet_newjudge" # 实验前缀,影响日志和实验所在文件夹
 
 # OpenJudge 模型配置
 OPENJUDGE_LLM='qwen-flash' # OpenJudge 评分模型
 RM_LLM='qwen-max' # RM Gallery 评分模型
 JUDGE_CONCURRENCY=10
 
 # 奖励权重配置
-RM_WEIGHT=0.4
-CITATION_AUDIT_WEIGHT=0.2
-REPORT_RESOLUTION_WEIGHT=0.2
-TRAJECTORY_FAITHFULNESS_WEIGHT=0.2
+RM_WEIGHT=0.5
+PRESENTATION_QUALITY_WEIGHT=0.25
+GROUNDING_WEIGHT=0.25
 
 # 训练参数配置
 NUM_REPEAT=4 # group size,每个query rollout NUM_REPEAT次
@@ -24,7 +23,7 @@ NUM_STEPS=6 # 每个样本step轮数
 DEEPFINANCE_TOOL_RESULT_MAX_CHARS=10000
 
 # 主目录
-export AJET_ROOT="/mnt/data_cpfs/taoshuchang.tsc/deepresearch/AgentJet"
+export AJET_ROOT="/mnt/data_cpfs/taoshuchang.tsc/deepresearch/AgentJet_new"
 
 NNODES=${WORLD_SIZE}
 
@@ -56,12 +55,11 @@ sed -e "s|{{SUFFIX}}|${SUFFIX}|g" \
 -e "s|{{MODEL_PATH}}|${MODEL_PATH}|g" \
 -e "s|{{NNODES}}|${NNODES}|g" \
 -e "s|{{RM_WEIGHT}}|${RM_WEIGHT}|g" \
--e "s|{{CITATION_AUDIT_WEIGHT}}|${CITATION_AUDIT_WEIGHT}|g" \
+-e "s|{{PRESENTATION_QUALITY_WEIGHT}}|${PRESENTATION_QUALITY_WEIGHT}|g" \
+-e "s|{{GROUNDING_WEIGHT}}|${GROUNDING_WEIGHT}|g" \
 -e "s|{{OPENJUDGE_LLM}}|${OPENJUDGE_LLM}|g" \
 -e "s|{{RM_LLM}}|${RM_LLM}|g" \
 -e "s|{{JUDGE_CONCURRENCY}}|${JUDGE_CONCURRENCY}|g" \
--e "s|{{REPORT_RESOLUTION_WEIGHT}}|${REPORT_RESOLUTION_WEIGHT}|g" \
--e "s|{{TRAJECTORY_FAITHFULNESS_WEIGHT}}|${TRAJECTORY_FAITHFULNESS_WEIGHT}|g" \
 -e "s|{{NUM_REPEAT}}|${NUM_REPEAT}|g" \
 -e "s|{{NUM_STEPS}}|${NUM_STEPS}|g" \
 -e "s|{{TRAIN_BATCH_SIZE}}|${TRAIN_BATCH_SIZE}|g" \
@@ -73,7 +71,7 @@ sed -e "s|{{SUFFIX}}|${SUFFIX}|g" \
 ${AJET_ROOT}/${CONFIG_TEMPLATE} > ${CONFIG_FILE}
 
 echo "配置文件已生成: ${CONFIG_FILE}"
-echo "参数确认: RM=${RM_WEIGHT}, Citation=${CITATION_AUDIT_WEIGHT}, OpenJudge=${OPENJUDGE_LLM}, RM_LLM=${RM_LLM}"
+echo "参数确认: RM=${RM_WEIGHT}, PresentationQuality=${PRESENTATION_QUALITY_WEIGHT}, Grounding=${GROUNDING_WEIGHT}, OpenJudge=${OPENJUDGE_LLM}, RM_LLM=${RM_LLM}"
 
 #===============================================================================
 # 3. 环境配置
@@ -115,7 +113,7 @@ LOG_DIR="${AJET_ROOT}/logs/${PREFIX}"
 MASTER_IP_FILE="${LOG_DIR}/master-ip_${SUFFIX}.log"
 ENV_SERVICE_LOG="${LOG_DIR}/env_service_${SUFFIX}_${CURRENT_TIME}.log"
 TRAIN_LOG="${LOG_DIR}/train_${SUFFIX}_${CURRENT_TIME}.log"
-
+env_log_prefix="${SUFFIX}__${CURRENT_TIME}"
 # 多机训练参数配置
 GPUS_PER_NODE=8
 EXPECTED_WORKERS=$WORLD_SIZE
@@ -208,7 +206,7 @@ if [[ $HOSTNAME == *"-master-"* ]]; then
 --with-deepfinance \
 --conf ${CONFIG_FILE} \
 --backbone="verl" \
---prefix=${SUFFIX} \
+--prefix=${env_log_prefix} \
 2>&1 | tee ${TRAIN_LOG}
 
 
0 commit comments

Comments
 (0)