33# ===============================================================================
44# 1. 配置区域 - 用户只需修改这里
55# ===============================================================================
6- SUFFIX=" deep_finance " # 实验后缀,影响所有日志和实验名称
7- PREFIX=" open " # 实验前缀,影响日志和实验所在文件夹
6+ SUFFIX=" newjudge " # 实验后缀,影响所有日志和实验名称
7+ PREFIX=" ajet_newjudge " # 实验前缀,影响日志和实验所在文件夹
88
99# OpenJudge 模型配置
1010OPENJUDGE_LLM=' qwen-flash' # OpenJudge 评分模型
1111RM_LLM=' qwen-max' # RM Gallery 评分模型
1212JUDGE_CONCURRENCY=10
1313
1414# 奖励权重配置
15- RM_WEIGHT=0.4
16- CITATION_AUDIT_WEIGHT=0.2
17- REPORT_RESOLUTION_WEIGHT=0.2
18- TRAJECTORY_FAITHFULNESS_WEIGHT=0.2
15+ RM_WEIGHT=0.5
16+ PRESENTATION_QUALITY_WEIGHT=0.25
17+ GROUNDING_WEIGHT=0.25
1918
2019# 训练参数配置
2120NUM_REPEAT=4 # group size,每个query rollout NUM_REPEAT次
@@ -24,7 +23,7 @@ NUM_STEPS=6 # 每个样本step轮数
2423DEEPFINANCE_TOOL_RESULT_MAX_CHARS=10000
2524
2625# 主目录
27- export AJET_ROOT=" /mnt/data_cpfs/taoshuchang.tsc/deepresearch/AgentJet "
26+ export AJET_ROOT=" /mnt/data_cpfs/taoshuchang.tsc/deepresearch/AgentJet_new "
2827
2928NNODES=${WORLD_SIZE}
3029
@@ -56,12 +55,11 @@ sed -e "s|{{SUFFIX}}|${SUFFIX}|g" \
5655 -e " s|{{MODEL_PATH}}|${MODEL_PATH} |g" \
5756 -e " s|{{NNODES}}|${NNODES} |g" \
5857 -e " s|{{RM_WEIGHT}}|${RM_WEIGHT} |g" \
59- -e " s|{{CITATION_AUDIT_WEIGHT}}|${CITATION_AUDIT_WEIGHT} |g" \
58+ -e " s|{{PRESENTATION_QUALITY_WEIGHT}}|${PRESENTATION_QUALITY_WEIGHT} |g" \
59+ -e " s|{{GROUNDING_WEIGHT}}|${GROUNDING_WEIGHT} |g" \
6060 -e " s|{{OPENJUDGE_LLM}}|${OPENJUDGE_LLM} |g" \
6161 -e " s|{{RM_LLM}}|${RM_LLM} |g" \
6262 -e " s|{{JUDGE_CONCURRENCY}}|${JUDGE_CONCURRENCY} |g" \
63- -e " s|{{REPORT_RESOLUTION_WEIGHT}}|${REPORT_RESOLUTION_WEIGHT} |g" \
64- -e " s|{{TRAJECTORY_FAITHFULNESS_WEIGHT}}|${TRAJECTORY_FAITHFULNESS_WEIGHT} |g" \
6563 -e " s|{{NUM_REPEAT}}|${NUM_REPEAT} |g" \
6664 -e " s|{{NUM_STEPS}}|${NUM_STEPS} |g" \
6765 -e " s|{{TRAIN_BATCH_SIZE}}|${TRAIN_BATCH_SIZE} |g" \
@@ -73,7 +71,7 @@ sed -e "s|{{SUFFIX}}|${SUFFIX}|g" \
7371 ${AJET_ROOT} /${CONFIG_TEMPLATE} > ${CONFIG_FILE}
7472
7573echo " 配置文件已生成: ${CONFIG_FILE} "
76- echo " 参数确认: RM=${RM_WEIGHT} , Citation =${CITATION_AUDIT_WEIGHT } , OpenJudge=${OPENJUDGE_LLM} , RM_LLM=${RM_LLM} "
74+ echo " 参数确认: RM=${RM_WEIGHT} , PresentationQuality =${PRESENTATION_QUALITY_WEIGHT} , Grounding= ${GROUNDING_WEIGHT } , OpenJudge=${OPENJUDGE_LLM} , RM_LLM=${RM_LLM} "
7775
7876# ===============================================================================
7977# 3. 环境配置
@@ -115,7 +113,7 @@ LOG_DIR="${AJET_ROOT}/logs/${PREFIX}"
115113MASTER_IP_FILE=" ${LOG_DIR} /master-ip_${SUFFIX} .log"
116114ENV_SERVICE_LOG=" ${LOG_DIR} /env_service_${SUFFIX} _${CURRENT_TIME} .log"
117115TRAIN_LOG=" ${LOG_DIR} /train_${SUFFIX} _${CURRENT_TIME} .log"
118-
116+ env_log_prefix= " ${SUFFIX} __ ${CURRENT_TIME} "
119117# 多机训练参数配置
120118GPUS_PER_NODE=8
121119EXPECTED_WORKERS=$WORLD_SIZE
@@ -208,7 +206,7 @@ if [[ $HOSTNAME == *"-master-"* ]]; then
208206 --with-deepfinance \
209207 --conf ${CONFIG_FILE} \
210208 --backbone=" verl" \
211- --prefix=${SUFFIX } \
209+ --prefix=${env_log_prefix } \
212210 2>&1 | tee ${TRAIN_LOG}
213211
214212
0 commit comments