PaddlePaddle · nemonameless · Nov 17, 2024
diff --git a/...wen2_vl/N1C8/qwen2_vl_sft_bs32_bf16_DP.sh → ...n2_vl/N1C8/qwen2_vl_sft_2b_bs1_bf16_DP.sh b/...wen2_vl/N1C8/qwen2_vl_sft_bs32_bf16_DP.sh → ...n2_vl/N1C8/qwen2_vl_sft_2b_bs1_bf16_DP.sh
@@ -12,14 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-model_item=qwen2_vl_sft
+model_item=qwen2_vl_sft_2b
 model=qwen2_vl
 bs_item=1
 fp_item=bf16
 run_mode=DP
 device_num=N1C8
-max_epochs=3
-num_workers=0
+max_epochs=1
+num_workers=8
 
 # get data
 bash tests/test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh

diff --git a/tests/test_tipc/dygraph/dp/qwen2_vl/N1C8/qwen2_vl_sft_7b_bs1_bf16_DP.sh b/tests/test_tipc/dygraph/dp/qwen2_vl/N1C8/qwen2_vl_sft_7b_bs1_bf16_DP.sh
@@ -0,0 +1,27 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model_item=qwen2_vl_sft_7b  # any item other than qwen2_vl_sft_2b makes run_benchmark.sh load Qwen/Qwen2-VL-7B-Instruct
+model=qwen2_vl              # directory name under tests/test_tipc/dygraph/dp/
+bs_item=1                   # forwarded by run_benchmark.sh as --per_device_train_batch_size
+fp_item=bf16                # precision label; run_benchmark.sh lists fp32|fp16|bf16
+run_mode=DP                 # data parallel
+device_num=N1C8             # presumably 1 node x 8 cards -- TODO confirm against the benchmark harness
+max_epochs=1                # forwarded by run_benchmark.sh as --num_train_epochs
+num_workers=8               # passed as the 7th positional argument of run_benchmark.sh
+
+# Prepare step: run the shared prepare.sh for this model (per the original note, this gets the data).
+bash "tests/test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh"
+# Launch the shared benchmark driver. Arguments are quoted so each one stays in its positional slot; stderr is merged into stdout.
+bash "tests/test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh" "${model_item}" "${bs_item}" "${fp_item}" "${run_mode}" "${device_num}" "${max_epochs}" "${num_workers}" 2>&1
diff --git a/tests/test_tipc/dygraph/dp/qwen2_vl/benchmark_common/run_benchmark.sh b/tests/test_tipc/dygraph/dp/qwen2_vl/benchmark_common/run_benchmark.sh
@@ -17,20 +17,20 @@
 # Test training benchmark for a model.
 # Usage：bash benchmark/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num}
 function _set_params(){
-    model_item=${1:-"qwen2_vl"}   # (必选) 模型 item |fastscnn|segformer_b0| ocrnet_hrnetw48
+    model_item=${1:-"qwen2_vl_sft_2b"}   # (必选) 模型 item |fastscnn|segformer_b0| ocrnet_hrnetw48
     base_batch_size=${2:-"1"}       # (必选) 如果是静态图单进程，则表示每张卡上的BS，需在训练时*卡数
     fp_item=${3:-"bf16"}            # (必选) fp32|fp16|bf16
     run_mode=${4:-"DP"}             # (必选) MP模型并行|DP数据并行|PP流水线并行|混合并行DP1-MP1-PP1|DP1-MP4-PP1
-    device_num=${5:-"N1C1"}         # (必选) 使用的卡数量，N1C1|N1C8|N4C32 （4机32卡）
+    device_num=${5:-"N1C8"}         # (必选) 使用的卡数量，N1C1|N1C8|N4C32 （4机32卡）
     profiling=${PROFILING:-"false"}      # (必选) Profiling  开关，默认关闭，通过全局变量传递
 
     model_repo="PaddleMIX"          # (必选) 模型套件的名字
     speed_unit="sample/sec"         # (必选)速度指标单位
     skip_steps=2                  # (必选)解析日志，跳过模型前几个性能不稳定的step
     keyword="ips:"                 # (必选)解析日志，筛选出性能数据所在行的关键字
     convergence_key="loss:"        # (可选)解析日志，筛选出收敛数据所在行的关键字 如：convergence_key="loss:"
-    max_epochs=${6:-"3"}                 # （可选）需保证模型执行时间在5分钟内，需要修改代码提前中断的直接提PR 合入套件  或是max_epoch
-    num_workers=${7:-"2"}                # (可选)
+    max_epochs=${6:-"1"}                 # （可选）需保证模型执行时间在5分钟内，需要修改代码提前中断的直接提PR 合入套件  或是max_epoch
+    num_workers=${7:-"8"}                # (可选)
     is_large_model=False           # (可选)普通模型默认为False，如果添加大模型且只取一条ips设置为True
 
     # 以下为通用执行命令，无特殊可不用修改
@@ -58,16 +58,25 @@ function _train(){
             log_file=${train_log_file}
     fi
     rm -rf ./outputs
+
+    if [ ${model_item} = "qwen2_vl_sft_2b" ];then # 目前只支持2B和7B
+        use_model_args="--model_name_or_path Qwen/Qwen2-VL-2B-Instruct"
+        use_output_args="--output_dir work_dirs/qwen2_vl_sft_2b_bs32_1e8"
+    else
+        use_model_args="--model_name_or_path Qwen/Qwen2-VL-7B-Instruct"
+        use_output_args="--output_dir work_dirs/qwen2_vl_sft_7b_bs32_1e8"
+    fi
+
     # add some flags
     export FLAGS_eager_delete_tensor_gb=0.0
     export FLAGS_fraction_of_gpu_memory_to_use=0.98
     export FLAGS_conv_workspace_size_limit=4096
 
     train_cmd="../paddlemix/examples/qwen2_vl/qwen2vl_finetune.py \
             --do_train \
-            --model_name_or_path "Qwen/Qwen2-VL-2B-Instruct" \
+            ${use_model_args} \
             --meta_path ../paddlemix/examples/qwen2_vl/configs/benchmark_chartqa_500.json \
-            --output_dir ./work_dirs/qwen2_vl_sft_benchmark \
+            ${use_output_args} \
             --logging_steps=1 \
             --num_train_epochs=${max_epochs} \
             --save_strategy no \
@@ -91,7 +100,7 @@ function _train(){
             --amp_master_grad=1 \
             --hybrid_parallel_topo_order="sharding_first" \
             --per_device_train_batch_size ${base_batch_size} \
-            --gradient_accumulation_steps 4 \
+            --gradient_accumulation_steps 1 \
             --benchmark True \
             "