From bbaadf0c7c83f38b4282ad23e8ebc316eaffe6b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=99=86=E5=8A=B2=E5=A4=AB?= <979323654@qq.com>
Date: Fri, 15 Nov 2024 17:11:01 +0800
Subject: [PATCH 1/7] InternVL2-8B ST test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../finetune_intervl2_8B.json                 | 33 +++++++
 .../finetune_internvl2_8B/data_8B.json        | 54 +++++++++++
 .../finetune_internvl2_8B/model_8B.json       | 90 +++++++++++++++++
 .../st/shell_scripts/finetune_internvl2_8B.sh | 97 +++++++++++++++++++
 4 files changed, 274 insertions(+)
 create mode 100644 tests/st/baseline_results/finetune_intervl2_8B.json
 create mode 100644 tests/st/run_configs/finetune_internvl2_8B/data_8B.json
 create mode 100644 tests/st/run_configs/finetune_internvl2_8B/model_8B.json
 create mode 100644 tests/st/shell_scripts/finetune_internvl2_8B.sh

diff --git a/tests/st/baseline_results/finetune_intervl2_8B.json b/tests/st/baseline_results/finetune_intervl2_8B.json
new file mode 100644
index 00000000..4313829c
--- /dev/null
+++ b/tests/st/baseline_results/finetune_intervl2_8B.json
@@ -0,0 +1,33 @@
+{
+    "loss": [
+        2.864627E-01,
+        4.773775E-01
+    ],
+    "time": [
+        20954.1,
+        11843.9
+    ],
+    "memo info": [
+        {
+            "rank": 0,
+            "allocated memory": 20283.76953125,
+            "max allocated memory": 24900.033203125
+        },
+        {
+            "rank": 2,
+            "allocated memory": 22471.15185546875,
+            "max allocated memory": 27331.3740234375
+        },
+        {
+            "rank": 4,
+            "allocated memory": 22470.65185546875,
+            "max allocated memory": 23292.36181640625
+        },
+        {
+            "rank": 6,
+            "allocated memory": 24313.50390625,
+            "max allocated memory": 25759.67529296875
+        }
+    ],
+    "warm_up": 1
+}
\ No newline at end of file
diff --git a/tests/st/run_configs/finetune_internvl2_8B/data_8B.json b/tests/st/run_configs/finetune_internvl2_8B/data_8B.json
new file mode 100644
index 00000000..f541d446
--- /dev/null
+++ b/tests/st/run_configs/finetune_internvl2_8B/data_8B.json
@@ -0,0 +1,54 @@
+{
+    "dataset_param": {
+        "dataset_type": "image",
+        "basic_parameters": {
+            "data_path": "/home/ci_resource/data/internvlv1-2-sft/opensource/ai2d_train_12k.jsonl",
+            "data_folder": "/home/ci_resource/data/internvlv1-2-sft/data/ai2d"
+        },
+        "preprocess_parameters": {
+            "image_reader_type": "torchvision",
+            "image_processer_type": "image2pixel",
+            "train_pipeline": {
+                "image":[
+                    {"trans_type": "Pad2Square", "param": {"mean": [0.485, 0.456, 0.406]}},
+                    {"trans_type": "Resize", "param": {"size": [448, 448], "interpolation": "BICUBIC"}},
+                    {"trans_type": "ToTensor"},
+                    {"trans_type": "norm_fun", "param": {"mean":[0.485, 0.456, 0.406], "std": [0.229, 0.224, 0.225]}}
+                ]
+            }
+        },
+        "tokenizer_config": {
+            "hub_backend": "hf",
+            "autotokenizer_name": "AutoTokenizer",
+            "from_pretrained": "/home/ci_resource/models/InternVL2-8B/pretrained/raw_ckpt/InternVL2-8B",
+            "model_max_length": 4096,
+            "add_eos_token": false,
+            "trust_remote_code": true,
+            "use_fast": false
+        },
+        "use_text_processer": true,
+        "template_name": "internlm2-chat",
+        "repeat_time": 1,
+        "patch_size": 14,
+        "image_size": 448,
+        "down_sample_ratio": 0.5,
+        "group_by_length": true,
+        "dynamic_image_size": true,
+        "use_thumbnail": true,
+        "min_dynamic_patch": 1,
+        "max_dynamic_patch": 6
+    },
+    "dataloader_param": {
+        "dataloader_mode": "sampler",
+        "batch_size": 1,
+        "num_workers": 4,
+        "shuffle": false,
+        "drop_last": true,
+        "pin_memory": true,
+        "sampler_type": "BaseRandomBatchSampler",
+        "collate_param": {
+            "model_name": "internvl",
+            "pad_id": 2
+        }
+    }
+}
\ No newline at end of file
diff --git a/tests/st/run_configs/finetune_internvl2_8B/model_8B.json b/tests/st/run_configs/finetune_internvl2_8B/model_8B.json
new file mode 100644
index 00000000..adb4be58
--- /dev/null
+++ b/tests/st/run_configs/finetune_internvl2_8B/model_8B.json
@@ -0,0 +1,90 @@
+{
+    "model_id": "InternVL",
+    "pre_process": true,
+    "post_process": true,
+    "add_text_encoder": false,
+    "img_embedding_idx": 1,
+    "downsample_ratio": 0.5,
+    "select_layer": -1,
+    "ps_version": "v2",
+    "add_rmsnorm_offset": false,
+    "img_context_token_id": 92546,
+    "text_decoder": {
+        "num_layers": 32,
+        "pipeline_layer_index": [0, 6, 15, 24],
+        "hidden_size": 4096,
+        "num_attention_heads": 32,
+        "num_query_groups": 8,
+        "ffn_hidden_size": 14336,
+        "kv_channels": 128,
+        "hidden_dropout": 0.0,
+        "attention_dropout": 0.0,
+        "layernorm_epsilon": 1e-05,
+        "normalization": "RMSNorm",
+        "qk_layernorm": false,
+        "add_bias_linear": false,
+        "add_qkv_bias": false,
+        "bias_activation_fusion": false,
+        "gated_linear_unit": true,
+        "init_method_std": 0.01,
+        "apply_query_key_layer_scaling":false,
+        "attention_softmax_in_fp32": true,
+        "masked_softmax_fusion": false,
+        "layernorm_zero_centered_gamma": false,
+        "bias_dropout_fusion":false,
+        "apply_rope_fusion": true,
+        "memory_efficient_layer_norm": false,
+        "max_position_embeddings": 4096,
+        "fp16": false,
+        "bf16": true,
+        "params_dtype": "bf16",
+        "fp16_lm_cross_entropy": false,
+        "rotary_percent": 1.0,
+        "position_embedding_type": "rope",
+        "parallel_output": true,
+        "initializer_factor": 0.1,
+        "persist_layer_norm": true,
+        "activation_func": "silu",
+        "vocab_size": 92553,
+        "rotary_base": 1000000
+    },
+    "image_encoder": {
+        "vision_encoder": {
+            "model_id": "InternViT",
+            "num_layers": 24,
+            "hidden_size": 1024,
+            "ffn_hidden_size": 4096,
+            "num_attention_heads": 16,
+            "num_channels": 3,
+            "patch_size": 14,
+            "image_size": 448,
+            "add_qkv_bias": true,
+            "qk_layernorm": false,
+            "activation_func": "gelu",
+            "normalization": "layernorm",
+            "layernorm_epsilon": 1e-6,
+            "hidden_dropout": 0.0,
+            "drop_path_rate": 0.0,
+            "attention_dropout": 0.0,
+            "init_method_std": 0.02,
+            "initializer_factor": 1.0,
+            "output_hidden_states": false,
+            "use_return_dict": false,
+            "recompute_granularity": "full",
+            "recompute_method": "uniform",
+            "recompute_num_layers": 1,
+            "params_dtype": "bf16",
+            "post_layer_norm": false,
+            "downsample_ratio": 0.5,
+            "fp16": false,
+            "bf16": true,
+            "attention_softmax_in_fp32": false,
+            "select_layer": -1,
+            "ps_version": "v2",
+            "is_freeze": true
+        },
+        "vision_projector": null
+    },
+    "text_encoder": null,
+    "video_encoder": null
+}
\ No newline at end of file
diff --git a/tests/st/shell_scripts/finetune_internvl2_8B.sh b/tests/st/shell_scripts/finetune_internvl2_8B.sh
new file mode 100644
index 00000000..5fc954a9
--- /dev/null
+++ b/tests/st/shell_scripts/finetune_internvl2_8B.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+export ASCEND_SLOG_PRINT_TO_STDOUT=0
+export ASCEND_GLOBAL_LOG_LEVEL=3
+export TASK_QUEUE_ENABLE=1
+export COMBINED_ENABLE=1
+export CPU_AFFINITY_CONF=1
+export HCCL_CONNECT_TIMEOUT=1200
+export CUDA_DEVICE_MAX_CONNECTIONS=1
+
+GPUS_PER_NODE=8
+MASTER_ADDR=localhost
+MASTER_PORT=29199
+NNODES=1
+NODE_RANK=0
+WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES))
+
+MBS=1
+GRAD_ACC_STEP=64
+TP=1
+PP=4
+CP=1
+DP=$(($WORLD_SIZE/$TP/$PP/$CP))
+GBS=$(($MBS*$GRAD_ACC_STEP*$DP))
+
+BASEPATH=$(cd `dirname $0`; cd ../../../; pwd)
+
+MM_DATA="$BASEPATH/tests/st/run_configs/finetune_internvl2_8B/data_8B.json"
+MM_MODEL="$BASEPATH/tests/st/run_configs/finetune_internvl2_8B/model_8B.json"
+MM_TOOL="$BASEPATH/mindspeed_mm/tools/tools.json"
+LOAD_PATH="/home/ci_resource/models/InternVL2-8B/ckpt_pp4"
+
+MM_ARGS="
+    --mm-data ${MM_DATA} \
+    --mm-model ${MM_MODEL} \
+    --mm-tool ${MM_TOOL}
+"
+
+DISTRIBUTED_ARGS="
+    --nproc_per_node $GPUS_PER_NODE \
+    --nnodes $NNODES \
+    --node_rank $NODE_RANK \
+    --master_addr $MASTER_ADDR \
+    --master_port $MASTER_PORT
+"
+
+GPT_ARGS="
+    --tensor-model-parallel-size ${TP} \
+    --pipeline-model-parallel-size ${PP} \
+    --context-parallel-size ${CP} \
+    --micro-batch-size ${MBS} \
+    --global-batch-size ${GBS} \
+    --num-layers 24 \
+    --hidden-size 4096 \
+    --num-attention-heads 16 \
+    --seq-length 4096 \
+    --max-position-embeddings 4096 \
+    --attention-dropout 0.0 \
+    --hidden-dropout 0.0 \
+    --tokenizer-type NullTokenizer \
+    --vocab-size 92553 \
+    --position-embedding-type rope \
+    --rotary-base 100000 \
+    --swiglu \
+    --no-masked-softmax-fusion \
+    --lr 4e-5 \
+    --min-lr 0.0 \
+    --train-iters 2 \
+    --lr-decay-style cosine \
+    --weight-decay 0.05 \
+    --clip-grad 1.0 \
+    --adam-beta1 0.9 \
+    --adam-beta2 0.999 \
+    --no-gradient-accumulation-fusion \
+    --no-load-optim \
+    --no-load-rng \
+    --no-save-optim \
+    --no-save-rng \
+    --use-distributed-optimizer \
+    --bf16 \
+    --load $LOAD_PATH \
+"
+
+OUTPUT_ARGS="
+    --log-interval 1 \
+    --save-interval 5000 \
+    --eval-interval 5000 \
+    --eval-iters 5000 \
+"
+
+logfile=$(date +%Y%m%d)_$(date +%H%M%S)
+mkdir -p logs
+torchrun $DISTRIBUTED_ARGS \
+    $BASEPATH/pretrain_internvl.py \
+    $GPT_ARGS \
+    $MM_ARGS \
+    $OUTPUT_ARGS \
+    --distributed-backend nccl
\ No newline at end of file
--
Gitee

From a8a4741ee2bb93752cc7607fedcb31c760ece8ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=99=86=E5=8A=B2=E5=A4=AB?= <979323654@qq.com>
Date: Fri, 15 Nov 2024 18:06:51 +0800
Subject: [PATCH 2/7] InternVL2-8B ST test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../st/shell_scripts/finetune_internvl2_8B.sh | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/tests/st/shell_scripts/finetune_internvl2_8B.sh b/tests/st/shell_scripts/finetune_internvl2_8B.sh
index 5fc954a9..f67455ca 100644
--- a/tests/st/shell_scripts/finetune_internvl2_8B.sh
+++ b/tests/st/shell_scripts/finetune_internvl2_8B.sh
@@ -1,15 +1,17 @@
 #!/bin/bash
 export ASCEND_SLOG_PRINT_TO_STDOUT=0
 export ASCEND_GLOBAL_LOG_LEVEL=3
-export TASK_QUEUE_ENABLE=1
+export TASK_QUEUE_ENABLE=2
 export COMBINED_ENABLE=1
 export CPU_AFFINITY_CONF=1
 export HCCL_CONNECT_TIMEOUT=1200
 export CUDA_DEVICE_MAX_CONNECTIONS=1
+export HOST_CACHE_CAPACITY=20
+export ACLNN_CACHE_LIMIT=100000
 
 GPUS_PER_NODE=8
 MASTER_ADDR=localhost
-MASTER_PORT=29199
+MASTER_PORT=6000
 NNODES=1
 NODE_RANK=0
 WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES))
@@ -49,7 +51,7 @@ GPT_ARGS="
     --context-parallel-size ${CP} \
     --micro-batch-size ${MBS} \
     --global-batch-size ${GBS} \
-    --num-layers 24 \
+    --num-layers 32 \
     --hidden-size 4096 \
     --num-attention-heads 16 \
     --seq-length 4096 \
@@ -59,12 +61,12 @@ GPT_ARGS="
     --tokenizer-type NullTokenizer \
     --vocab-size 92553 \
     --position-embedding-type rope \
-    --rotary-base 100000 \
+    --rotary-base 1000000 \
     --swiglu \
     --no-masked-softmax-fusion \
     --lr 4e-5 \
     --min-lr 0.0 \
-    --train-iters 2 \
+    --train-iters 5000 \
     --lr-decay-style cosine \
     --weight-decay 0.05 \
     --clip-grad 1.0 \
@@ -78,6 +80,11 @@ GPT_ARGS="
     --use-distributed-optimizer \
     --bf16 \
     --load $LOAD_PATH \
+    --use-flash-attn \
+    --use-fused-rotary-pos-emb \
+    --variable-seq-lengths \
+    --normalization RMSNorm \
+    --use-fused-rmsnorm \
 "
 
 OUTPUT_ARGS="
@@ -90,7 +97,7 @@ OUTPUT_ARGS="
 logfile=$(date +%Y%m%d)_$(date +%H%M%S)
 mkdir -p logs
 torchrun $DISTRIBUTED_ARGS \
-    $BASEPATH/pretrain_internvl.py \
+    pretrain_internvl.py \
     $GPT_ARGS \
     $MM_ARGS \
     $OUTPUT_ARGS \
--
Gitee

From 9b4a9545a04d6e3feef805a6547e7e7e883ddfd1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=99=86=E5=8A=B2=E5=A4=AB?= <979323654@qq.com>
Date: Fri, 15 Nov 2024 18:21:00 +0800
Subject: [PATCH 3/7] InternVL2-8B ST test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/st/shell_scripts/finetune_internvl2_8B.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/st/shell_scripts/finetune_internvl2_8B.sh b/tests/st/shell_scripts/finetune_internvl2_8B.sh
index f67455ca..11a6bdea 100644
--- a/tests/st/shell_scripts/finetune_internvl2_8B.sh
+++ b/tests/st/shell_scripts/finetune_internvl2_8B.sh
@@ -97,7 +97,7 @@ OUTPUT_ARGS="
 logfile=$(date +%Y%m%d)_$(date +%H%M%S)
 mkdir -p logs
 torchrun $DISTRIBUTED_ARGS \
-    pretrain_internvl.py \
+    $BASEPATH/pretrain_internvl.py \
     $GPT_ARGS \
     $MM_ARGS \
     $OUTPUT_ARGS \
--
Gitee

From b045590867d185b9e9d20981df8f56f5c83b54db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=99=86=E5=8A=B2=E5=A4=AB?= <979323654@qq.com>
Date: Mon, 18 Nov 2024 14:26:43 +0800
Subject: [PATCH 4/7] internvl2-8B ST test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/st/shell_scripts/finetune_internvl2_8B.sh | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/st/shell_scripts/finetune_internvl2_8B.sh b/tests/st/shell_scripts/finetune_internvl2_8B.sh
index 11a6bdea..2bc83533 100644
--- a/tests/st/shell_scripts/finetune_internvl2_8B.sh
+++ b/tests/st/shell_scripts/finetune_internvl2_8B.sh
@@ -66,7 +66,7 @@ GPT_ARGS="
     --no-masked-softmax-fusion \
     --lr 4e-5 \
     --min-lr 0.0 \
-    --train-iters 5000 \
+    --train-iters 2 \
     --lr-decay-style cosine \
     --weight-decay 0.05 \
     --clip-grad 1.0 \
@@ -94,8 +94,6 @@ OUTPUT_ARGS="
     --eval-iters 5000 \
 "
 
-logfile=$(date +%Y%m%d)_$(date +%H%M%S)
-mkdir -p logs
 torchrun $DISTRIBUTED_ARGS \
     $BASEPATH/pretrain_internvl.py \
     $GPT_ARGS \
--
Gitee

From 8f6fcadbda9efceb7220f8c4ea606bc4d80b2699 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=99=86=E5=8A=B2=E5=A4=AB?= <979323654@qq.com>
Date: Mon, 18 Nov 2024 15:45:20 +0800
Subject: [PATCH 5/7] internvl2-8B ST test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/st/shell_scripts/finetune_internvl2_8B.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/st/shell_scripts/finetune_internvl2_8B.sh b/tests/st/shell_scripts/finetune_internvl2_8B.sh
index 2bc83533..cee4d5fb 100644
--- a/tests/st/shell_scripts/finetune_internvl2_8B.sh
+++ b/tests/st/shell_scripts/finetune_internvl2_8B.sh
@@ -29,7 +29,7 @@ BASEPATH=$(cd `dirname $0`; cd ../../../; pwd)
 MM_DATA="$BASEPATH/tests/st/run_configs/finetune_internvl2_8B/data_8B.json"
 MM_MODEL="$BASEPATH/tests/st/run_configs/finetune_internvl2_8B/model_8B.json"
 MM_TOOL="$BASEPATH/mindspeed_mm/tools/tools.json"
-LOAD_PATH="/home/ci_resource/models/InternVL2-8B/ckpt_pp4"
+LOAD_PATH="/home/ci_resource/models/InternVL2-8B/pretrained/ckpt_pp4"
 
 MM_ARGS="
     --mm-data ${MM_DATA} \
--
Gitee

From 0197b52f56184b446394b548ff376bb0d3d7a5a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=99=86=E5=8A=B2=E5=A4=AB?= <979323654@qq.com>
Date: Mon, 18 Nov 2024 17:20:54 +0800
Subject: [PATCH 6/7] internvl2-8B ST test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../{finetune_intervl2_8B.json => finetune_internvl2_8B.json}  | 0
 tests/st/run_configs/finetune_internvl2_8B/data_8B.json        | 2 +-
 tests/st/run_configs/finetune_internvl2_8B/model_8B.json       | 2 +-
 tests/st/shell_scripts/finetune_internvl2_8B.sh                | 3 ++-
 4 files changed, 4 insertions(+), 3 deletions(-)
 rename tests/st/baseline_results/{finetune_intervl2_8B.json => finetune_internvl2_8B.json} (100%)

diff --git a/tests/st/baseline_results/finetune_intervl2_8B.json b/tests/st/baseline_results/finetune_internvl2_8B.json
similarity index 100%
rename from tests/st/baseline_results/finetune_intervl2_8B.json
rename to tests/st/baseline_results/finetune_internvl2_8B.json
diff --git a/tests/st/run_configs/finetune_internvl2_8B/data_8B.json b/tests/st/run_configs/finetune_internvl2_8B/data_8B.json
index f541d446..7562cdb3 100644
--- a/tests/st/run_configs/finetune_internvl2_8B/data_8B.json
+++ b/tests/st/run_configs/finetune_internvl2_8B/data_8B.json
@@ -14,7 +14,7 @@
                     {"trans_type": "Resize", "param": {"size": [448, 448], "interpolation": "BICUBIC"}},
                     {"trans_type": "ToTensor"},
                     {"trans_type": "norm_fun", "param": {"mean":[0.485, 0.456, 0.406], "std": [0.229, 0.224, 0.225]}}
-                ] 
+                ]
             }
         },
         "tokenizer_config": {
diff --git a/tests/st/run_configs/finetune_internvl2_8B/model_8B.json b/tests/st/run_configs/finetune_internvl2_8B/model_8B.json
index adb4be58..62b1332a 100644
--- a/tests/st/run_configs/finetune_internvl2_8B/model_8B.json
+++ b/tests/st/run_configs/finetune_internvl2_8B/model_8B.json
@@ -61,7 +61,7 @@
             "add_qkv_bias": true,
             "qk_layernorm": false,
             "activation_func": "gelu",
-            "normalization": "layernorm",
+            "normalization": "LayerNorm",
             "layernorm_epsilon": 1e-6,
             "hidden_dropout": 0.0,
             "drop_path_rate": 0.0,
diff --git a/tests/st/shell_scripts/finetune_internvl2_8B.sh b/tests/st/shell_scripts/finetune_internvl2_8B.sh
index cee4d5fb..1bd01fbb 100644
--- a/tests/st/shell_scripts/finetune_internvl2_8B.sh
+++ b/tests/st/shell_scripts/finetune_internvl2_8B.sh
@@ -66,7 +66,7 @@ GPT_ARGS="
     --no-masked-softmax-fusion \
     --lr 4e-5 \
     --min-lr 0.0 \
-    --train-iters 2 \
+    --train-iters 5000 \
    --lr-decay-style cosine \
     --weight-decay 0.05 \
     --clip-grad 1.0 \
@@ -94,6 +94,7 @@ OUTPUT_ARGS="
     --eval-iters 5000 \
 "
 
+
 torchrun $DISTRIBUTED_ARGS \
     $BASEPATH/pretrain_internvl.py \
     $GPT_ARGS \
--
Gitee

From 406f2bab3522fc719ff07d9a46ce02d7769d5fae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=99=86=E5=8A=B2=E5=A4=AB?= <979323654@qq.com>
Date: Mon, 18 Nov 2024 17:46:20 +0800
Subject: [PATCH 7/7] internvl2 ST test & improve build packaging
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 MANIFEST.in                                     | 3 +++
 pyproject.toml                                  | 5 ++++-
 tests/st/shell_scripts/finetune_internvl2_8B.sh | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)
 create mode 100644 MANIFEST.in

diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 00000000..5f1957ca
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,3 @@
+include *
+recursive-include mindspeed_mm *
+recursive-include examples *
diff --git a/pyproject.toml b/pyproject.toml
index 5b85edc3..8e6e2e53 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,8 +31,11 @@ dependencies = [
 requires = ["setuptools >= 65.0"]
 build-backend = "setuptools.build_meta"
 
+[tool.setuptools]
+include-package-data = true
+
 [tool.setuptools.packages.find]
-exclude = ["ci*", "docs", "sources", "tests"]
+exclude = ["ci*", "docs", "sources", "tests*"]
 
 [project.optional-dependencies]
 test = [
diff --git a/tests/st/shell_scripts/finetune_internvl2_8B.sh b/tests/st/shell_scripts/finetune_internvl2_8B.sh
index 1bd01fbb..8ebd7125 100644
--- a/tests/st/shell_scripts/finetune_internvl2_8B.sh
+++ b/tests/st/shell_scripts/finetune_internvl2_8B.sh
@@ -66,7 +66,7 @@ GPT_ARGS="
     --no-masked-softmax-fusion \
     --lr 4e-5 \
     --min-lr 0.0 \
-    --train-iters 5000 \
+    --train-iters 2 \
     --lr-decay-style cosine \
     --weight-decay 0.05 \
     --clip-grad 1.0 \
--
Gitee