From c1e287e6e958d6a555acb034b4016147587b4fcf Mon Sep 17 00:00:00 2001 From: kongdeshuo <1670690897@qq.com> Date: Tue, 17 Jun 2025 09:31:45 +0800 Subject: [PATCH] add qwen25_7b pretrain test case --- .../baseline_results/qwen25_7b_pretrain.json | 14 ++ .../st/shell_scripts/qwen25_7b_pretrain.sh | 122 ++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 tests/mindspore/st/baseline_results/qwen25_7b_pretrain.json create mode 100644 tests/mindspore/st/shell_scripts/qwen25_7b_pretrain.sh diff --git a/tests/mindspore/st/baseline_results/qwen25_7b_pretrain.json b/tests/mindspore/st/baseline_results/qwen25_7b_pretrain.json new file mode 100644 index 000000000..c4b6b451d --- /dev/null +++ b/tests/mindspore/st/baseline_results/qwen25_7b_pretrain.json @@ -0,0 +1,14 @@ +{ + "lm loss": [ + 12.0627307891845703, + 12.0655231475830078, + 12.0705471038818359, + 12.0565109252929688, + 12.0662870407104492, + 12.0355625152587891, + 12.0509414672851562, + 12.0035610198974609, + 12.0380344390869141, + 11.9320907592773438 + ] +} \ No newline at end of file diff --git a/tests/mindspore/st/shell_scripts/qwen25_7b_pretrain.sh b/tests/mindspore/st/shell_scripts/qwen25_7b_pretrain.sh new file mode 100644 index 000000000..36822ed29 --- /dev/null +++ b/tests/mindspore/st/shell_scripts/qwen25_7b_pretrain.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. +export CUDA_DEVICE_MAX_CONNECTIONS=1 + +NPUS_PER_NODE=8 +MASTER_ADDR=localhost +MASTER_PORT=6000 +NNODES=1 +NODE_RANK=0 +WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) + +basepath=$(cd `dirname $0`; cd ../../../../; pwd) +GPUS_PER_NODE=8 +MASTER_ADDR=localhost +MASTER_PORT=6103 +NNODES=1 +NODE_RANK=0 +WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES)) + +DATA_PATH="/data/mindspore/st/test_qwen25_pretrain/dataset/dataset/enwiki_text_document" +TOKENIZER_PATH="/data/mindspore/st/test_qwen25_pretrain/tokenizer" +CKPT_LOAD_DIR="/data/mindspore/st/test_qwen25_pretrain/load" + +TP=4 +PP=2 +SEQ_LEN=8192 +MBS=1 +GBS=8 + + +DISTRIBUTED_ARGS=" + --worker_num $WORLD_SIZE \ + --local_worker_num $NPUS_PER_NODE \ + --master_addr $MASTER_ADDR \ + --master_port $MASTER_PORT \ + --log_dir=msrun_log \ + --node_rank $NODE_RANK \ + --join=True +" + + +GPT_ARGS=" + --use-mcore-models \ + --tensor-model-parallel-size ${TP} \ + --pipeline-model-parallel-size ${PP} \ + --sequence-parallel \ + --num-layers 8 \ + --hidden-size 3584 \ + --ffn-hidden-size 18944 \ + --num-attention-heads 28 \ + --max-position-embeddings ${SEQ_LEN} \ + --seq-length ${SEQ_LEN} \ + --disable-bias-linear \ + --add-qkv-bias \ + --group-query-attention \ + --num-query-groups 4 \ + --use-flash-attn \ + --swiglu \ + --use-fused-swiglu \ + --normalization RMSNorm \ + --norm-epsilon 1e-6 \ + --use-fused-rmsnorm \ + --position-embedding-type rope \ + --rotary-base 1000000 \ + --use-fused-rotary-pos-emb \ + --untie-embeddings-and-output-weights \ + --micro-batch-size ${MBS} \ + --global-batch-size ${GBS} \ + --make-vocab-size-divisible-by 1 \ + --padded-vocab-size 152064 \ + --tokenizer-type PretrainedFromHF \ + --tokenizer-name-or-path ${TOKENIZER_PATH} \ + --attention-dropout 0.0 \ + --hidden-dropout 0.0 \ + --train-iters 2000 \ + --lr 1.25e-6 \ + --lr-decay-style cosine \ + --min-lr 1.25e-7 \ + --lr-warmup-fraction 0.01 \ + --init-method-std 0.01 \ + --weight-decay 1e-1 \ + --clip-grad 1.0 \ + --adam-beta1 0.9 \ + --adam-beta2 0.95 \ + --initial-loss-scale 4096 \ + --no-gradient-accumulation-fusion \ + --no-masked-softmax-fusion \ + --attention-softmax-in-fp32 \ + 
--bf16 +" + +DATA_ARGS=" + --data-path $DATA_PATH \ + --split 100,0,0 +" + +CKPT_ARGS=" + --no-load-optim \ + --no-load-rng \ + --no-save-optim \ + --no-save-rng \ + --seed 1234 \ +" + +OUTPUT_ARGS=" + --log-interval 1 \ + --save-interval 2000 \ + --eval-interval 2000 \ + --eval-iters 0 \ + --log-throughput +" + + +msrun $DISTRIBUTED_ARGS pretrain_gpt.py \ + $GPT_ARGS \ + $DATA_ARGS \ + $CKPT_ARGS \ + $OUTPUT_ARGS \ + --distributed-backend nccl \ + --load ${CKPT_LOAD_DIR} \ + --ai-framework mindspore \ + | tee logs/pretrain_qwen25_7b_32k.log \ No newline at end of file -- Gitee
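
Note: the ST harness that actually consumes qwen25_7b_pretrain.json is not part of this patch. The snippet below is only a sketch of how the new case could be reproduced and checked locally; the "lm loss" log format it parses and the 0.5% relative tolerance are illustrative assumptions, not the harness's real behaviour.

    # Illustrative only: run the new ST script, then compare the logged
    # per-step "lm loss" values against the committed baseline JSON.
    set -e
    bash tests/mindspore/st/shell_scripts/qwen25_7b_pretrain.sh

    python3 - <<'EOF'
    import json, re

    expected = json.load(open("tests/mindspore/st/baseline_results/qwen25_7b_pretrain.json"))["lm loss"]
    # Megatron-style "lm loss: <value>" log lines are assumed here.
    log_text = open("logs/pretrain_qwen25_7b_32k.log").read()
    actual = [float(v) for v in re.findall(r"lm loss:\s*([0-9.Ee+-]+)", log_text)]

    # Only the first len(expected) steps are compared; 0.5% relative tolerance
    # is an illustrative choice, not the harness's threshold.
    for step, (exp, act) in enumerate(zip(expected, actual), start=1):
        assert abs(act - exp) / abs(exp) < 5e-3, f"step {step}: expected {exp}, got {act}"
    print(f"first {len(expected)} lm loss values match the baseline")
    EOF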