diff --git a/examples/mcore/qwen3/evaluate_qwen3_0point6b_ptd.sh b/examples/mcore/qwen3/evaluate_qwen3_0point6b_ptd.sh index e41d2f13beb058cf4cf256a7574537625b555422..0148eb4985cfae66daccc772f3d6aa5115e83c99 100644 --- a/examples/mcore/qwen3/evaluate_qwen3_0point6b_ptd.sh +++ b/examples/mcore/qwen3/evaluate_qwen3_0point6b_ptd.sh @@ -11,7 +11,7 @@ TASK="mmlu" # Change for multinode config MASTER_ADDR=localhost -MASTER_PORT=60014 +MASTER_PORT=6000 NNODES=1 NODE_RANK=0 NPUS_PER_NODE=8 diff --git a/examples/mcore/qwen3/evaluate_qwen3_14b_ptd.sh b/examples/mcore/qwen3/evaluate_qwen3_14b_ptd.sh index fc91749084c84aed2a22b2ee346b28f73eb4e02c..8d6be3f6d86b5cb4a56812a57dfebd85a07002f5 100644 --- a/examples/mcore/qwen3/evaluate_qwen3_14b_ptd.sh +++ b/examples/mcore/qwen3/evaluate_qwen3_14b_ptd.sh @@ -11,7 +11,7 @@ TASK="mmlu" # Change for multinode config MASTER_ADDR=localhost -MASTER_PORT=60014 +MASTER_PORT=6000 NNODES=1 NODE_RANK=0 NPUS_PER_NODE=8 diff --git a/examples/mcore/qwen3/tune_qwen3_8b_4K_lora_ptd.sh b/examples/mcore/qwen3/tune_qwen3_8b_4K_lora_ptd.sh index 825addbd7282ee98eb68e2f7a216c59e2605a4b0..4093fea5bf131c03d2b2a6cc794eea7d2b50db68 100644 --- a/examples/mcore/qwen3/tune_qwen3_8b_4K_lora_ptd.sh +++ b/examples/mcore/qwen3/tune_qwen3_8b_4K_lora_ptd.sh @@ -4,7 +4,7 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1 NPUS_PER_NODE=8 MASTER_ADDR=localhost -MASTER_PORT=60014 +MASTER_PORT=6000 NNODES=1 NODE_RANK=0 WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) diff --git a/examples/mcore/qwen3_moe/pretrain_qwen3_235b_a22b_4k_A3_ptd.sh b/examples/mcore/qwen3_moe/pretrain_qwen3_235b_a22b_4k_A3_ptd.sh index 203c0f6a000cbac1809cb06b7d6d6b2f33290ab5..bedccb5875565465da608f1200f06ef311d31d6d 100644 --- a/examples/mcore/qwen3_moe/pretrain_qwen3_235b_a22b_4k_A3_ptd.sh +++ b/examples/mcore/qwen3_moe/pretrain_qwen3_235b_a22b_4k_A3_ptd.sh @@ -9,7 +9,7 @@ export TASK_QUEUE_ENABLE=2 NPUS_PER_NODE=16 MASTER_ADDR=localhost -MASTER_PORT=60014 +MASTER_PORT=6000 NNODES=16 NODE_RANK=0 WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) @@ -20,7 +20,7 @@ DATA_PATH="your data path" TOKENIZER_PATH="your tokenizer path" CKPT_LOAD_DIR="your model ckpt path" -TP=2 +TP=1 PP=4 EP=32 CP=1 @@ -58,7 +58,6 @@ MOE_ARGS=" --moe-aux-loss-coeff 0.001 \ --moe-permutation-async-comm \ --moe-alltoall-overlap-comm \ - --moe-tp-extend-ep \ " OPTIMIZE_ARGS=" diff --git a/tests/0day/qwen3/qwen3-0.6b/evaluate_qwen3_0point6b_ptd.sh b/tests/0day/qwen3/qwen3-0.6b/evaluate_qwen3_0point6b_ptd.sh index 03c7a0dfe9f919995ad18ccff2e97cb06903d8a8..0b7476e52b43f936d3a4e5cd36a9699c591330c6 100644 --- a/tests/0day/qwen3/qwen3-0.6b/evaluate_qwen3_0point6b_ptd.sh +++ b/tests/0day/qwen3/qwen3-0.6b/evaluate_qwen3_0point6b_ptd.sh @@ -11,7 +11,7 @@ TASK="mmlu" # Change for multinode config MASTER_ADDR=localhost -MASTER_PORT=60014 +MASTER_PORT=6000 NNODES=1 NODE_RANK=0 NPUS_PER_NODE=8 diff --git a/tests/0day/qwen3/qwen3-14b/evaluate_qwen3_14b_ptd.sh b/tests/0day/qwen3/qwen3-14b/evaluate_qwen3_14b_ptd.sh index fc91749084c84aed2a22b2ee346b28f73eb4e02c..8d6be3f6d86b5cb4a56812a57dfebd85a07002f5 100644 --- a/tests/0day/qwen3/qwen3-14b/evaluate_qwen3_14b_ptd.sh +++ b/tests/0day/qwen3/qwen3-14b/evaluate_qwen3_14b_ptd.sh @@ -11,7 +11,7 @@ TASK="mmlu" # Change for multinode config MASTER_ADDR=localhost -MASTER_PORT=60014 +MASTER_PORT=6000 NNODES=1 NODE_RANK=0 NPUS_PER_NODE=8 diff --git a/tests/0day/qwen3/qwen3-235b-a22b/pretrain_qwen3_235b_a22b_4k_A3_ptd.sh b/tests/0day/qwen3/qwen3-235b-a22b/pretrain_qwen3_235b_a22b_4k_A3_ptd.sh index 140b12696ddfb2d27229d28cbc71553572756cd3..d20d167f15bcb06b1839d4a6c485010321ae3b65 100644 --- a/tests/0day/qwen3/qwen3-235b-a22b/pretrain_qwen3_235b_a22b_4k_A3_ptd.sh +++ b/tests/0day/qwen3/qwen3-235b-a22b/pretrain_qwen3_235b_a22b_4k_A3_ptd.sh @@ -9,7 +9,7 @@ export TASK_QUEUE_ENABLE=2 NPUS_PER_NODE=16 MASTER_ADDR=localhost -MASTER_PORT=60014 +MASTER_PORT=6000 NNODES=16 NODE_RANK=0 WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) diff --git a/tests/0day/qwen3/qwen3-32b/tune_qwen3_32b_4K_lora_ptd.sh b/tests/0day/qwen3/qwen3-32b/tune_qwen3_32b_4K_lora_ptd.sh index 81f30468b2ebec193f944d2b08d1669a12e6bb66..9de182f4925cbdb733ba99359286bc10d077ab0c 100644 --- a/tests/0day/qwen3/qwen3-32b/tune_qwen3_32b_4K_lora_ptd.sh +++ b/tests/0day/qwen3/qwen3-32b/tune_qwen3_32b_4K_lora_ptd.sh @@ -4,7 +4,7 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1 NPUS_PER_NODE=8 MASTER_ADDR=localhost -MASTER_PORT=60014 +MASTER_PORT=6000 NNODES=1 NODE_RANK=0 WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) diff --git a/tests/0day/qwen3/qwen3-8b/tune_qwen3_8b_4K_lora_ptd.sh b/tests/0day/qwen3/qwen3-8b/tune_qwen3_8b_4K_lora_ptd.sh index 825addbd7282ee98eb68e2f7a216c59e2605a4b0..4093fea5bf131c03d2b2a6cc794eea7d2b50db68 100644 --- a/tests/0day/qwen3/qwen3-8b/tune_qwen3_8b_4K_lora_ptd.sh +++ b/tests/0day/qwen3/qwen3-8b/tune_qwen3_8b_4K_lora_ptd.sh @@ -4,7 +4,7 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1 NPUS_PER_NODE=8 MASTER_ADDR=localhost -MASTER_PORT=60014 +MASTER_PORT=6000 NNODES=1 NODE_RANK=0 WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) diff --git a/tests/pipeline/llama2/llama2_tp1_pp8_patch_gloo_ptd.sh b/tests/pipeline/llama2/llama2_tp1_pp8_patch_gloo_ptd.sh index 1a2593da39bd13da4c9a729f9da4809a0a0a4b3a..c3f0cbaa55ff733b0e8510f227b73a5ee43691f8 100644 --- a/tests/pipeline/llama2/llama2_tp1_pp8_patch_gloo_ptd.sh +++ b/tests/pipeline/llama2/llama2_tp1_pp8_patch_gloo_ptd.sh @@ -7,7 +7,7 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1 # config for multinode NPUS_PER_NODE=8 MASTER_ADDR=localhost -MASTER_PORT=60014 +MASTER_PORT=6000 NNODES=1 NODE_RANK=0 WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES))