From eee1c938fd9fa29eb35385a624d5d63c13e2c21f Mon Sep 17 00:00:00 2001 From: Peihan Liu Date: Mon, 11 Aug 2025 16:11:10 +0800 Subject: [PATCH] gpt4 drop bug fix --- examples/mcore/gpt4/pretrain_gpt4_moe_drop.sh | 2 +- examples/mcore/gpt4/pretrain_gpt4_moe_drop_A3_ptd.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/mcore/gpt4/pretrain_gpt4_moe_drop.sh b/examples/mcore/gpt4/pretrain_gpt4_moe_drop.sh index 328e9e588..d358ce513 100644 --- a/examples/mcore/gpt4/pretrain_gpt4_moe_drop.sh +++ b/examples/mcore/gpt4/pretrain_gpt4_moe_drop.sh @@ -43,7 +43,7 @@ MOE_ARGS=" --moe-permutation-async-comm \ --disable-bias-linear \ --moe-expert-capacity-factor 1.1 \ - --moe-token-dispatcher-type alltoall_seq \ + --moe-token-dispatcher-type alltoall \ --moe-pad-expert-input-to-capacity \ --moe-layer-freq -1 \ --first-k-dense-replace -1 \ diff --git a/examples/mcore/gpt4/pretrain_gpt4_moe_drop_A3_ptd.sh b/examples/mcore/gpt4/pretrain_gpt4_moe_drop_A3_ptd.sh index f30d95422..ab326e0ba 100644 --- a/examples/mcore/gpt4/pretrain_gpt4_moe_drop_A3_ptd.sh +++ b/examples/mcore/gpt4/pretrain_gpt4_moe_drop_A3_ptd.sh @@ -44,7 +44,7 @@ MOE_ARGS=" --moe-permutation-async-comm \ --disable-bias-linear \ --moe-expert-capacity-factor 1.1 \ - --moe-token-dispatcher-type alltoall_seq \ + --moe-token-dispatcher-type alltoall \ --moe-pad-expert-input-to-capacity \ --moe-layer-freq -1 \ --first-k-dense-replace -1 \ -- Gitee