diff --git a/vllm_mindspore/model_executor/models/mf_models/deepseekv3_weight_processor.py b/vllm_mindspore/model_executor/models/mf_models/deepseekv3_weight_processor.py
index cb93a89fed7d09c89bb6eb3fce163f8e313b2d43..f2613adb61575e7a1bc08fa78a7d89e4cac7886f 100644
--- a/vllm_mindspore/model_executor/models/mf_models/deepseekv3_weight_processor.py
+++ b/vllm_mindspore/model_executor/models/mf_models/deepseekv3_weight_processor.py
@@ -346,6 +346,14 @@ class DeepseekV3WeightProcessor(BaseWeightProcessor):
         w1_scale_ms_name = f"model.layers.{layer_id}.feed_forward.routed_experts.ffn.w1._layer.matmul.weight_scale"
         w2_scale_ms_name = f"model.layers.{layer_id}.feed_forward.routed_experts.ffn.w2._layer.matmul.weight_scale"
         w3_scale_ms_name = f"model.layers.{layer_id}.feed_forward.routed_experts.ffn.w3._layer.matmul.weight_scale"
+        if self.ep_method == EPMethod.ALLTOALL:
+            # use dispatch_quant and dequantSwiGluQuant which supports group_list to optimize performance
+            w1_ms_name = w1_ms_name.replace("._layer.weight", ".weight")
+            w2_ms_name = w2_ms_name.replace("._layer.weight", ".weight")
+            w3_ms_name = w3_ms_name.replace("._layer.weight", ".weight")
+            w1_scale_ms_name = w1_scale_ms_name.replace("._layer.matmul.weight_scale", ".weight_scale")
+            w2_scale_ms_name = w2_scale_ms_name.replace("._layer.matmul.weight_scale", ".weight_scale")
+            w3_scale_ms_name = w3_scale_ms_name.replace("._layer.matmul.weight_scale", ".weight_scale")
 
         w1_list, w2_list, w3_list, w1_scale_list, w2_scale_list, w3_scale_list = \
             self.infer_quant_process_moe(src_hf_dir, hf_weight_map, layer_id)
@@ -361,6 +369,13 @@ class DeepseekV3WeightProcessor(BaseWeightProcessor):
         if ffn_concat:
             # w_gate_hidden
             w_gate_hidden_name = f"model.layers.{layer_id}.feed_forward.routed_experts.ffn.w_gate_hidden._layer.weight"
+            # w_scale_gate_hidden
+            w_scale_gate_hidden_name = \
+                f"model.layers.{layer_id}.feed_forward.routed_experts.ffn.w_gate_hidden._layer.matmul.weight_scale"
+            if self.ep_method == EPMethod.ALLTOALL:
+                w_gate_hidden_name = w_gate_hidden_name.replace("._layer.weight", ".weight")
+                w_scale_gate_hidden_name = w_scale_gate_hidden_name.replace("._layer.matmul.weight_scale",
+                                                                            ".weight_scale")
             w_gate_hidden_np = np.concatenate(
                 [w1_ms_stack_param, w3_ms_stack_param], axis=1)
             w_gate_hidden_param = ms.from_numpy(w_gate_hidden_np).permute(
@@ -369,9 +384,6 @@ class DeepseekV3WeightProcessor(BaseWeightProcessor):
                 w_gate_hidden_param,
                 name=w_gate_hidden_name,
                 requires_grad=False)
-            # w_scale_gate_hidden
-            w_scale_gate_hidden_name = \
-                f"model.layers.{layer_id}.feed_forward.routed_experts.ffn.w_gate_hidden._layer.matmul.weight_scale"
             w_scale_gate_hidden_np = np.concatenate(
                 [w1_scale_ms_stack_param, w3_scale_ms_stack_param], axis=1)