From 3dc7c7cea51761c195487ac3fce4373406381efa Mon Sep 17 00:00:00 2001
From: ivanshan_8170
Date: Wed, 27 Aug 2025 18:06:36 +0800
Subject: [PATCH] fix: security

---
 .../all_to_all/all_to_all_lccl_runner.cpp          | 12 ++++++------
 .../all_to_all/all_to_all_operation.cpp            |  8 ++++----
 .../dynamic_ntk/dynamic_ntk_ops_runner.cpp         |  1 -
 src/ops_infer/ring_mla/ring_mla_operation.cpp      | 18 ++++++++++++------
 ...ttention_encoder_fuison_ops_runner_910a.cpp     | 15 ++++++++++-----
 ...attention_fusion_bypass_ops_runner_910a.cpp     |  2 +-
 ...attention_fusion_bypass_ops_runner_BNSD.cpp     |  3 +--
 ...tion_fusion_bypass_ops_runner_BNSD_910a.cpp     |  3 ++-
 .../self_attention_fusion_ops_runner_910a.cpp      |  8 ++++++--
 .../self_attention_operation.cpp                   | 18 +++++++-----------
 10 files changed, 49 insertions(+), 39 deletions(-)

diff --git a/src/ops_infer/all_to_all/all_to_all_lccl_runner.cpp b/src/ops_infer/all_to_all/all_to_all_lccl_runner.cpp
index 8faf35f0..59c360dc 100644
--- a/src/ops_infer/all_to_all/all_to_all_lccl_runner.cpp
+++ b/src/ops_infer/all_to_all/all_to_all_lccl_runner.cpp
@@ -16,8 +16,8 @@

 namespace atb {
 AllToAllLcclRunner::AllToAllLcclRunner(const infer::AllToAllParam &param, Context &context)
-    : LcclRunner("AllToAllLcclRunner", RUNNER_TYPE_ALL_TO_ALL, param.rank, param.rankSize, param.commMode,
-          context, param.commDomain),
+    : LcclRunner("AllToAllLcclRunner", RUNNER_TYPE_ALL_TO_ALL, param.rank, param.rankSize, param.commMode, context,
+          param.commDomain),
       param_(param)
 {
     ATB_LOG(INFO) << "AllToAllLcclRunner::AllToAllLcclRunner called";
@@ -43,10 +43,10 @@ Status AllToAllLcclRunner::ExecuteImpl(RunnerVariantPack &runnerVariantPack)
     } else {
         int64_t width = runnerVariantPack.inTensors[0].desc.shape.dims[1];
         int64_t burstlen = width / param_.rankSize;
-        lccl_->All2All(runnerVariantPack.inTensors[0].deviceData, runnerVariantPack.outTensors.at(0).deviceData,
-                       Utils::GetTensorNumel(runnerVariantPack.inTensors.at(0)), static_cast(burstlen),
-                       static_cast(width), GetHcclDtype(runnerVariantPack.inTensors.at(0).desc.dtype),
-                       GetExecuteStream(runnerVariantPack.context));
+        ret = lccl_->All2All(runnerVariantPack.inTensors[0].deviceData, runnerVariantPack.outTensors.at(0).deviceData,
+                             Utils::GetTensorNumel(runnerVariantPack.inTensors.at(0)), static_cast(burstlen),
+                             static_cast(width), GetHcclDtype(runnerVariantPack.inTensors.at(0).desc.dtype),
+                             GetExecuteStream(runnerVariantPack.context));
     }
     if (ret == Lcal::LCAL_ERROR_PARA_CHECK_FAIL) {
         ATB_LOG(ERROR) << "ret: " << ret << " LCCL_PARALLEL should be 0 or fasle";
diff --git a/src/ops_infer/all_to_all/all_to_all_operation.cpp b/src/ops_infer/all_to_all/all_to_all_operation.cpp
index a0b635d0..bef637b7 100644
--- a/src/ops_infer/all_to_all/all_to_all_operation.cpp
+++ b/src/ops_infer/all_to_all/all_to_all_operation.cpp
@@ -59,13 +59,13 @@ template <> Status CreateOperation(const infer::AllToAllParam &opParam, Operatio
             return ERROR_INVALID_PARAM;
         }
     }
-    if (opParam.backend == "lccl" && opParam.rankSize % 2 != 0) { // 2 : Even ranksize
-        ATB_LOG(ERROR) << "AllToAll lccl only supports even ranksize";
-        return ERROR_INVALID_PARAM;
-    }
     if (OperationUtil::DistributedInitCheck(opParam) != NO_ERROR) {
         ATB_LOG(ERROR) << "AllToAllOperation DistributedInitCheck failed";
         return ERROR_INVALID_PARAM;
+    }
+    if (opParam.backend == "lccl" && opParam.rankSize % 2 != 0) { // 2 : Even ranksize
+        ATB_LOG(ERROR) << "AllToAll lccl only supports even ranksize";
+        return ERROR_INVALID_PARAM;
     }
     *operation = new (std::nothrow) AllToAllOperation(opParam);
     if (*operation == nullptr) {
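Note on the two all_to_all hunks above: the substantive fix is that the All2All return code is now captured in ret instead of being silently discarded, so the pre-existing LCAL_ERROR_PARA_CHECK_FAIL branch can actually fire; the reordering in CreateOperation also makes DistributedInitCheck run before the lccl-specific even-rankSize constraint. A minimal standalone sketch of the capture-and-check pattern follows; every name in it is an illustrative stand-in, not the real ATB/Lcal API (in particular the error-code values are assumptions):

    // Sketch: capture a collective's status instead of dropping it.
    #include <iostream>

    enum LcalStatus { LCAL_SUCCESS = 0, LCAL_ERROR_PARA_CHECK_FAIL = -1 }; // values assumed

    // Stand-in for lccl_->All2All: fails its parameter check on a bad burst length.
    LcalStatus All2AllStub(long burstLen, long width)
    {
        return (burstLen > 0 && width % burstLen == 0) ? LCAL_SUCCESS : LCAL_ERROR_PARA_CHECK_FAIL;
    }

    int main()
    {
        LcalStatus ret = All2AllStub(0, 16); // before the fix, ret kept its old value here
        if (ret == LCAL_ERROR_PARA_CHECK_FAIL) {
            std::cerr << "All2All failed, ret: " << ret << "\n";
            return 1;
        }
        return 0;
    }

If the Lcal entry points were annotated [[nodiscard]], the compiler would have flagged the originally dropped return value.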
diff --git a/src/ops_infer/dynamic_ntk/dynamic_ntk_ops_runner.cpp b/src/ops_infer/dynamic_ntk/dynamic_ntk_ops_runner.cpp
index 487f4edc..bc4a712f 100644
--- a/src/ops_infer/dynamic_ntk/dynamic_ntk_ops_runner.cpp
+++ b/src/ops_infer/dynamic_ntk/dynamic_ntk_ops_runner.cpp
@@ -34,7 +34,6 @@ DynamicNTKOpsRunner::DynamicNTKOpsRunner(const infer::DynamicNTKParam &param)
     Mki::Tensor &sinTensor = kernelGraph_.outTensors.at(outId++);
     Mki::Tensor &cosTensor = kernelGraph_.outTensors.at(outId++);

-    kernelGraph_.nodes.resize(1);
     auto &dynamicNTKNode = kernelGraph_.nodes[0];
     dynamicNTKNode.opDesc = {0, "DynamicNTKOperation", asdopsParam_};
     dynamicNTKNode.inTensors = {&positionIds, &invfreqIn, &seqlens};
diff --git a/src/ops_infer/ring_mla/ring_mla_operation.cpp b/src/ops_infer/ring_mla/ring_mla_operation.cpp
index ffbc3233..a6d53966 100644
--- a/src/ops_infer/ring_mla/ring_mla_operation.cpp
+++ b/src/ops_infer/ring_mla/ring_mla_operation.cpp
@@ -20,7 +20,8 @@
 #include "ring_mla_ops_runner.h"

 namespace {
-static const uint32_t BASE_IN_TENSOR_NUM = 7; // query1, query2, key1, key2, value, mask, seqLen, (prevOut), (prevLse)
+static const uint32_t BASE_IN_TENSOR_NUM = 7; // query1, query2, key1, key2, value, mask, seqLen
+static const uint32_t RING_OPTIONAL_IN_TENSOR_NUM = 2; // prevOut, prevLse
 static const uint32_t BASE_OUT_TENSOR_NUM = 2; // output, softmaxLse
 // dimNum
 static const uint32_t QKV_DIM_NUM = 3; // [sum(seqlen), headNum, headSize]
@@ -154,7 +155,7 @@ RingMLAOperation::~RingMLAOperation() {}
 uint32_t RingMLAOperation::GetInputNum() const
 {
     if (isInputSoftmaxLse_) {
-        return BASE_IN_TENSOR_NUM + 2; // 2: prevLse, prevOut
+        return BASE_IN_TENSOR_NUM + RING_OPTIONAL_IN_TENSOR_NUM;
     }
     return BASE_IN_TENSOR_NUM;
 }
@@ -188,7 +189,7 @@ bool RingMLAOperation::DimNumCheck(const SVector &inTensorDescs, Ext
     }
     if (inTensorDescs.at(IN_SEQLEN_INDEX).shape.dimNum != 1 && // 1: [batch]
-        inTensorDescs.at(IN_SEQLEN_INDEX).shape.dimNum != 2) { // 1: [2, batch]
+        inTensorDescs.at(IN_SEQLEN_INDEX).shape.dimNum != 2) { // 2: [2, batch]
         extError.errorDesc = "dimNum of seqlen should be 1 or 2!";
         extError.errorData = OperationUtil::ConcatInfo(", but got seqlen dimNum: ",
                                                        inTensorDescs.at(IN_SEQLEN_INDEX).shape.dimNum);
@@ -224,9 +225,9 @@ bool RingMLAOperation::QSplitDimCheck(const SVector &inTensorDescs,
         return false;
     }
     if (inTensorDescs.at(IN_QUERY_SPLIT2_INDEX).shape.dims[QKV_HEAD_SIZE_IDX] != QK_SPLIT2_HEAD_SIZE) {
-        extError.errorDesc = OperationUtil::ConcatInfo("headSize of querySplit1 must be ", QK_SPLIT2_HEAD_SIZE);
+        extError.errorDesc = OperationUtil::ConcatInfo("headSize of querySplit2 must be ", QK_SPLIT2_HEAD_SIZE);
         extError.errorData = OperationUtil::ConcatInfo(
-            "But got querySplit1[2] headSize: ", inTensorDescs.at(IN_QUERY_SPLIT2_INDEX).shape.dims[QKV_HEAD_SIZE_IDX]);
+            "But got querySplit2[2] headSize: ", inTensorDescs.at(IN_QUERY_SPLIT2_INDEX).shape.dims[QKV_HEAD_SIZE_IDX]);
         ATB_LOG(ERROR) << GetLogPrefix() << extError;
         return false;
     }
@@ -299,7 +300,10 @@ Status RingMLAOperation::DimCheck(const SVector &inTensorDescs) cons
     // qkv shape: [q/kv nTokens, q/kv HeadNum, qk/v headSize]
     extError.errorType = ERROR_INVALID_TENSOR_DIM;
     extError.solutionDesc = "Please check the shape of querySplit1, querySplit2, keySplit1, keySplit2 and value.";
-    if (!QSplitDimCheck(inTensorDescs, extError) || !KSplitDimCheck(inTensorDescs, extError)) {
+    if (!QSplitDimCheck(inTensorDescs, extError)) {
+        return extError.errorType;
+    }
+    if (!KSplitDimCheck(inTensorDescs, extError)) {
         return extError.errorType;
     }
     int64_t kvHeadNum = inTensorDescs.at(IN_KEY_SPLIT1_INDEX).shape.dims[QKV_HEAD_NUM_IDX];
@@ -451,6 +455,8 @@ Status RingMLAOperation::SetupCheckImpl(const SVector &inTensors, const
     for (size_t i = 0; i < BASE_OUT_TENSOR_NUM; ++i) {
         if (!TensorUtil::TensorDescEqual(outTensorDescs.at(i), targetOutTensorDescs.at(i))) {
             extError.errorDesc = OperationUtil::ConcatInfo("Invalid outTensor shape at outTensors[", i, "].");
+            ss.str("");
+            ss.clear();
             ss << "Target outTensor shape: [";
             int32_t dimNum = static_cast(targetOutTensorDescs.at(i).shape.dimNum);
             for (int32_t j = 0; j < dimNum - 1; ++j) {
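Note on the ring_mla SetupCheckImpl hunk: ss is reused for every mismatching outTensor, so without the added ss.str(""); ss.clear(); the message built for outTensors[1] would still carry the text accumulated for outTensors[0]. A standalone illustration of the reset idiom:

    // std::stringstream keeps its buffer (and state flags) across reuse
    // unless both are reset explicitly.
    #include <iostream>
    #include <sstream>

    int main()
    {
        std::stringstream ss;
        for (int i = 0; i < 2; ++i) {
            ss.str("");  // drop text accumulated in earlier iterations
            ss.clear();  // reset error/eof flags; str("") alone does not do this
            ss << "Target outTensor shape: [" << i << "]";
            std::cout << ss.str() << "\n";
        }
        return 0;
    }

str("") clears only the buffer and clear() only the state flags; forgetting either is a common source of concatenated or empty log messages.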
diff --git a/src/ops_infer/self_attention/self_attention_encoder_fuison_ops_runner_910a.cpp b/src/ops_infer/self_attention/self_attention_encoder_fuison_ops_runner_910a.cpp
index a550f96a..8339bcf0 100644
--- a/src/ops_infer/self_attention/self_attention_encoder_fuison_ops_runner_910a.cpp
+++ b/src/ops_infer/self_attention/self_attention_encoder_fuison_ops_runner_910a.cpp
@@ -20,7 +20,7 @@ namespace atb {
 void TransQKVEncoderViewFunc910a(const Mki::SVector &oldDims, Mki::SVector &newDims)
 {
     if (oldDims.size() != 3) { // 3: q, k, v must be 3-dimensional
-        ATB_LOG(ERROR) << "q, k, v should all be three dims";
+        ATB_LOG(ERROR) << "The dimNum of q, k, v should all be 3";
         return;
     }
     newDims = {1, oldDims.at(0), oldDims.at(1) * oldDims.at(2)};
@@ -86,6 +86,11 @@ Status SelfAttentionEncoderFusionOpsRunner910A::SetupKernelGraph(const OpsTensor
     qTransdataNode.inTensors = {&query};
     qTransdataNode.outTensors = {&queryNz};
     qTransdataNode.inferShapePreFunc = [&](Mki::LaunchParam &launchParam) {
+        if (launchParam.GetInTensor(0).desc.dims.size() < 3) {
+            ATB_LOG(ERROR) << "expect inTensor dimNum to be at least 3, but got: "
+                           << launchParam.GetInTensor(0).desc.dims.size();
+            return;
+        }
         ntokens_ = launchParam.GetInTensor(0).desc.dims.at(1);
         hiddenSize_ = launchParam.GetInTensor(0).desc.dims.at(2); // 2: the 3rd dim
     };
@@ -149,14 +154,14 @@ Status SelfAttentionEncoderFusionOpsRunner910A::SetupKernelGraph(const OpsTensor
 bool SelfAttentionEncoderFusionOpsRunner910A::NeedModifySlopes(const OpsTensorPack &opsTensorPack)
 {
     bool isMaskCompress = param_.maskType == atb::infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS ||
-        param_.maskType == atb::infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS_SQRT;
+                          param_.maskType == atb::infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS_SQRT;
     if (!isMaskCompress) {
         return false;
     } else {
         return opsTensorPack.inTensors.at(3).desc.dims.size() == 4 && // 3: maskId, 4: shape is not [1,256//16,256,16]
-            (opsTensorPack.inTensors.at(3).desc.dims[0] != 1 || // 3: maskId
-             opsTensorPack.inTensors.at(3).desc.dims[1] != 16 || // 3: maskId, 16: mask shape
-             opsTensorPack.inTensors.at(3).desc.dims[2] != 256); // 3: maskId, 256: mask shape
+               (opsTensorPack.inTensors.at(3).desc.dims[0] != 1 ||   // 3: maskId
+                opsTensorPack.inTensors.at(3).desc.dims[1] != 16 ||  // 3: maskId, 16: mask shape
+                opsTensorPack.inTensors.at(3).desc.dims[2] != 256);  // 3: maskId, 256: mask shape
     }
 }
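Note on the encoder runner hunks: inferShapePreFunc used to read dims.at(1) and dims.at(2) unconditionally, so a malformed input with fewer than 3 dims would throw from inside the lambda; the new guard logs and bails out first. A standalone sketch of the same guard, with std::vector standing in for Mki::SVector and the [*, ntokens, hiddenSize] layout taken as an assumption:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Validate the dim count before indexing, as the new guard does.
    bool ReadNtokensAndHiddenSize(const std::vector<int64_t> &dims, int64_t &ntokens, int64_t &hiddenSize)
    {
        if (dims.size() < 3) { // 3: [*, ntokens, hiddenSize] assumed
            std::cerr << "expect inTensor dimNum to be at least 3, but got: " << dims.size() << "\n";
            return false;
        }
        ntokens = dims.at(1);
        hiddenSize = dims.at(2); // 2: the 3rd dim
        return true;
    }

    int main()
    {
        int64_t ntokens = 0;
        int64_t hiddenSize = 0;
        std::vector<int64_t> bad = {1, 8}; // too short: guard fires instead of throwing
        return ReadNtokensAndHiddenSize(bad, ntokens, hiddenSize) ? 0 : 1;
    }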
diff --git a/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_910a.cpp b/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_910a.cpp
index 1758642a..202e3182 100644
--- a/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_910a.cpp
+++ b/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_910a.cpp
@@ -41,7 +41,7 @@ void TransAttnMaskViewFuncBypass910a(const Mki::SVector &oldDims, Mki::
 void FlashAttentionInferShapePreFuncBypass910a(Mki::LaunchParam &launchParam)
 {
     if (launchParam.GetInTensors().size() < 4) { // 4: no fewer than 4 inTensors
-        ATB_LOG(ERROR) << "inTensor num should be at least 5";
+        ATB_LOG(ERROR) << "inTensor num should be at least 4";
         return;
     }
     launchParam.GetInTensor(3).desc.dtype = Mki::TENSOR_DTYPE_UINT32; // 3: set the dtype of the 4th input tensor
diff --git a/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_BNSD.cpp b/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_BNSD.cpp
index 2408950f..efef3594 100644
--- a/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_BNSD.cpp
+++ b/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_BNSD.cpp
@@ -110,9 +110,8 @@ Status SelfAttentionFusionBypassOpsRunnerBNSD::ModifyKernelGraph(const OpsTensor
         return ERROR_INVALID_PARAM;
     }

-    ATB_LOG(INFO) << "kernelGraph_.nodes.size" << kernelGraph_.nodes.size();
     auto &flashAttentionNode = kernelGraph_.nodes.at(1); // 1: position of the flashAttention node
-    ATB_LOG(INFO) << "kernelGraph_.nodes.size";
+    ATB_LOG(INFO) << "kernelGraph_.nodes.size: " << kernelGraph_.nodes.size();
     AtbOps::OpParam::UnpadFlashAttention flashAttentionQParam;
     SetFAParam(flashAttentionQParam);
     ATB_LOG(INFO) << "SetFAParam";
diff --git a/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_BNSD_910a.cpp b/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_BNSD_910a.cpp
index 25c005f3..5b744628 100644
--- a/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_BNSD_910a.cpp
+++ b/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_BNSD_910a.cpp
@@ -90,7 +90,8 @@ Status SelfAttentionFusionBypassOpsRunnerBNSD910A::SetupKernelGraph(const OpsTen
                                          static_cast(ACL_FORMAT_UNDEFINED);
     kernelGraph_.internalTensors.resize(
         (attnMaskFormat == static_cast(ACL_FORMAT_FRACTAL_NZ) || (!needMask)) ?
-        3 : (param_.maskType == atb::infer::SelfAttentionParam::MASK_TYPE_ALIBI ? 4 : 3)); // 4, 3: internal tensor count
+        3 :
+        (param_.maskType == atb::infer::SelfAttentionParam::MASK_TYPE_ALIBI ? 4 : 3)); // 4, 3: internal tensor count
     size_t internalTensorId = 0;

     Mki::Tensor &transdataQResultTensor = kernelGraph_.internalTensors.at(internalTensorId++);
diff --git a/src/ops_infer/self_attention/self_attention_fusion_ops_runner_910a.cpp b/src/ops_infer/self_attention/self_attention_fusion_ops_runner_910a.cpp
index cc6413e2..a3affe42 100644
--- a/src/ops_infer/self_attention/self_attention_fusion_ops_runner_910a.cpp
+++ b/src/ops_infer/self_attention/self_attention_fusion_ops_runner_910a.cpp
@@ -20,6 +20,7 @@ namespace atb {
 void TransKVViewFunc910a(const Mki::SVector &oldDims, Mki::SVector &newDims)
 {
     if (oldDims.size() < 2) { // 2: minimum dimNum
+        ATB_LOG(ERROR) << "inTensor key/value's dimNum should be at least 2";
         return;
     }
     if (oldDims.size() != 4) { // 4: dimNum
@@ -32,6 +33,7 @@
 void TransQViewFunc910a(const Mki::SVector &oldDims, Mki::SVector &newDims)
 {
     if (oldDims.size() < 2) { // 2: minimum dimNum
+        ATB_LOG(ERROR) << "inTensor query's dimNum should be at least 2";
         return;
     }
     if (oldDims.size() != 4) { // 4: dimNum
@@ -111,7 +113,8 @@ Status SelfAttentionFusionOpsRunner910A::SetupKernelGraph(const OpsTensorPack &o
         needMask_ ? opsTensorPack.inTensors.at(5).desc.format : static_cast(ACL_FORMAT_UNDEFINED);
     kernelGraph_.internalTensors.resize(
         (attnMaskFormat == static_cast(ACL_FORMAT_FRACTAL_NZ) || (!needMask_)) ?
-        7 : (param_.maskType == atb::infer::SelfAttentionParam::MASK_TYPE_ALIBI ? 8 : 7)); // 7, 8: total node count
+        7 :
+        (param_.maskType == atb::infer::SelfAttentionParam::MASK_TYPE_ALIBI ? 8 : 7)); // 7, 8: total node count
     size_t internalTensorId = 0;

     Mki::Tensor &transdataKResultTensor = kernelGraph_.internalTensors.at(internalTensorId++);
@@ -129,7 +132,8 @@ Status SelfAttentionFusionOpsRunner910A::SetupKernelGraph(const OpsTensorPack &o
     Mki::Tensor &contextTranspose = kernelGraph_.internalTensors.at(internalTensorId++);

     kernelGraph_.nodes.resize((attnMaskFormat == static_cast(ACL_FORMAT_ND) && needMask_) ?
-                              11 : 10); // 10, 11: total node count
+                              11 :
+                              10); // 10, 11: total node count
     size_t nodeId = 0;

     auto &transdataKNode = kernelGraph_.nodes.at(nodeId++);
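Note on the four runner hunks above: besides the corrected log texts ("at least 4"; the node count now actually printed with its value), the TransKVViewFunc910a/TransQViewFunc910a guards now log before returning, so a too-short shape no longer fails silently. A standalone sketch of a view function with a logging guard; the names and the reshape rule are illustrative only:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // A view func that rejects short shapes loudly instead of silently.
    void TransKVViewSketch(const std::vector<int64_t> &oldDims, std::vector<int64_t> &newDims)
    {
        if (oldDims.size() < 2) { // 2: minimum dimNum
            std::cerr << "inTensor key/value's dimNum should be at least 2, got: " << oldDims.size() << "\n";
            return; // newDims is left untouched, but the rejection is now traceable
        }
        newDims = {1, oldDims.front(), oldDims.back()}; // illustrative reshape only
    }

    int main()
    {
        std::vector<int64_t> newDims;
        TransKVViewSketch({42}, newDims); // 1-dim input: guard fires and logs
        return newDims.empty() ? 1 : 0;
    }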
diff --git a/src/ops_infer/self_attention/self_attention_operation.cpp b/src/ops_infer/self_attention/self_attention_operation.cpp
index e7262743..3080cbb5 100644
--- a/src/ops_infer/self_attention/self_attention_operation.cpp
+++ b/src/ops_infer/self_attention/self_attention_operation.cpp
@@ -94,10 +94,6 @@ template <> Status CreateOperation(const infer::SelfAttentionParam &opParam, Ope
         ATB_LOG(ERROR) << "scaleType should be in the range of its enum value";
         return ERROR_INVALID_PARAM;
     }
-    if (opParam.scaleType == infer::SelfAttentionParam::SCALE_TYPE_LOGN && needQKVQuant) {
-        ATB_LOG(ERROR) << "both scaleType and QKVQuant are enable";
-        return ERROR_INVALID_PARAM;
-    }
     if (opParam.calcType == infer::SelfAttentionParam::PA_ENCODER &&
         opParam.kvcacheCfg == atb::infer::SelfAttentionParam::K_BYPASS_V_BYPASS) {
         ATB_LOG(ERROR) << "when calcType is PA_ENCODER, kvcacheCfg should not be K_BYPASS_V_BYPASS";
@@ -1126,7 +1122,7 @@ Status SelfAttentionOperation::MaxHeadSizeCheck910B(const int64_t headSizeK, con
     }
     if (param_.maskType == infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS ||
         param_.maskType == infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS_SQRT ||
-        param_.maskType == infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS_SQRT) {
+        param_.maskType == infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS_LEFT_ALIGN) {
         maxHeadSize = 128; // 128: with compressed alibi, headSize must be <= 128
     }
     if (headSizeK > maxHeadSize) {
@@ -1172,7 +1168,7 @@ Status SelfAttentionOperation::HeadSizeDimCheck310P(const SVector &i
     }
     if (param_.maskType == infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS ||
         param_.maskType == infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS_SQRT ||
-        param_.maskType == infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS_SQRT) {
+        param_.maskType == infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS_LEFT_ALIGN) {
         if (headSizeK > 128 || headSizeV > 128) { // 128: with compressed alibi, headSize must be <= 128
             ATB_LOG(ERROR) << "headSize of key and value should be no greater than 128 with alibi compress mask";
             return ERROR_INVALID_TENSOR_DIM;
@@ -1420,16 +1416,16 @@ Status SelfAttentionOperation::InferShapeDimNumCheck(const SVector &
 Status SelfAttentionOperation::InferShapePADimNumCheckBNSD(const SVector &inTensorDescs) const
 {
     if (inTensorDescs.at(0).shape.dimNum != 4) { // 4: [batch, head_num, seq_len, head_size]
-        ATB_LOG(ERROR) << "dimNum of query should be 4";
+        ATB_LOG(ERROR) << "dimNum of query should be 4, but got: " << inTensorDescs.at(0).shape.dimNum;
         return ERROR_INVALID_TENSOR_DIM_NUM;
     }
     if (inTensorDescs.at(kcacheId_).shape.dimNum != 4) { // 4: [batch, head_num, seq_len, head_size]
-        ATB_LOG(ERROR) << "dimNum of key should be 4";
+        ATB_LOG(ERROR) << "dimNum of key should be 4, but got: " << inTensorDescs.at(kcacheId_).shape.dimNum;
         return ERROR_INVALID_TENSOR_DIM_NUM;
     }
-    if (inTensorDescs.at(tokenOffsetId_).shape.dimNum != 1 &&
-        inTensorDescs.at(tokenOffsetId_).shape.dimNum != 2) { // 2: seqlen: [2, batch]
-        ATB_LOG(ERROR) << "dimNum of seqlen should be 1 or 2";
1 or 2"; + uint64_t seqLenDimNum = inTensorDescs.at(tokenOffsetId_).shape.dimNum; + if (seqLenDimNum != 1 && seqLenDimNum != 2) { // 2: seqlen: [2, batch] + ATB_LOG(ERROR) << "dimNum of seqlen should be 1 or 2, bot got: " << seqLenDimNum; return ERROR_INVALID_TENSOR_DIM_NUM; } return NO_ERROR; -- Gitee