From 3dc7c7cea51761c195487ac3fce4373406381efa Mon Sep 17 00:00:00 2001
From: ivanshan_8170
Date: Wed, 27 Aug 2025 18:06:36 +0800
Subject: [PATCH] fix: security

---
 .../all_to_all/all_to_all_lccl_runner.cpp          | 12 ++++++------
 .../all_to_all/all_to_all_operation.cpp            |  8 ++++----
 .../dynamic_ntk/dynamic_ntk_ops_runner.cpp         |  1 -
 src/ops_infer/ring_mla/ring_mla_operation.cpp      | 18 ++++++++++++------
 ...ttention_encoder_fuison_ops_runner_910a.cpp     | 15 ++++++++++-----
 ...attention_fusion_bypass_ops_runner_910a.cpp     |  2 +-
 ...attention_fusion_bypass_ops_runner_BNSD.cpp     |  3 +--
 ...tion_fusion_bypass_ops_runner_BNSD_910a.cpp     |  3 ++-
 .../self_attention_fusion_ops_runner_910a.cpp      |  8 ++++++--
 .../self_attention_operation.cpp                   | 18 +++++++-----------
 10 files changed, 49 insertions(+), 39 deletions(-)

diff --git a/src/ops_infer/all_to_all/all_to_all_lccl_runner.cpp b/src/ops_infer/all_to_all/all_to_all_lccl_runner.cpp
index 8faf35f0..59c360dc 100644
--- a/src/ops_infer/all_to_all/all_to_all_lccl_runner.cpp
+++ b/src/ops_infer/all_to_all/all_to_all_lccl_runner.cpp
@@ -16,8 +16,8 @@

 namespace atb {
 AllToAllLcclRunner::AllToAllLcclRunner(const infer::AllToAllParam &param, Context &context)
-    : LcclRunner("AllToAllLcclRunner", RUNNER_TYPE_ALL_TO_ALL, param.rank, param.rankSize, param.commMode,
-          context, param.commDomain),
+    : LcclRunner("AllToAllLcclRunner", RUNNER_TYPE_ALL_TO_ALL, param.rank, param.rankSize, param.commMode, context,
+          param.commDomain),
       param_(param)
 {
     ATB_LOG(INFO) << "AllToAllLcclRunner::AllToAllLcclRunner called";
@@ -43,10 +43,10 @@ Status AllToAllLcclRunner::ExecuteImpl(RunnerVariantPack &runnerVariantPack)
     } else {
         int64_t width = runnerVariantPack.inTensors[0].desc.shape.dims[1];
         int64_t burstlen = width / param_.rankSize;
-        lccl_->All2All(runnerVariantPack.inTensors[0].deviceData, runnerVariantPack.outTensors.at(0).deviceData,
-                       Utils::GetTensorNumel(runnerVariantPack.inTensors.at(0)), static_cast(burstlen),
-                       static_cast(width), GetHcclDtype(runnerVariantPack.inTensors.at(0).desc.dtype),
-                       GetExecuteStream(runnerVariantPack.context));
+        ret = lccl_->All2All(runnerVariantPack.inTensors[0].deviceData, runnerVariantPack.outTensors.at(0).deviceData,
+                             Utils::GetTensorNumel(runnerVariantPack.inTensors.at(0)), static_cast(burstlen),
+                             static_cast(width), GetHcclDtype(runnerVariantPack.inTensors.at(0).desc.dtype),
+                             GetExecuteStream(runnerVariantPack.context));
     }
     if (ret == Lcal::LCAL_ERROR_PARA_CHECK_FAIL) {
         ATB_LOG(ERROR) << "ret: " << ret << " LCCL_PARALLEL should be 0 or fasle";
diff --git a/src/ops_infer/all_to_all/all_to_all_operation.cpp b/src/ops_infer/all_to_all/all_to_all_operation.cpp
index a0b635d0..bef637b7 100644
--- a/src/ops_infer/all_to_all/all_to_all_operation.cpp
+++ b/src/ops_infer/all_to_all/all_to_all_operation.cpp
@@ -59,13 +59,13 @@ template <> Status CreateOperation(const infer::AllToAllParam &opParam, Operatio
             return ERROR_INVALID_PARAM;
         }
     }
-    if (opParam.backend == "lccl" && opParam.rankSize % 2 != 0) { // 2 : Even ranksize
-        ATB_LOG(ERROR) << "AllToAll lccl only supports even ranksize";
-        return ERROR_INVALID_PARAM;
-    }
     if (OperationUtil::DistributedInitCheck(opParam) != NO_ERROR) {
         ATB_LOG(ERROR) << "AllToAllOperation DistributedInitCheck failed";
         return ERROR_INVALID_PARAM;
+    }
+    if (opParam.backend == "lccl" && opParam.rankSize % 2 != 0) { // 2 : Even ranksize
+        ATB_LOG(ERROR) << "AllToAll lccl only supports even ranksize";
+        return ERROR_INVALID_PARAM;
     }
     *operation = new (std::nothrow) AllToAllOperation(opParam);
     if (*operation == nullptr) {
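Note on the two all_to_all hunks above: the substantive fix is that the All2All return code is now captured in ret instead of being silently discarded, so the pre-existing LCAL_ERROR_PARA_CHECK_FAIL branch can actually fire; the reordering in CreateOperation also makes DistributedInitCheck run before the lccl-specific even-rankSize constraint. A minimal standalone sketch of the capture-and-check pattern follows; every name in it is an illustrative stand-in, not the real ATB/Lcal API (in particular the error-code values are assumptions):

    // Sketch: capture a collective's status instead of dropping it.
    #include <iostream>

    enum LcalStatus { LCAL_SUCCESS = 0, LCAL_ERROR_PARA_CHECK_FAIL = -1 }; // values assumed

    // Stand-in for lccl_->All2All: fails its parameter check on a bad burst length.
    LcalStatus All2AllStub(long burstLen, long width)
    {
        return (burstLen > 0 && width % burstLen == 0) ? LCAL_SUCCESS : LCAL_ERROR_PARA_CHECK_FAIL;
    }

    int main()
    {
        LcalStatus ret = All2AllStub(0, 16); // before the fix, ret kept its old value here
        if (ret == LCAL_ERROR_PARA_CHECK_FAIL) {
            std::cerr << "All2All failed, ret: " << ret << "\n";
            return 1;
        }
        return 0;
    }

If the Lcal entry points were annotated [[nodiscard]], the compiler would have flagged the originally dropped return value.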
diff --git a/src/ops_infer/dynamic_ntk/dynamic_ntk_ops_runner.cpp b/src/ops_infer/dynamic_ntk/dynamic_ntk_ops_runner.cpp
index 487f4edc..bc4a712f 100644
--- a/src/ops_infer/dynamic_ntk/dynamic_ntk_ops_runner.cpp
+++ b/src/ops_infer/dynamic_ntk/dynamic_ntk_ops_runner.cpp
@@ -34,7 +34,6 @@ DynamicNTKOpsRunner::DynamicNTKOpsRunner(const infer::DynamicNTKParam &param)
     Mki::Tensor &sinTensor = kernelGraph_.outTensors.at(outId++);
     Mki::Tensor &cosTensor = kernelGraph_.outTensors.at(outId++);

-    kernelGraph_.nodes.resize(1);
     auto &dynamicNTKNode = kernelGraph_.nodes[0];
     dynamicNTKNode.opDesc = {0, "DynamicNTKOperation", asdopsParam_};
     dynamicNTKNode.inTensors = {&positionIds, &invfreqIn, &seqlens};
diff --git a/src/ops_infer/ring_mla/ring_mla_operation.cpp b/src/ops_infer/ring_mla/ring_mla_operation.cpp
index ffbc3233..a6d53966 100644
--- a/src/ops_infer/ring_mla/ring_mla_operation.cpp
+++ b/src/ops_infer/ring_mla/ring_mla_operation.cpp
@@ -20,7 +20,8 @@
 #include "ring_mla_ops_runner.h"

 namespace {
-static const uint32_t BASE_IN_TENSOR_NUM = 7; // query1, query2, key1, key2, value, mask, seqLen, (prevOut), (prevLse)
+static const uint32_t BASE_IN_TENSOR_NUM = 7; // query1, query2, key1, key2, value, mask, seqLen
+static const uint32_t RING_OPTIONAL_IN_TENSOR_NUM = 2; // prevOut, prevLse
 static const uint32_t BASE_OUT_TENSOR_NUM = 2; // output, softmaxLse
 // dimNum
 static const uint32_t QKV_DIM_NUM = 3; // [sum(seqlen), headNum, headSize]
@@ -154,7 +155,7 @@ RingMLAOperation::~RingMLAOperation() {}
 uint32_t RingMLAOperation::GetInputNum() const
 {
     if (isInputSoftmaxLse_) {
-        return BASE_IN_TENSOR_NUM + 2; // 2: prevLse, prevOut
+        return BASE_IN_TENSOR_NUM + RING_OPTIONAL_IN_TENSOR_NUM;
     }
     return BASE_IN_TENSOR_NUM;
 }
@@ -188,7 +189,7 @@ bool RingMLAOperation::DimNumCheck(const SVector &inTensorDescs, Ext
     }
     if (inTensorDescs.at(IN_SEQLEN_INDEX).shape.dimNum != 1 && // 1: [batch]
-        inTensorDescs.at(IN_SEQLEN_INDEX).shape.dimNum != 2) { // 1: [2, batch]
+        inTensorDescs.at(IN_SEQLEN_INDEX).shape.dimNum != 2) { // 2: [2, batch]
         extError.errorDesc = "dimNum of seqlen should be 1 or 2!";
         extError.errorData = OperationUtil::ConcatInfo(", but got seqlen dimNum: ",
                                                        inTensorDescs.at(IN_SEQLEN_INDEX).shape.dimNum);
@@ -224,9 +225,9 @@ bool RingMLAOperation::QSplitDimCheck(const SVector &inTensorDescs,
         return false;
     }
     if (inTensorDescs.at(IN_QUERY_SPLIT2_INDEX).shape.dims[QKV_HEAD_SIZE_IDX] != QK_SPLIT2_HEAD_SIZE) {
-        extError.errorDesc = OperationUtil::ConcatInfo("headSize of querySplit1 must be ", QK_SPLIT2_HEAD_SIZE);
+        extError.errorDesc = OperationUtil::ConcatInfo("headSize of querySplit2 must be ", QK_SPLIT2_HEAD_SIZE);
         extError.errorData = OperationUtil::ConcatInfo(
-            "But got querySplit1[2] headSize: ", inTensorDescs.at(IN_QUERY_SPLIT2_INDEX).shape.dims[QKV_HEAD_SIZE_IDX]);
+            "But got querySplit2[2] headSize: ", inTensorDescs.at(IN_QUERY_SPLIT2_INDEX).shape.dims[QKV_HEAD_SIZE_IDX]);
         ATB_LOG(ERROR) << GetLogPrefix() << extError;
         return false;
     }
@@ -299,7 +300,10 @@ Status RingMLAOperation::DimCheck(const SVector &inTensorDescs) cons
     // qkv shape: [q/kv nTokens, q/kv HeadNum, qk/v headSize]
     extError.errorType = ERROR_INVALID_TENSOR_DIM;
     extError.solutionDesc = "Please check the shape of querySplit1, querySplit2, keySplit1, keySplit2 and value.";
-    if (!QSplitDimCheck(inTensorDescs, extError) || !KSplitDimCheck(inTensorDescs, extError)) {
+    if (!QSplitDimCheck(inTensorDescs, extError)) {
+        return extError.errorType;
+    }
+    if (!KSplitDimCheck(inTensorDescs, extError)) {
         return extError.errorType;
     }
     int64_t kvHeadNum = inTensorDescs.at(IN_KEY_SPLIT1_INDEX).shape.dims[QKV_HEAD_NUM_IDX];
@@ -451,6 +455,8 @@ Status RingMLAOperation::SetupCheckImpl(const SVector &inTensors, const
     for (size_t i = 0; i < BASE_OUT_TENSOR_NUM; ++i) {
         if (!TensorUtil::TensorDescEqual(outTensorDescs.at(i), targetOutTensorDescs.at(i))) {
             extError.errorDesc = OperationUtil::ConcatInfo("Invalid outTensor shape at outTensors[", i, "].");
+            ss.str("");
+            ss.clear();
             ss << "Target outTensor shape: [";
             int32_t dimNum = static_cast(targetOutTensorDescs.at(i).shape.dimNum);
             for (int32_t j = 0; j < dimNum - 1; ++j) {
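Note on the ring_mla SetupCheckImpl hunk: ss is reused for every mismatching outTensor, so without the added ss.str(""); ss.clear(); the message built for outTensors[1] would still carry the text accumulated for outTensors[0]. A standalone illustration of the reset idiom:

    // std::stringstream keeps its buffer (and state flags) across reuse
    // unless both are reset explicitly.
    #include <iostream>
    #include <sstream>

    int main()
    {
        std::stringstream ss;
        for (int i = 0; i < 2; ++i) {
            ss.str("");  // drop text accumulated in earlier iterations
            ss.clear();  // reset error/eof flags; str("") alone does not do this
            ss << "Target outTensor shape: [" << i << "]";
            std::cout << ss.str() << "\n";
        }
        return 0;
    }

str("") clears only the buffer and clear() only the state flags; forgetting either is a common source of concatenated or empty log messages.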
diff --git a/src/ops_infer/self_attention/self_attention_encoder_fuison_ops_runner_910a.cpp b/src/ops_infer/self_attention/self_attention_encoder_fuison_ops_runner_910a.cpp
index a550f96a..8339bcf0 100644
--- a/src/ops_infer/self_attention/self_attention_encoder_fuison_ops_runner_910a.cpp
+++ b/src/ops_infer/self_attention/self_attention_encoder_fuison_ops_runner_910a.cpp
@@ -20,7 +20,7 @@ namespace atb {
 void TransQKVEncoderViewFunc910a(const Mki::SVector &oldDims, Mki::SVector &newDims)
 {
     if (oldDims.size() != 3) { // 3: q, k, v must be 3-dimensional
-        ATB_LOG(ERROR) << "q, k, v should all be three dims";
+        ATB_LOG(ERROR) << "The dimNum of q, k, v should all be 3";
         return;
     }
     newDims = {1, oldDims.at(0), oldDims.at(1) * oldDims.at(2)};
@@ -86,6 +86,11 @@ Status SelfAttentionEncoderFusionOpsRunner910A::SetupKernelGraph(const OpsTensor
     qTransdataNode.inTensors = {&query};
     qTransdataNode.outTensors = {&queryNz};
     qTransdataNode.inferShapePreFunc = [&](Mki::LaunchParam &launchParam) {
+        if (launchParam.GetInTensor(0).desc.dims.size() < 3) {
+            ATB_LOG(ERROR) << "expect inTensor dimNum to be at least 3, but got: "
+                           << launchParam.GetInTensor(0).desc.dims.size();
+            return;
+        }
         ntokens_ = launchParam.GetInTensor(0).desc.dims.at(1);
         hiddenSize_ = launchParam.GetInTensor(0).desc.dims.at(2); // 2: the 3rd dim
     };
@@ -149,14 +154,14 @@ Status SelfAttentionEncoderFusionOpsRunner910A::SetupKernelGraph(const OpsTensor
 bool SelfAttentionEncoderFusionOpsRunner910A::NeedModifySlopes(const OpsTensorPack &opsTensorPack)
 {
     bool isMaskCompress = param_.maskType == atb::infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS ||
-        param_.maskType == atb::infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS_SQRT;
+                          param_.maskType == atb::infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS_SQRT;
     if (!isMaskCompress) {
         return false;
     } else {
         return opsTensorPack.inTensors.at(3).desc.dims.size() == 4 && // 3: maskId, 4: shape is not [1,256//16,256,16]
-            (opsTensorPack.inTensors.at(3).desc.dims[0] != 1 || // 3: maskId
-             opsTensorPack.inTensors.at(3).desc.dims[1] != 16 || // 3: maskId, 16: mask shape
-             opsTensorPack.inTensors.at(3).desc.dims[2] != 256); // 3: maskId, 256: mask shape
+               (opsTensorPack.inTensors.at(3).desc.dims[0] != 1 ||   // 3: maskId
+                opsTensorPack.inTensors.at(3).desc.dims[1] != 16 ||  // 3: maskId, 16: mask shape
+                opsTensorPack.inTensors.at(3).desc.dims[2] != 256);  // 3: maskId, 256: mask shape
     }
 }
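Note on the encoder runner hunks: inferShapePreFunc used to read dims.at(1) and dims.at(2) unconditionally, so a malformed input with fewer than 3 dims would throw from inside the lambda; the new guard logs and bails out first. A standalone sketch of the same guard, with std::vector standing in for Mki::SVector and the [*, ntokens, hiddenSize] layout taken as an assumption:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Validate the dim count before indexing, as the new guard does.
    bool ReadNtokensAndHiddenSize(const std::vector<int64_t> &dims, int64_t &ntokens, int64_t &hiddenSize)
    {
        if (dims.size() < 3) { // 3: [*, ntokens, hiddenSize] assumed
            std::cerr << "expect inTensor dimNum to be at least 3, but got: " << dims.size() << "\n";
            return false;
        }
        ntokens = dims.at(1);
        hiddenSize = dims.at(2); // 2: the 3rd dim
        return true;
    }

    int main()
    {
        int64_t ntokens = 0;
        int64_t hiddenSize = 0;
        std::vector<int64_t> bad = {1, 8}; // too short: guard fires instead of throwing
        return ReadNtokensAndHiddenSize(bad, ntokens, hiddenSize) ? 0 : 1;
    }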
diff --git a/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_910a.cpp b/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_910a.cpp
index 1758642a..202e3182 100644
--- a/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_910a.cpp
+++ b/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_910a.cpp
@@ -41,7 +41,7 @@ void TransAttnMaskViewFuncBypass910a(const Mki::SVector &oldDims, Mki::
 void FlashAttentionInferShapePreFuncBypass910a(Mki::LaunchParam &launchParam)
 {
     if (launchParam.GetInTensors().size() < 4) { // 4: no fewer than 4 inTensors
-        ATB_LOG(ERROR) << "inTensor num should be at least 5";
+        ATB_LOG(ERROR) << "inTensor num should be at least 4";
         return;
     }
     launchParam.GetInTensor(3).desc.dtype = Mki::TENSOR_DTYPE_UINT32; // 3: set the dtype of the 4th input tensor
diff --git a/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_BNSD.cpp b/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_BNSD.cpp
index 2408950f..efef3594 100644
--- a/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_BNSD.cpp
+++ b/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_BNSD.cpp
@@ -110,9 +110,8 @@ Status SelfAttentionFusionBypassOpsRunnerBNSD::ModifyKernelGraph(const OpsTensor
         return ERROR_INVALID_PARAM;
     }

-    ATB_LOG(INFO) << "kernelGraph_.nodes.size" << kernelGraph_.nodes.size();
     auto &flashAttentionNode = kernelGraph_.nodes.at(1); // 1: position of the flashAttention node
-    ATB_LOG(INFO) << "kernelGraph_.nodes.size";
+    ATB_LOG(INFO) << "kernelGraph_.nodes.size: " << kernelGraph_.nodes.size();
     AtbOps::OpParam::UnpadFlashAttention flashAttentionQParam;
     SetFAParam(flashAttentionQParam);
     ATB_LOG(INFO) << "SetFAParam";
diff --git a/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_BNSD_910a.cpp b/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_BNSD_910a.cpp
index 25c005f3..5b744628 100644
--- a/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_BNSD_910a.cpp
+++ b/src/ops_infer/self_attention/self_attention_fusion_bypass_ops_runner_BNSD_910a.cpp
@@ -90,7 +90,8 @@ Status SelfAttentionFusionBypassOpsRunnerBNSD910A::SetupKernelGraph(const OpsTen
                                          static_cast(ACL_FORMAT_UNDEFINED);
     kernelGraph_.internalTensors.resize(
         (attnMaskFormat == static_cast(ACL_FORMAT_FRACTAL_NZ) || (!needMask)) ?
-        3 : (param_.maskType == atb::infer::SelfAttentionParam::MASK_TYPE_ALIBI ? 4 : 3)); // 4, 3: internal tensor count
+        3 :
+        (param_.maskType == atb::infer::SelfAttentionParam::MASK_TYPE_ALIBI ? 4 : 3)); // 4, 3: internal tensor count
     size_t internalTensorId = 0;

     Mki::Tensor &transdataQResultTensor = kernelGraph_.internalTensors.at(internalTensorId++);
diff --git a/src/ops_infer/self_attention/self_attention_fusion_ops_runner_910a.cpp b/src/ops_infer/self_attention/self_attention_fusion_ops_runner_910a.cpp
index cc6413e2..a3affe42 100644
--- a/src/ops_infer/self_attention/self_attention_fusion_ops_runner_910a.cpp
+++ b/src/ops_infer/self_attention/self_attention_fusion_ops_runner_910a.cpp
@@ -20,6 +20,7 @@ namespace atb {
 void TransKVViewFunc910a(const Mki::SVector &oldDims, Mki::SVector &newDims)
 {
     if (oldDims.size() < 2) { // 2: minimum dimNum
+        ATB_LOG(ERROR) << "inTensor key/value's dimNum should be at least 2";
         return;
     }
     if (oldDims.size() != 4) { // 4: dimNum
@@ -32,6 +33,7 @@
 void TransQViewFunc910a(const Mki::SVector &oldDims, Mki::SVector &newDims)
 {
     if (oldDims.size() < 2) { // 2: minimum dimNum
+        ATB_LOG(ERROR) << "inTensor query's dimNum should be at least 2";
         return;
     }
     if (oldDims.size() != 4) { // 4: dimNum
@@ -111,7 +113,8 @@ Status SelfAttentionFusionOpsRunner910A::SetupKernelGraph(const OpsTensorPack &o
         needMask_ ? opsTensorPack.inTensors.at(5).desc.format : static_cast(ACL_FORMAT_UNDEFINED);
     kernelGraph_.internalTensors.resize(
         (attnMaskFormat == static_cast(ACL_FORMAT_FRACTAL_NZ) || (!needMask_)) ?
-        7 : (param_.maskType == atb::infer::SelfAttentionParam::MASK_TYPE_ALIBI ? 8 : 7)); // 7, 8: total node count
+        7 :
+        (param_.maskType == atb::infer::SelfAttentionParam::MASK_TYPE_ALIBI ? 8 : 7)); // 7, 8: total node count
     size_t internalTensorId = 0;

     Mki::Tensor &transdataKResultTensor = kernelGraph_.internalTensors.at(internalTensorId++);
@@ -129,7 +132,8 @@ Status SelfAttentionFusionOpsRunner910A::SetupKernelGraph(const OpsTensorPack &o
     Mki::Tensor &contextTranspose = kernelGraph_.internalTensors.at(internalTensorId++);

     kernelGraph_.nodes.resize((attnMaskFormat == static_cast(ACL_FORMAT_ND) && needMask_) ?
-                              11 : 10); // 10, 11: total node count
+                              11 :
+                              10); // 10, 11: total node count
     size_t nodeId = 0;

     auto &transdataKNode = kernelGraph_.nodes.at(nodeId++);
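Note on the four runner hunks above: besides the corrected log texts ("at least 4"; the node count now actually printed with its value), the TransKVViewFunc910a/TransQViewFunc910a guards now log before returning, so a too-short shape no longer fails silently. A standalone sketch of a view function with a logging guard; the names and the reshape rule are illustrative only:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // A view func that rejects short shapes loudly instead of silently.
    void TransKVViewSketch(const std::vector<int64_t> &oldDims, std::vector<int64_t> &newDims)
    {
        if (oldDims.size() < 2) { // 2: minimum dimNum
            std::cerr << "inTensor key/value's dimNum should be at least 2, got: " << oldDims.size() << "\n";
            return; // newDims is left untouched, but the rejection is now traceable
        }
        newDims = {1, oldDims.front(), oldDims.back()}; // illustrative reshape only
    }

    int main()
    {
        std::vector<int64_t> newDims;
        TransKVViewSketch({42}, newDims); // 1-dim input: guard fires and logs
        return newDims.empty() ? 1 : 0;
    }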
diff --git a/src/ops_infer/self_attention/self_attention_operation.cpp b/src/ops_infer/self_attention/self_attention_operation.cpp
index e7262743..3080cbb5 100644
--- a/src/ops_infer/self_attention/self_attention_operation.cpp
+++ b/src/ops_infer/self_attention/self_attention_operation.cpp
@@ -94,10 +94,6 @@ template <> Status CreateOperation(const infer::SelfAttentionParam &opParam, Ope
         ATB_LOG(ERROR) << "scaleType should be in the range of its enum value";
         return ERROR_INVALID_PARAM;
     }
-    if (opParam.scaleType == infer::SelfAttentionParam::SCALE_TYPE_LOGN && needQKVQuant) {
-        ATB_LOG(ERROR) << "both scaleType and QKVQuant are enable";
-        return ERROR_INVALID_PARAM;
-    }
     if (opParam.calcType == infer::SelfAttentionParam::PA_ENCODER &&
         opParam.kvcacheCfg == atb::infer::SelfAttentionParam::K_BYPASS_V_BYPASS) {
         ATB_LOG(ERROR) << "when calcType is PA_ENCODER, kvcacheCfg should not be K_BYPASS_V_BYPASS";
@@ -1126,7 +1122,7 @@ Status SelfAttentionOperation::MaxHeadSizeCheck910B(const int64_t headSizeK, con
     }
     if (param_.maskType == infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS ||
         param_.maskType == infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS_SQRT ||
-        param_.maskType == infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS_SQRT) {
+        param_.maskType == infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS_LEFT_ALIGN) {
         maxHeadSize = 128; // 128: with compressed alibi, headSize must be <= 128
     }
     if (headSizeK > maxHeadSize) {
@@ -1172,7 +1168,7 @@ Status SelfAttentionOperation::HeadSizeDimCheck310P(const SVector &i
     }
     if (param_.maskType == infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS ||
         param_.maskType == infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS_SQRT ||
-        param_.maskType == infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS_SQRT) {
+        param_.maskType == infer::SelfAttentionParam::MASK_TYPE_ALIBI_COMPRESS_LEFT_ALIGN) {
         if (headSizeK > 128 || headSizeV > 128) { // 128: with compressed alibi, headSize must be <= 128
             ATB_LOG(ERROR) << "headSize of key and value should be no greater than 128 with alibi compress mask";
             return ERROR_INVALID_TENSOR_DIM;
@@ -1420,16 +1416,16 @@ Status SelfAttentionOperation::InferShapeDimNumCheck(const SVector &
 Status SelfAttentionOperation::InferShapePADimNumCheckBNSD(const SVector &inTensorDescs) const
 {
     if (inTensorDescs.at(0).shape.dimNum != 4) { // 4: [batch, head_num, seq_len, head_size]
-        ATB_LOG(ERROR) << "dimNum of query should be 4";
+        ATB_LOG(ERROR) << "dimNum of query should be 4, but got: " << inTensorDescs.at(0).shape.dimNum;
         return ERROR_INVALID_TENSOR_DIM_NUM;
     }
     if (inTensorDescs.at(kcacheId_).shape.dimNum != 4) { // 4: [batch, head_num, seq_len, head_size]
-        ATB_LOG(ERROR) << "dimNum of key should be 4";
+        ATB_LOG(ERROR) << "dimNum of key should be 4, but got: " << inTensorDescs.at(kcacheId_).shape.dimNum;
         return ERROR_INVALID_TENSOR_DIM_NUM;
     }
-    if (inTensorDescs.at(tokenOffsetId_).shape.dimNum != 1 &&
-        inTensorDescs.at(tokenOffsetId_).shape.dimNum != 2) { // 2: seqlen: [2, batch]
-        ATB_LOG(ERROR) << "dimNum of seqlen should be 1 or 2";
1 or 2"; + uint64_t seqLenDimNum = inTensorDescs.at(tokenOffsetId_).shape.dimNum; + if (seqLenDimNum != 1 && seqLenDimNum != 2) { // 2: seqlen: [2, batch] + ATB_LOG(ERROR) << "dimNum of seqlen should be 1 or 2, bot got: " << seqLenDimNum; return ERROR_INVALID_TENSOR_DIM_NUM; } return NO_ERROR; -- Gitee