From ef4742e1afbae2819a165000b4cca34dba9e1ebe Mon Sep 17 00:00:00 2001 From: ivanshan_8170 Date: Fri, 26 Sep 2025 11:53:00 +0800 Subject: [PATCH 1/2] fix fa demo readme, ringmla set_device id --- example/op_demo/ring_mla/ring_mla_demo.cpp | 2 +- example/op_demo/self_attention/README.md | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/example/op_demo/ring_mla/ring_mla_demo.cpp b/example/op_demo/ring_mla/ring_mla_demo.cpp index c4eb1aa2..3c7125f7 100644 --- a/example/op_demo/ring_mla/ring_mla_demo.cpp +++ b/example/op_demo/ring_mla/ring_mla_demo.cpp @@ -165,7 +165,7 @@ int main(int argc, char **argv) { CHECK_STATUS(aclInit(nullptr)); // 设置卡号、创建context、设置stream - int32_t deviceId = 1; + int32_t deviceId = 0; CHECK_STATUS(aclrtSetDevice(deviceId)); atb::Context *context = nullptr; diff --git a/example/op_demo/self_attention/README.md b/example/op_demo/self_attention/README.md index 1b56c3af..a7479a31 100644 --- a/example/op_demo/self_attention/README.md +++ b/example/op_demo/self_attention/README.md @@ -65,16 +65,12 @@ SelfAttention在Atlas A2/A3系列仅支持部分场景,且Encoder场景在Atla | `seqLen` | int32 | nd | [10] | cpu | | `layerId` | int32 | nd | [1] | npu | | **Output** | -| `output` | float16 | nd | [1] | npu | +| `output` | float16 | nd | [160, 2048] | npu | + q,k,v第一维度为总词元长度,即`sum(seqlen)` + q,k,v第二维度headNum,headSize合轴,实际为headHum(32) $\times$ headSize(128) #### self_attention_encoder_inference_demo.cpp + 场景:fa encoder基础场景在Atlas推理系列上的实现,分开传入key,CacheK,value和CacheV -+ 更改编译脚本为: - `g++ -D_GLIBCXX_USE_CXX11_ABI=$cxx_abi -I "${ATB_HOME_PATH}/include" -I "${ASCEND_HOME_PATH}/include" -L "${ATB_HOME_PATH}/lib" -L "${ASCEND_HOME_PATH}/lib64" self_attention_encoder_inference_demo.cpp demo_util.h -l atb -l ascendcl -o self_attention_encoder_inference_demo` -- 运行时调用: -`./self_attention_encoder_inference_demo` + 该demo仅支持在Atlas推理系列上运行 **参数设置**: -- Gitee From 54a77c54ea549474b8a5e9beb9d5acb61713c5c7 Mon Sep 17 00:00:00 2001 From: ivanshan_8170 Date: Fri, 26 Sep 2025 
15:52:35 +0800 Subject: [PATCH 2/2] fix comments --- example/op_demo/ring_mla/README.md | 4 ++++ example/op_demo/ring_mla/build.sh | 2 +- example/op_demo/ring_mla/ring_mla_demo.cpp | 6 +++++- example/op_demo/self_attention/README.md | 4 ++++ example/op_demo/self_attention/build.sh | 2 +- .../op_demo/self_attention/self_attention_encoder_demo.cpp | 5 ++++- .../self_attention_encoder_inference_demo.cpp | 5 ++++- .../self_attention/self_attention_pa_encoder_demo.cpp | 5 ++++- .../self_attention/self_attention_pa_encoder_qwen_demo.cpp | 5 ++++- .../self_attention/self_attention_prefix_encoder_demo.cpp | 5 ++++- 10 files changed, 35 insertions(+), 8 deletions(-) diff --git a/example/op_demo/ring_mla/README.md b/example/op_demo/ring_mla/README.md index 4011ef7e..d29bb1b9 100644 --- a/example/op_demo/ring_mla/README.md +++ b/example/op_demo/ring_mla/README.md @@ -24,6 +24,10 @@ ```sh g++ -D_GLIBCXX_USE_CXX11_ABI=1 -I ... ``` + - 生成的二进制文件***_demo可以额外传入一个int参数作为deviceId,默认为0,如: + ```sh + ./ring_mla_demo 0 + ``` ## 额外说明 示例中生成的数据不代表实际场景,如需数据生成参考请查看python用例目录: diff --git a/example/op_demo/ring_mla/build.sh b/example/op_demo/ring_mla/build.sh index cba32d3f..3f2b3b33 100644 --- a/example/op_demo/ring_mla/build.sh +++ b/example/op_demo/ring_mla/build.sh @@ -21,4 +21,4 @@ echo "Using cxx_abi=$cxx_abi" g++ -D_GLIBCXX_USE_CXX11_ABI=$cxx_abi -I "${ATB_HOME_PATH}/include" -I "${ASCEND_HOME_PATH}/include" -L "${ATB_HOME_PATH}/lib" -L "${ASCEND_HOME_PATH}/lib64" \ ring_mla_demo.cpp ../demo_util.h -l atb -l ascendcl -o ring_mla_demo -./ring_mla_demo +./ring_mla_demo 0 diff --git a/example/op_demo/ring_mla/ring_mla_demo.cpp b/example/op_demo/ring_mla/ring_mla_demo.cpp index 3c7125f7..5cd65bf4 100644 --- a/example/op_demo/ring_mla/ring_mla_demo.cpp +++ b/example/op_demo/ring_mla/ring_mla_demo.cpp @@ -163,9 +163,13 @@ atb::Status RunRingMLADemo(atb::Context *contextPtr, aclrtStream stream, atb::Op int main(int argc, char **argv) { + int32_t deviceId = 0; + if (argc == 2) { + deviceId = 
std::stoi(argv[1]); + } CHECK_STATUS(aclInit(nullptr)); // 设置卡号、创建context、设置stream - int32_t deviceId = 0; + CHECK_STATUS(aclrtSetDevice(deviceId)); atb::Context *context = nullptr; diff --git a/example/op_demo/self_attention/README.md b/example/op_demo/self_attention/README.md index a7479a31..0dd64c3a 100644 --- a/example/op_demo/self_attention/README.md +++ b/example/op_demo/self_attention/README.md @@ -24,6 +24,10 @@ ```sh g++ -D_GLIBCXX_USE_CXX11_ABI=1 -I ... ``` + - 生成的二进制文件***_demo可以额外传入一个int参数作为deviceId,默认为0,如: + ```sh + ./self_attention_encoder_demo 0 + ``` - 提供的build脚本仅用于编译和运行self_attention_encoder_demo.cpp,如需编译其他demo,需要替换“self_attention_encoder_demo”为对应的cpp文件名 ## 额外说明 diff --git a/example/op_demo/self_attention/build.sh b/example/op_demo/self_attention/build.sh index 5ee3d564..b132a42a 100644 --- a/example/op_demo/self_attention/build.sh +++ b/example/op_demo/self_attention/build.sh @@ -21,4 +21,4 @@ echo "Using cxx_abi=$cxx_abi" g++ -D_GLIBCXX_USE_CXX11_ABI=$cxx_abi -I "${ATB_HOME_PATH}/include" -I "${ASCEND_HOME_PATH}/include" -L "${ATB_HOME_PATH}/lib" -L "${ASCEND_HOME_PATH}/lib64" \ self_attention_encoder_demo.cpp ../demo_util.h -l atb -l ascendcl -o self_attention_encoder_demo -./self_attention_encoder_demo +./self_attention_encoder_demo 0 diff --git a/example/op_demo/self_attention/self_attention_encoder_demo.cpp b/example/op_demo/self_attention/self_attention_encoder_demo.cpp index b65e5d05..4fd7b149 100644 --- a/example/op_demo/self_attention/self_attention_encoder_demo.cpp +++ b/example/op_demo/self_attention/self_attention_encoder_demo.cpp @@ -115,11 +115,14 @@ atb::Status PrepareOperation(atb::Operation **encoderOp) int main(int argc, char **argv) { + int32_t deviceId = 0; + if (argc == 2) { + deviceId = std::stoi(argv[1]); + } // kv隐藏层大小,用于输出tensor shape uint32_t kvHiddenSize = KV_HEAD_NUM * HEAD_SIZE; // 设置卡号、创建context、设置stream CHECK_STATUS(aclInit(nullptr)); - int32_t deviceId = 0; CHECK_STATUS(aclrtSetDevice(deviceId)); atb::Context 
*context = nullptr; CHECK_STATUS(atb::CreateContext(&context)); diff --git a/example/op_demo/self_attention/self_attention_encoder_inference_demo.cpp b/example/op_demo/self_attention/self_attention_encoder_inference_demo.cpp index 5b6b7551..06f3f6f9 100644 --- a/example/op_demo/self_attention/self_attention_encoder_inference_demo.cpp +++ b/example/op_demo/self_attention/self_attention_encoder_inference_demo.cpp @@ -104,9 +104,12 @@ int main(int argc, char **argv) // kv隐藏层大小,用于输出tensor shape uint32_t kvHiddenSize = KV_HEAD_NUM * HEAD_SIZE; + int32_t deviceId = 0; + if (argc == 2) { + deviceId = std::stoi(argv[1]); + } // 设置卡号、创建context、设置stream CHECK_STATUS(aclInit(nullptr)); - int32_t deviceId = 0; CHECK_STATUS(aclrtSetDevice(deviceId)); atb::Context *context = nullptr; CHECK_STATUS(atb::CreateContext(&context)); diff --git a/example/op_demo/self_attention/self_attention_pa_encoder_demo.cpp b/example/op_demo/self_attention/self_attention_pa_encoder_demo.cpp index 125ccf67..91f5f0a1 100644 --- a/example/op_demo/self_attention/self_attention_pa_encoder_demo.cpp +++ b/example/op_demo/self_attention/self_attention_pa_encoder_demo.cpp @@ -85,9 +85,12 @@ atb::Status PrepareOperation(atb::Operation **paEncoderOp) int main(int argc, char **argv) { + int32_t deviceId = 0; + if (argc == 2) { + deviceId = std::stoi(argv[1]); + } CHECK_STATUS(aclInit(nullptr)); // 设置卡号、创建context、设置stream - int32_t deviceId = 0; CHECK_STATUS(aclrtSetDevice(deviceId)); atb::Context *context = nullptr; CHECK_STATUS(atb::CreateContext(&context)); diff --git a/example/op_demo/self_attention/self_attention_pa_encoder_qwen_demo.cpp b/example/op_demo/self_attention/self_attention_pa_encoder_qwen_demo.cpp index a6acad4b..d2c7afe7 100644 --- a/example/op_demo/self_attention/self_attention_pa_encoder_qwen_demo.cpp +++ b/example/op_demo/self_attention/self_attention_pa_encoder_qwen_demo.cpp @@ -111,9 +111,12 @@ atb::Status PrepareOperation(atb::Operation **paEncoderOp) int main(int argc, char **argv) { + 
int32_t deviceId = 0; + if (argc == 2) { + deviceId = std::stoi(argv[1]); + } CHECK_STATUS(aclInit(nullptr)); // 设置卡号、创建context、设置stream - int32_t deviceId = 0; CHECK_STATUS(aclrtSetDevice(deviceId)); atb::Context *context = nullptr; CHECK_STATUS(atb::CreateContext(&context)); diff --git a/example/op_demo/self_attention/self_attention_prefix_encoder_demo.cpp b/example/op_demo/self_attention/self_attention_prefix_encoder_demo.cpp index 20ebcabb..974321ca 100644 --- a/example/op_demo/self_attention/self_attention_prefix_encoder_demo.cpp +++ b/example/op_demo/self_attention/self_attention_prefix_encoder_demo.cpp @@ -106,9 +106,12 @@ atb::Status PrepareOperation(atb::Operation **prefixEncoderOp) int main(int argc, char **argv) { + int32_t deviceId = 0; + if (argc == 2) { + deviceId = std::stoi(argv[1]); + } // 设置卡号、创建context、设置stream CHECK_STATUS(aclInit(nullptr)); - int32_t deviceId = 0; CHECK_STATUS(aclrtSetDevice(deviceId)); atb::Context *context = nullptr; CHECK_STATUS(atb::CreateContext(&context)); -- Gitee