From ef4742e1afbae2819a165000b4cca34dba9e1ebe Mon Sep 17 00:00:00 2001
From: ivanshan_8170 <shanzidan@h-partners.com>
Date: Fri, 26 Sep 2025 11:53:00 +0800
Subject: [PATCH 1/2] Fix self_attention demo README and ring_mla demo device id

---
 example/op_demo/ring_mla/ring_mla_demo.cpp | 2 +-
 example/op_demo/self_attention/README.md   | 6 +-----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/example/op_demo/ring_mla/ring_mla_demo.cpp b/example/op_demo/ring_mla/ring_mla_demo.cpp
index c4eb1aa2..3c7125f7 100644
--- a/example/op_demo/ring_mla/ring_mla_demo.cpp
+++ b/example/op_demo/ring_mla/ring_mla_demo.cpp
@@ -165,7 +165,7 @@ int main(int argc, char **argv)
 {
     CHECK_STATUS(aclInit(nullptr));
     // 设置卡号、创建context、设置stream
-    int32_t deviceId = 1;
+    int32_t deviceId = 0;
     CHECK_STATUS(aclrtSetDevice(deviceId));
 
     atb::Context *context = nullptr;
diff --git a/example/op_demo/self_attention/README.md b/example/op_demo/self_attention/README.md
index 1b56c3af..a7479a31 100644
--- a/example/op_demo/self_attention/README.md
+++ b/example/op_demo/self_attention/README.md
@@ -65,16 +65,12 @@ SelfAttention在Atlas A2/A3系列仅支持部分场景，且Encoder场景在Atla
 | `seqLen`        | int32    | nd       | [10]                | cpu     |
 | `layerId`       | int32    | nd       | [1]                 | npu     |
 | **Output**      |
-| `output`        | float16  | nd       | [1]                 | npu     |
+| `output`        | float16  | nd       | [160, 2048]         | npu     |
 + q，k，v第一维度为总词元长度，即`sum(seqlen)`
 + q，k，v第二维度headNum，headSize合轴，实际为headHum(32) $\times$ headSize(128)
 
 #### self_attention_encoder_inference_demo.cpp
 + 场景：fa encoder基础场景在Atlas推理系列上的实现，分开传入key，CacheK，value和CacheV
-+ 更改编译脚本为：
- `g++ -D_GLIBCXX_USE_CXX11_ABI=$cxx_abi -I "${ATB_HOME_PATH}/include" -I "${ASCEND_HOME_PATH}/include" -L "${ATB_HOME_PATH}/lib" -L "${ASCEND_HOME_PATH}/lib64" self_attention_encoder_inference_demo.cpp demo_util.h -l atb -l ascendcl -o self_attention_encoder_inference_demo`
-- 运行时调用：
-`./self_attention_encoder_inference_demo`
 + 该demo仅支持在Atlas推理系列上运行
 
 **参数设置**：
-- 
Gitee


From 54a77c54ea549474b8a5e9beb9d5acb61713c5c7 Mon Sep 17 00:00:00 2001
From: ivanshan_8170 <shanzidan@h-partners.com>
Date: Fri, 26 Sep 2025 15:52:35 +0800
Subject: [PATCH 2/2] Address review comments: accept deviceId as an optional command-line argument in op demos

---
 example/op_demo/ring_mla/README.md                          | 4 ++++
 example/op_demo/ring_mla/build.sh                           | 2 +-
 example/op_demo/ring_mla/ring_mla_demo.cpp                  | 6 +++++-
 example/op_demo/self_attention/README.md                    | 4 ++++
 example/op_demo/self_attention/build.sh                     | 2 +-
 .../op_demo/self_attention/self_attention_encoder_demo.cpp  | 5 ++++-
 .../self_attention_encoder_inference_demo.cpp               | 5 ++++-
 .../self_attention/self_attention_pa_encoder_demo.cpp       | 5 ++++-
 .../self_attention/self_attention_pa_encoder_qwen_demo.cpp  | 5 ++++-
 .../self_attention/self_attention_prefix_encoder_demo.cpp   | 5 ++++-
 10 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/example/op_demo/ring_mla/README.md b/example/op_demo/ring_mla/README.md
index 4011ef7e..d29bb1b9 100644
--- a/example/op_demo/ring_mla/README.md
+++ b/example/op_demo/ring_mla/README.md
@@ -24,6 +24,10 @@
         ```sh
         g++ -D_GLIBCXX_USE_CXX11_ABI=1 -I ...
         ```
+    - 生成的二进制文件***_demo可以额外传入一个int参数作为deviceId，默认为0，如：
+        ```sh
+        ./ring_mla_demo 0
+        ```
 
 ## 额外说明
 示例中生成的数据不代表实际场景，如需数据生成参考请查看python用例目录：
diff --git a/example/op_demo/ring_mla/build.sh b/example/op_demo/ring_mla/build.sh
index cba32d3f..3f2b3b33 100644
--- a/example/op_demo/ring_mla/build.sh
+++ b/example/op_demo/ring_mla/build.sh
@@ -21,4 +21,4 @@ echo "Using cxx_abi=$cxx_abi"
 
 g++ -D_GLIBCXX_USE_CXX11_ABI=$cxx_abi -I "${ATB_HOME_PATH}/include" -I "${ASCEND_HOME_PATH}/include" -L "${ATB_HOME_PATH}/lib" -L "${ASCEND_HOME_PATH}/lib64" \
 ring_mla_demo.cpp ../demo_util.h -l atb -l ascendcl -o ring_mla_demo
-./ring_mla_demo
+./ring_mla_demo 0
diff --git a/example/op_demo/ring_mla/ring_mla_demo.cpp b/example/op_demo/ring_mla/ring_mla_demo.cpp
index 3c7125f7..5cd65bf4 100644
--- a/example/op_demo/ring_mla/ring_mla_demo.cpp
+++ b/example/op_demo/ring_mla/ring_mla_demo.cpp
@@ -163,9 +163,13 @@ atb::Status RunRingMLADemo(atb::Context *contextPtr, aclrtStream stream, atb::Op
 
 int main(int argc, char **argv)
 {
+    int32_t deviceId = 0;
+    if (argc == 2) {
+        deviceId = std::stoi(argv[1]);
+    }
     CHECK_STATUS(aclInit(nullptr));
     // 设置卡号、创建context、设置stream
-    int32_t deviceId = 0;
+
     CHECK_STATUS(aclrtSetDevice(deviceId));
 
     atb::Context *context = nullptr;
diff --git a/example/op_demo/self_attention/README.md b/example/op_demo/self_attention/README.md
index a7479a31..0dd64c3a 100644
--- a/example/op_demo/self_attention/README.md
+++ b/example/op_demo/self_attention/README.md
@@ -24,6 +24,10 @@
         ```sh
         g++ -D_GLIBCXX_USE_CXX11_ABI=1 -I ...
         ```
+    - 生成的二进制文件***_demo可以额外传入一个int参数作为deviceId，默认为0，如：
+        ```sh
+        ./self_attention_encoder_demo 0
+        ```
     - 提供的build脚本仅用于编译和运行self_attention_encoder_demo.cpp，如需编译其他demo，需要替换“self_attention_encoder_demo”为对应的cpp文件名
 
 ## 额外说明
diff --git a/example/op_demo/self_attention/build.sh b/example/op_demo/self_attention/build.sh
index 5ee3d564..b132a42a 100644
--- a/example/op_demo/self_attention/build.sh
+++ b/example/op_demo/self_attention/build.sh
@@ -21,4 +21,4 @@ echo "Using cxx_abi=$cxx_abi"
 
 g++ -D_GLIBCXX_USE_CXX11_ABI=$cxx_abi -I "${ATB_HOME_PATH}/include" -I "${ASCEND_HOME_PATH}/include" -L "${ATB_HOME_PATH}/lib" -L "${ASCEND_HOME_PATH}/lib64" \
 self_attention_encoder_demo.cpp ../demo_util.h -l atb -l ascendcl -o self_attention_encoder_demo
-./self_attention_encoder_demo
+./self_attention_encoder_demo 0
diff --git a/example/op_demo/self_attention/self_attention_encoder_demo.cpp b/example/op_demo/self_attention/self_attention_encoder_demo.cpp
index b65e5d05..4fd7b149 100644
--- a/example/op_demo/self_attention/self_attention_encoder_demo.cpp
+++ b/example/op_demo/self_attention/self_attention_encoder_demo.cpp
@@ -115,11 +115,14 @@ atb::Status PrepareOperation(atb::Operation **encoderOp)
 
 int main(int argc, char **argv)
 {
+    int32_t deviceId = 0;
+    if (argc == 2) {
+        deviceId = std::stoi(argv[1]);
+    }
     // kv隐藏层大小，用于输出tensor shape
     uint32_t kvHiddenSize = KV_HEAD_NUM * HEAD_SIZE;
     // 设置卡号、创建context、设置stream
     CHECK_STATUS(aclInit(nullptr));
-    int32_t deviceId = 0;
     CHECK_STATUS(aclrtSetDevice(deviceId));
     atb::Context *context = nullptr;
     CHECK_STATUS(atb::CreateContext(&context));
diff --git a/example/op_demo/self_attention/self_attention_encoder_inference_demo.cpp b/example/op_demo/self_attention/self_attention_encoder_inference_demo.cpp
index 5b6b7551..06f3f6f9 100644
--- a/example/op_demo/self_attention/self_attention_encoder_inference_demo.cpp
+++ b/example/op_demo/self_attention/self_attention_encoder_inference_demo.cpp
@@ -104,9 +104,12 @@ int main(int argc, char **argv)
     // kv隐藏层大小，用于输出tensor shape
     uint32_t kvHiddenSize = KV_HEAD_NUM * HEAD_SIZE;
 
+    int32_t deviceId = 0;
+    if (argc == 2) {
+        deviceId = std::stoi(argv[1]);
+    }
     // 设置卡号、创建context、设置stream
     CHECK_STATUS(aclInit(nullptr));
-    int32_t deviceId = 0;
     CHECK_STATUS(aclrtSetDevice(deviceId));
     atb::Context *context = nullptr;
     CHECK_STATUS(atb::CreateContext(&context));
diff --git a/example/op_demo/self_attention/self_attention_pa_encoder_demo.cpp b/example/op_demo/self_attention/self_attention_pa_encoder_demo.cpp
index 125ccf67..91f5f0a1 100644
--- a/example/op_demo/self_attention/self_attention_pa_encoder_demo.cpp
+++ b/example/op_demo/self_attention/self_attention_pa_encoder_demo.cpp
@@ -85,9 +85,12 @@ atb::Status PrepareOperation(atb::Operation **paEncoderOp)
 
 int main(int argc, char **argv)
 {
+    int32_t deviceId = 0;
+    if (argc == 2) {
+        deviceId = std::stoi(argv[1]);
+    }
     CHECK_STATUS(aclInit(nullptr));
     // 设置卡号、创建context、设置stream
-    int32_t deviceId = 0;
     CHECK_STATUS(aclrtSetDevice(deviceId));
     atb::Context *context = nullptr;
     CHECK_STATUS(atb::CreateContext(&context));
diff --git a/example/op_demo/self_attention/self_attention_pa_encoder_qwen_demo.cpp b/example/op_demo/self_attention/self_attention_pa_encoder_qwen_demo.cpp
index a6acad4b..d2c7afe7 100644
--- a/example/op_demo/self_attention/self_attention_pa_encoder_qwen_demo.cpp
+++ b/example/op_demo/self_attention/self_attention_pa_encoder_qwen_demo.cpp
@@ -111,9 +111,12 @@ atb::Status PrepareOperation(atb::Operation **paEncoderOp)
 
 int main(int argc, char **argv)
 {
+    int32_t deviceId = 0;
+    if (argc == 2) {
+        deviceId = std::stoi(argv[1]);
+    }
     CHECK_STATUS(aclInit(nullptr));
     // 设置卡号、创建context、设置stream
-    int32_t deviceId = 0;
     CHECK_STATUS(aclrtSetDevice(deviceId));
     atb::Context *context = nullptr;
     CHECK_STATUS(atb::CreateContext(&context));
diff --git a/example/op_demo/self_attention/self_attention_prefix_encoder_demo.cpp b/example/op_demo/self_attention/self_attention_prefix_encoder_demo.cpp
index 20ebcabb..974321ca 100644
--- a/example/op_demo/self_attention/self_attention_prefix_encoder_demo.cpp
+++ b/example/op_demo/self_attention/self_attention_prefix_encoder_demo.cpp
@@ -106,9 +106,12 @@ atb::Status PrepareOperation(atb::Operation **prefixEncoderOp)
 
 int main(int argc, char **argv)
 {
+    int32_t deviceId = 0;
+    if (argc == 2) {
+        deviceId = std::stoi(argv[1]);
+    }
     // 设置卡号、创建context、设置stream
     CHECK_STATUS(aclInit(nullptr));
-    int32_t deviceId = 0;
     CHECK_STATUS(aclrtSetDevice(deviceId));
     atb::Context *context = nullptr;
     CHECK_STATUS(atb::CreateContext(&context));
-- 
Gitee