diff --git a/example/op_demo/ring_mla/README.md b/example/op_demo/ring_mla/README.md index 4011ef7e2d38bde2dd2fc8a5ceeca1ef6127d933..d29bb1b978b812e8184e462a28f1b89792ead699 100644 --- a/example/op_demo/ring_mla/README.md +++ b/example/op_demo/ring_mla/README.md @@ -24,6 +24,10 @@ ```sh g++ -D_GLIBCXX_USE_CXX11_ABI=1 -I ... ``` + - 生成的二进制文件***_demo可以额外传入一个int参数作为deviceId,默认为0,如: + ```sh + ./ring_mla_demo 0 + ``` ## 额外说明 示例中生成的数据不代表实际场景,如需数据生成参考请查看python用例目录: diff --git a/example/op_demo/ring_mla/build.sh b/example/op_demo/ring_mla/build.sh index cba32d3f5b5f2a0c1ec8923ee0c8a733c0bedbcc..3f2b3b330797c4df6df8e5921830663c79879efc 100644 --- a/example/op_demo/ring_mla/build.sh +++ b/example/op_demo/ring_mla/build.sh @@ -21,4 +21,4 @@ echo "Using cxx_abi=$cxx_abi" g++ -D_GLIBCXX_USE_CXX11_ABI=$cxx_abi -I "${ATB_HOME_PATH}/include" -I "${ASCEND_HOME_PATH}/include" -L "${ATB_HOME_PATH}/lib" -L "${ASCEND_HOME_PATH}/lib64" \ ring_mla_demo.cpp ../demo_util.h -l atb -l ascendcl -o ring_mla_demo -./ring_mla_demo +./ring_mla_demo 0 diff --git a/example/op_demo/ring_mla/ring_mla_demo.cpp b/example/op_demo/ring_mla/ring_mla_demo.cpp index c4eb1aa284b9adc52342cd6dd1daec06fc460f67..5cd65bf403c7cef44dfc1820a1b51e07a8cbf9b7 100644 --- a/example/op_demo/ring_mla/ring_mla_demo.cpp +++ b/example/op_demo/ring_mla/ring_mla_demo.cpp @@ -163,9 +163,13 @@ atb::Status RunRingMLADemo(atb::Context *contextPtr, aclrtStream stream, atb::Op int main(int argc, char **argv) { + int32_t deviceId = 0; + if (argc == 2) { + deviceId = std::stoi(argv[1]); + } CHECK_STATUS(aclInit(nullptr)); // 设置卡号、创建context、设置stream - int32_t deviceId = 1; + CHECK_STATUS(aclrtSetDevice(deviceId)); atb::Context *context = nullptr; diff --git a/example/op_demo/self_attention/README.md b/example/op_demo/self_attention/README.md index 1b56c3afc1f6e627bf6441383f319293b31ea2fd..0dd64c3a86e1ed7d33eb04f22716e7963405fb9d 100644 --- a/example/op_demo/self_attention/README.md +++ b/example/op_demo/self_attention/README.md @@ -24,6 
+24,10 @@ ```sh g++ -D_GLIBCXX_USE_CXX11_ABI=1 -I ... ``` + - 生成的二进制文件***_demo可以额外传入一个int参数作为deviceId,默认为0,如: + ```sh + ./self_attention_encoder_demo 0 + ``` - 提供的build脚本仅用于编译和运行self_attention_encoder_demo.cpp,如需编译其他demo,需要替换“self_attention_encoder_demo”为对应的cpp文件名 ## 额外说明 @@ -65,16 +69,12 @@ SelfAttention在Atlas A2/A3系列仅支持部分场景,且Encoder场景在Atla | `seqLen` | int32 | nd | [10] | cpu | | `layerId` | int32 | nd | [1] | npu | | **Output** | -| `output` | float16 | nd | [1] | npu | +| `output` | float16 | nd | [160, 2048] | npu | + q,k,v第一维度为总词元长度,即`sum(seqlen)` + q,k,v第二维度headNum,headSize合轴,实际为headNum(32) $\times$ headSize(128) #### self_attention_encoder_inference_demo.cpp + 场景:fa encoder基础场景在Atlas推理系列上的实现,分开传入key,CacheK,value和CacheV -+ 更改编译脚本为: - `g++ -D_GLIBCXX_USE_CXX11_ABI=$cxx_abi -I "${ATB_HOME_PATH}/include" -I "${ASCEND_HOME_PATH}/include" -L "${ATB_HOME_PATH}/lib" -L "${ASCEND_HOME_PATH}/lib64" self_attention_encoder_inference_demo.cpp demo_util.h -l atb -l ascendcl -o self_attention_encoder_inference_demo` -- 运行时调用: -`./self_attention_encoder_inference_demo` + 该demo仅支持在Atlas推理系列上运行 **参数设置**: diff --git a/example/op_demo/self_attention/build.sh b/example/op_demo/self_attention/build.sh index 5ee3d5649d43230706afecefaa2d1243344dabd0..b132a42ac637ce49a80853ca32b5269d2e6958af 100644 --- a/example/op_demo/self_attention/build.sh +++ b/example/op_demo/self_attention/build.sh @@ -21,4 +21,4 @@ echo "Using cxx_abi=$cxx_abi" g++ -D_GLIBCXX_USE_CXX11_ABI=$cxx_abi -I "${ATB_HOME_PATH}/include" -I "${ASCEND_HOME_PATH}/include" -L "${ATB_HOME_PATH}/lib" -L "${ASCEND_HOME_PATH}/lib64" \ self_attention_encoder_demo.cpp ../demo_util.h -l atb -l ascendcl -o self_attention_encoder_demo -./self_attention_encoder_demo +./self_attention_encoder_demo 0 diff --git a/example/op_demo/self_attention/self_attention_encoder_demo.cpp b/example/op_demo/self_attention/self_attention_encoder_demo.cpp index b65e5d05c0e76b14085ece0d8149ff3376be3e26..4fd7b149edacda4baec54ce9fd411469e12eee2b 
100644 --- a/example/op_demo/self_attention/self_attention_encoder_demo.cpp +++ b/example/op_demo/self_attention/self_attention_encoder_demo.cpp @@ -115,11 +115,14 @@ atb::Status PrepareOperation(atb::Operation **encoderOp) int main(int argc, char **argv) { + int32_t deviceId = 0; + if (argc == 2) { + deviceId = std::stoi(argv[1]); + } // kv隐藏层大小,用于输出tensor shape uint32_t kvHiddenSize = KV_HEAD_NUM * HEAD_SIZE; // 设置卡号、创建context、设置stream CHECK_STATUS(aclInit(nullptr)); - int32_t deviceId = 0; CHECK_STATUS(aclrtSetDevice(deviceId)); atb::Context *context = nullptr; CHECK_STATUS(atb::CreateContext(&context)); diff --git a/example/op_demo/self_attention/self_attention_encoder_inference_demo.cpp b/example/op_demo/self_attention/self_attention_encoder_inference_demo.cpp index 5b6b755190903b90de0a707212aeb9c75a642e68..06f3f6f9b23d1de078e11b609548f1fe3f6a9be4 100644 --- a/example/op_demo/self_attention/self_attention_encoder_inference_demo.cpp +++ b/example/op_demo/self_attention/self_attention_encoder_inference_demo.cpp @@ -104,9 +104,12 @@ int main(int argc, char **argv) // kv隐藏层大小,用于输出tensor shape uint32_t kvHiddenSize = KV_HEAD_NUM * HEAD_SIZE; + int32_t deviceId = 0; + if (argc == 2) { + deviceId = std::stoi(argv[1]); + } // 设置卡号、创建context、设置stream CHECK_STATUS(aclInit(nullptr)); - int32_t deviceId = 0; CHECK_STATUS(aclrtSetDevice(deviceId)); atb::Context *context = nullptr; CHECK_STATUS(atb::CreateContext(&context)); diff --git a/example/op_demo/self_attention/self_attention_pa_encoder_demo.cpp b/example/op_demo/self_attention/self_attention_pa_encoder_demo.cpp index 125ccf67d2340bc03b1dd489765aa0f57e7c5bd4..91f5f0a168f8bc5095c2a3a46192a25eebca17fb 100644 --- a/example/op_demo/self_attention/self_attention_pa_encoder_demo.cpp +++ b/example/op_demo/self_attention/self_attention_pa_encoder_demo.cpp @@ -85,9 +85,12 @@ atb::Status PrepareOperation(atb::Operation **paEncoderOp) int main(int argc, char **argv) { + int32_t deviceId = 0; + if (argc == 2) { + deviceId = 
std::stoi(argv[1]); + } CHECK_STATUS(aclInit(nullptr)); // 设置卡号、创建context、设置stream - int32_t deviceId = 0; CHECK_STATUS(aclrtSetDevice(deviceId)); atb::Context *context = nullptr; CHECK_STATUS(atb::CreateContext(&context)); diff --git a/example/op_demo/self_attention/self_attention_pa_encoder_qwen_demo.cpp b/example/op_demo/self_attention/self_attention_pa_encoder_qwen_demo.cpp index a6acad4b27a0c842be24acba0562c51f48b40548..d2c7afe74a50de2f0eb04b8c5fe499b07658d1a3 100644 --- a/example/op_demo/self_attention/self_attention_pa_encoder_qwen_demo.cpp +++ b/example/op_demo/self_attention/self_attention_pa_encoder_qwen_demo.cpp @@ -111,9 +111,12 @@ atb::Status PrepareOperation(atb::Operation **paEncoderOp) int main(int argc, char **argv) { + int32_t deviceId = 0; + if (argc == 2) { + deviceId = std::stoi(argv[1]); + } CHECK_STATUS(aclInit(nullptr)); // 设置卡号、创建context、设置stream - int32_t deviceId = 0; CHECK_STATUS(aclrtSetDevice(deviceId)); atb::Context *context = nullptr; CHECK_STATUS(atb::CreateContext(&context)); diff --git a/example/op_demo/self_attention/self_attention_prefix_encoder_demo.cpp b/example/op_demo/self_attention/self_attention_prefix_encoder_demo.cpp index 20ebcabb62d5f910d6b8c5aa0e1fab530a21ef3c..974321ca6d907bf2f64275979906332f08795eef 100644 --- a/example/op_demo/self_attention/self_attention_prefix_encoder_demo.cpp +++ b/example/op_demo/self_attention/self_attention_prefix_encoder_demo.cpp @@ -106,9 +106,12 @@ atb::Status PrepareOperation(atb::Operation **prefixEncoderOp) int main(int argc, char **argv) { + int32_t deviceId = 0; + if (argc == 2) { + deviceId = std::stoi(argv[1]); + } // 设置卡号、创建context、设置stream CHECK_STATUS(aclInit(nullptr)); - int32_t deviceId = 0; CHECK_STATUS(aclrtSetDevice(deviceId)); atb::Context *context = nullptr; CHECK_STATUS(atb::CreateContext(&context));