From 733a139b247b137c09963800a7c8a48ffb0f4c3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=82=85=E9=AA=8F?= Date: Tue, 28 Oct 2025 22:17:30 +0800 Subject: [PATCH] test aclop launch --- single_op/compile/op_kernel_registry.cpp | 62 +++++++++--------------- single_op/compile/op_kernel_registry.h | 8 +-- single_op/compile/op_kernel_selector.cpp | 9 ++-- single_op/compile/op_kernel_selector.h | 3 +- single_op/executor/op_task.cpp | 21 +++++--- single_op/executor/op_task.h | 5 +- single_op/executor/stream_executor.cpp | 2 +- 7 files changed, 52 insertions(+), 58 deletions(-) diff --git a/single_op/compile/op_kernel_registry.cpp b/single_op/compile/op_kernel_registry.cpp index 469d37b5..8851a636 100644 --- a/single_op/compile/op_kernel_registry.cpp +++ b/single_op/compile/op_kernel_registry.cpp @@ -14,19 +14,21 @@ #include "utils/file_utils.h" #include "common/log_inner.h" #include "error_codes_inner.h" +#include "acl/acl_rt.h" namespace acl { namespace { const char_t *const STUB_NAME_PREFIX = "acl_dynamic_"; } // namespace -const void *OpKernelRegistry::GetStubFunc(const std::string &opType, const std::string &kernelId) +aclError OpKernelRegistry::GetKernelInfo(const std::string &opType, const std::string &kernelId, + aclrtBinHandle &binHandle, const void *&kernelName) { const std::lock_guard lk(mu_); std::map>>::const_iterator kernelsIter = kernels_.find(opType); if (kernelsIter == kernels_.cend()) { ACL_LOG_INNER_ERROR("[Find][OpType]No kernel was compiled for op = %s", opType.c_str()); - return nullptr; + return ACL_ERROR_INVALID_PARAM; } auto &kernelsOfOp = kernelsIter->second; @@ -34,22 +36,12 @@ const void *OpKernelRegistry::GetStubFunc(const std::string &opType, const std:: if (it == kernelsOfOp.cend()) { ACL_LOG_INNER_ERROR("[Find][KernelId]Kernel not compiled for opType = %s and kernelId = %s", opType.c_str(), kernelId.c_str()); - return nullptr; + return ACL_ERROR_INVALID_PARAM; } - return it->second->stubName.c_str(); -} - -OpKernelRegistry::~OpKernelRegistry() -{ - const std::lock_guard lk(mu_); - for (auto &kernelsOfOp : kernels_) { - ACL_LOG_DEBUG("To unregister kernel of op: %s", kernelsOfOp.first.c_str()); - for (auto &it : kernelsOfOp.second) { - ACL_LOG_DEBUG("To unregister bin by handle: %p, kernelId = %s", it.second->binHandle, it.first.c_str()); - (void)rtDevBinaryUnRegister(it.second->binHandle); - } - } + binHandle = it->second->binHandle; + kernelName = it->second->kernelName.c_str(); + return ACL_SUCCESS; } aclError OpKernelRegistry::Register(std::unique_ptr &®istration) @@ -62,41 +54,31 @@ aclError OpKernelRegistry::Register(std::unique_ptr &®i const auto iter = kernels_.find(registration->opType); if ((iter != kernels_.end()) && (iter->second.count(registration->kernelId) > 0U)) { ACL_LOG_INNER_ERROR("[Find][Kernel]Kernel already registered. kernelId = %s", - registration->kernelId.c_str()); + registration->kernelId.c_str()); return ACL_ERROR_KERNEL_ALREADY_REGISTERED; } registration->stubName = std::string(STUB_NAME_PREFIX); registration->stubName += registration->kernelId; - rtDevBinary_t binary; - binary.version = 0U; - binary.data = registration->binData; - binary.length = registration->binSize; + + aclrtBinHandle binHandle = nullptr; + aclrtBinaryLoadOptionValue val{}; if (registration->enginetype == ACL_ENGINE_AICORE) { - binary.magic = RT_DEV_BINARY_MAGIC_ELF; + val.magic = ACL_RT_BINARY_MAGIC_ELF_AICORE; } else if (registration->enginetype == ACL_ENGINE_VECTOR) { - binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; + val.magic = ACL_RT_BINARY_MAGIC_ELF_VECTOR_CORE; } else { return ACL_ERROR_INVALID_PARAM; } - void *binHandle = nullptr; - const auto ret = rtDevBinaryRegister(&binary, &binHandle); - if (ret != RT_ERROR_NONE) { - ACL_LOG_CALL_ERROR("[Register][Dev]rtDevBinaryRegister failed, runtime errorCode = %d", - static_cast(ret)); - return ACL_GET_ERRCODE_RTS(ret); + aclrtBinaryLoadOption opt{ACL_RT_BINARY_LOAD_OPT_MAGIC, val}; + aclrtBinaryLoadOptions options{&opt, 1U}; + auto ret = aclrtBinaryLoadFromData(registration->binData, registration->binSize, &options, &binHandle); + if (ret != ACL_ERROR_NONE) { + ACL_LOG_CALL_ERROR("[Register][Dev]aclrtBinaryLoadFromData failed. kernel name = %s, " + "runtime errorCode = %d", registration->kernelName.c_str(), + static_cast(ret)); + return ret; } - - const auto rtRet = rtFunctionRegister(binHandle, registration->stubName.c_str(), registration->stubName.c_str(), - registration->kernelName.c_str(), static_cast(FUNC_MODE_NORMAL)); - if (rtRet != RT_ERROR_NONE) { - (void)rtDevBinaryUnRegister(binHandle); - ACL_LOG_CALL_ERROR("[Register][Dev]rtFunctionRegister failed. bin key = %s, kernel name = %s, " - "runtime errorCode = %d", registration->stubName.c_str(), registration->kernelName.c_str(), - static_cast(rtRet)); - return ACL_GET_ERRCODE_RTS(rtRet); - } - registration->binHandle = binHandle; registration->deallocator = deallocator; (void)kernels_[registration->opType].emplace(registration->kernelId, std::move(registration)); diff --git a/single_op/compile/op_kernel_registry.h b/single_op/compile/op_kernel_registry.h index 891a9e5b..e550a46c 100644 --- a/single_op/compile/op_kernel_registry.h +++ b/single_op/compile/op_kernel_registry.h @@ -18,6 +18,7 @@ #include #include "acl/acl_op.h" +#include "acl/acl_rt.h" namespace acl { struct OpKernelRegistration { @@ -37,12 +38,12 @@ struct OpKernelRegistration { void *binData = nullptr; uint64_t binSize = 0UL; void (*deallocator)(void *data, size_t length) = nullptr; - void *binHandle = nullptr; + aclrtBinHandle binHandle = nullptr; }; class OpKernelRegistry { public: - ~OpKernelRegistry(); + ~OpKernelRegistry() = default; static OpKernelRegistry &GetInstance() { @@ -50,7 +51,8 @@ public: return instance; } - const void *GetStubFunc(const std::string &opType, const std::string &kernelId); + aclError GetKernelInfo(const std::string &opType, const std::string &kernelId, + aclrtBinHandle &binHandle, const void *&kernelName); aclError Register(std::unique_ptr &®istration); diff --git a/single_op/compile/op_kernel_selector.cpp b/single_op/compile/op_kernel_selector.cpp index 6cc2674d..91fd26fe 100644 --- a/single_op/compile/op_kernel_selector.cpp +++ b/single_op/compile/op_kernel_selector.cpp @@ -79,16 +79,17 @@ aclError OpKernelSelector::SelectOpKernel(const AclOp &op) ACL_CHECK_MALLOC_RESULT(desc); ACL_REQUIRES_OK(InsertAclop2KernelDesc(op, desc)); ACL_LOG_DEBUG("To invoke select func, opType = %s", op.opType.c_str()); - const auto ret = func(op.numInputs, op.inputDesc, op.numOutputs, op.outputDesc, op.opAttr, desc.get()); + auto ret = func(op.numInputs, op.inputDesc, op.numOutputs, op.outputDesc, op.opAttr, desc.get()); if (ret != ACL_SUCCESS) { ACL_LOG_INNER_ERROR("[Call][Compile]Failed to call op compile, errorCode = %d", ret); return ret; } ACL_LOG_DEBUG("selecting kernel succeeded. kernelId = %s", desc->kernelId.c_str()); - desc->stubFunc = OpKernelRegistry::GetInstance().GetStubFunc(op.opType, desc->kernelId); - if (desc->stubFunc == nullptr) { - ACL_LOG_INNER_ERROR("Stub function not registered. kernelId = %s", desc->kernelId.c_str()); + ret = OpKernelRegistry::GetInstance().GetKernelInfo(op.opType, desc->kernelId, + desc->binHandle, desc->kernelName); + if (ret != ACL_ERROR_NONE) { + ACL_LOG_INNER_ERROR("kernel not registered. kernelId = %s", desc->kernelId.c_str()); return ACL_ERROR_KERNEL_NOT_FOUND; } desc->timestamp = attr_utils::GetCurrentTimestamp(); diff --git a/single_op/compile/op_kernel_selector.h b/single_op/compile/op_kernel_selector.h index ae5af322..f3772cd3 100644 --- a/single_op/compile/op_kernel_selector.h +++ b/single_op/compile/op_kernel_selector.h @@ -23,7 +23,8 @@ struct aclopKernelDesc { std::string kernelId; - const void *stubFunc = nullptr; // no need for deallocating + aclrtBinHandle binHandle = nullptr; + const void *kernelName = nullptr; uint32_t blockDim = 0U; std::vector workspaceSizes; std::string extendArgs; diff --git a/single_op/executor/op_task.cpp b/single_op/executor/op_task.cpp index 14ed333c..b5fda9e3 100755 --- a/single_op/executor/op_task.cpp +++ b/single_op/executor/op_task.cpp @@ -10,11 +10,12 @@ #include "op_task.h" #include "runtime/rt.h" +#include "runtime/rts/rts_kernel.h" #include "error_codes_inner.h" namespace acl { -TbeOpTask::TbeOpTask(const void *const stubFunction, const uint32_t block) : OpTask(), - stubFunc_(stubFunction), blockDim_(block) +TbeOpTask::TbeOpTask(aclrtBinHandle binHandle, const void *kernelName, const uint32_t block) : OpTask(), + binHandle_(binHandle), kernelName_(kernelName), blockDim_(block) { } @@ -37,11 +38,17 @@ aclError TbeOpTask::ExecuteAsync(const int32_t numInputs, } // launch kernel - ACL_LOG_DEBUG("To launch kernel, stubFunc = %s, block dim = %u, arg size = %u", - reinterpret_cast(stubFunc_), blockDim_, argSize_); - const auto ret = rtKernelLaunch(stubFunc_, blockDim_, const_cast(args_.get()), - argSize_, nullptr, stream); - return ACL_GET_ERRCODE_RTS(ret); + ACL_LOG_DEBUG("To launch kernel, binHandle = %p, kernelName = %s, block dim = %u, arg size = %u", + binHandle_, reinterpret_cast(kernelName_), blockDim_, argSize_); + aclrtFuncHandle funcHandle = nullptr; + auto ret = aclrtBinaryGetFunction(binHandle_, static_cast(kernelName_), &funcHandle); + if (ret != ACL_ERROR_NONE) { + ACL_LOG_ERROR("Fail to get func from binHandle = %p, kernelName = %p", + binHandle_, static_cast(kernelName_)); + return ret; + } + return rtsLaunchKernelWithHostArgs(funcHandle, blockDim_, stream, nullptr, const_cast(args_.get()), + argSize_, nullptr, 0U); } void OpTask::SetArgs(std::unique_ptr &&args, const uint32_t argSize) diff --git a/single_op/executor/op_task.h b/single_op/executor/op_task.h index f605b09e..1dc8ec8d 100755 --- a/single_op/executor/op_task.h +++ b/single_op/executor/op_task.h @@ -31,7 +31,7 @@ protected: class TbeOpTask : public OpTask { public: - TbeOpTask(const void *const stubFunction, const uint32_t block); + TbeOpTask(aclrtBinHandle binHandle, const void *kernelName, const uint32_t block); ~TbeOpTask() = default; aclError ExecuteAsync(const int32_t numInputs, @@ -41,7 +41,8 @@ public: const aclrtStream stream) override; private: - const void *stubFunc_; + aclrtBinHandle binHandle_; + const void *kernelName_; uint32_t blockDim_; }; } // namespace acl diff --git a/single_op/executor/stream_executor.cpp b/single_op/executor/stream_executor.cpp index f3fbb4b7..d3a278f4 100755 --- a/single_op/executor/stream_executor.cpp +++ b/single_op/executor/stream_executor.cpp @@ -69,7 +69,7 @@ aclError StreamExecutor::ExecuteAsync(OpKernelDesc &kernelDesc, { ACL_LOG_DEBUG("Start to execute op by dynamic kernel"); kernelDesc.timestamp = attr_utils::GetCurrentTimestamp(); - TbeOpTask task(kernelDesc.stubFunc, kernelDesc.blockDim); + TbeOpTask task(kernelDesc.binHandle, kernelDesc.kernelName, kernelDesc.blockDim); ACL_REQUIRES_OK(InitTbeTask(kernelDesc, numInputs, numOutputs, task)); ACL_REQUIRES_OK(task.ExecuteAsync(numInputs, inputs, numOutputs, outputs, stream_)); return ACL_SUCCESS; -- Gitee