diff --git a/single_op/compile/op_kernel_registry.cpp b/single_op/compile/op_kernel_registry.cpp index 469d37b53b82db340dd8d6a778c49fc7147d74c6..8851a636b9b742b764a43b619ce6f4c4ce1bd8d5 100644 --- a/single_op/compile/op_kernel_registry.cpp +++ b/single_op/compile/op_kernel_registry.cpp @@ -14,19 +14,21 @@ #include "utils/file_utils.h" #include "common/log_inner.h" #include "error_codes_inner.h" +#include "acl/acl_rt.h" namespace acl { namespace { const char_t *const STUB_NAME_PREFIX = "acl_dynamic_"; } // namespace -const void *OpKernelRegistry::GetStubFunc(const std::string &opType, const std::string &kernelId) +aclError OpKernelRegistry::GetKernelInfo(const std::string &opType, const std::string &kernelId, + aclrtBinHandle &binHandle, const void *&kernelName) { const std::lock_guard lk(mu_); std::map>>::const_iterator kernelsIter = kernels_.find(opType); if (kernelsIter == kernels_.cend()) { ACL_LOG_INNER_ERROR("[Find][OpType]No kernel was compiled for op = %s", opType.c_str()); - return nullptr; + return ACL_ERROR_INVALID_PARAM; } auto &kernelsOfOp = kernelsIter->second; @@ -34,22 +36,12 @@ const void *OpKernelRegistry::GetStubFunc(const std::string &opType, const std:: if (it == kernelsOfOp.cend()) { ACL_LOG_INNER_ERROR("[Find][KernelId]Kernel not compiled for opType = %s and kernelId = %s", opType.c_str(), kernelId.c_str()); - return nullptr; + return ACL_ERROR_INVALID_PARAM; } - return it->second->stubName.c_str(); -} - -OpKernelRegistry::~OpKernelRegistry() -{ - const std::lock_guard lk(mu_); - for (auto &kernelsOfOp : kernels_) { - ACL_LOG_DEBUG("To unregister kernel of op: %s", kernelsOfOp.first.c_str()); - for (auto &it : kernelsOfOp.second) { - ACL_LOG_DEBUG("To unregister bin by handle: %p, kernelId = %s", it.second->binHandle, it.first.c_str()); - (void)rtDevBinaryUnRegister(it.second->binHandle); - } - } + binHandle = it->second->binHandle; + kernelName = it->second->kernelName.c_str(); + return ACL_SUCCESS; } aclError OpKernelRegistry::Register(std::unique_ptr &®istration) @@ -62,41 +54,31 @@ aclError OpKernelRegistry::Register(std::unique_ptr &®i const auto iter = kernels_.find(registration->opType); if ((iter != kernels_.end()) && (iter->second.count(registration->kernelId) > 0U)) { ACL_LOG_INNER_ERROR("[Find][Kernel]Kernel already registered. kernelId = %s", - registration->kernelId.c_str()); + registration->kernelId.c_str()); return ACL_ERROR_KERNEL_ALREADY_REGISTERED; } registration->stubName = std::string(STUB_NAME_PREFIX); registration->stubName += registration->kernelId; - rtDevBinary_t binary; - binary.version = 0U; - binary.data = registration->binData; - binary.length = registration->binSize; + + aclrtBinHandle binHandle = nullptr; + aclrtBinaryLoadOptionValue val{}; if (registration->enginetype == ACL_ENGINE_AICORE) { - binary.magic = RT_DEV_BINARY_MAGIC_ELF; + val.magic = ACL_RT_BINARY_MAGIC_ELF_AICORE; } else if (registration->enginetype == ACL_ENGINE_VECTOR) { - binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; + val.magic = ACL_RT_BINARY_MAGIC_ELF_VECTOR_CORE; } else { return ACL_ERROR_INVALID_PARAM; } - void *binHandle = nullptr; - const auto ret = rtDevBinaryRegister(&binary, &binHandle); - if (ret != RT_ERROR_NONE) { - ACL_LOG_CALL_ERROR("[Register][Dev]rtDevBinaryRegister failed, runtime errorCode = %d", - static_cast(ret)); - return ACL_GET_ERRCODE_RTS(ret); + aclrtBinaryLoadOption opt{ACL_RT_BINARY_LOAD_OPT_MAGIC, val}; + aclrtBinaryLoadOptions options{&opt, 1U}; + auto ret = aclrtBinaryLoadFromData(registration->binData, registration->binSize, &options, &binHandle); + if (ret != ACL_ERROR_NONE) { + ACL_LOG_CALL_ERROR("[Register][Dev]aclrtBinaryLoadFromData failed. kernel name = %s, " + "runtime errorCode = %d", registration->kernelName.c_str(), + static_cast(ret)); + return ret; } - - const auto rtRet = rtFunctionRegister(binHandle, registration->stubName.c_str(), registration->stubName.c_str(), - registration->kernelName.c_str(), static_cast(FUNC_MODE_NORMAL)); - if (rtRet != RT_ERROR_NONE) { - (void)rtDevBinaryUnRegister(binHandle); - ACL_LOG_CALL_ERROR("[Register][Dev]rtFunctionRegister failed. bin key = %s, kernel name = %s, " - "runtime errorCode = %d", registration->stubName.c_str(), registration->kernelName.c_str(), - static_cast(rtRet)); - return ACL_GET_ERRCODE_RTS(rtRet); - } - registration->binHandle = binHandle; registration->deallocator = deallocator; (void)kernels_[registration->opType].emplace(registration->kernelId, std::move(registration)); diff --git a/single_op/compile/op_kernel_registry.h b/single_op/compile/op_kernel_registry.h index 891a9e5bd70a5bb5789086a99a08da8e9077eeaa..e550a46cb1f8bd787d44faa10a989802b2895178 100644 --- a/single_op/compile/op_kernel_registry.h +++ b/single_op/compile/op_kernel_registry.h @@ -18,6 +18,7 @@ #include #include "acl/acl_op.h" +#include "acl/acl_rt.h" namespace acl { struct OpKernelRegistration { @@ -37,12 +38,12 @@ struct OpKernelRegistration { void *binData = nullptr; uint64_t binSize = 0UL; void (*deallocator)(void *data, size_t length) = nullptr; - void *binHandle = nullptr; + aclrtBinHandle binHandle = nullptr; }; class OpKernelRegistry { public: - ~OpKernelRegistry(); + ~OpKernelRegistry() = default; static OpKernelRegistry &GetInstance() { @@ -50,7 +51,8 @@ public: return instance; } - const void *GetStubFunc(const std::string &opType, const std::string &kernelId); + aclError GetKernelInfo(const std::string &opType, const std::string &kernelId, + aclrtBinHandle &binHandle, const void *&kernelName); aclError Register(std::unique_ptr &®istration); diff --git a/single_op/compile/op_kernel_selector.cpp b/single_op/compile/op_kernel_selector.cpp index 6cc2674d0acfb526d8a36deb44a91e2e131b0b70..91fd26fe7bb1204a2ce2b64fc55847081c9ae885 100644 --- a/single_op/compile/op_kernel_selector.cpp +++ b/single_op/compile/op_kernel_selector.cpp @@ -79,16 +79,17 @@ aclError OpKernelSelector::SelectOpKernel(const AclOp &op) ACL_CHECK_MALLOC_RESULT(desc); ACL_REQUIRES_OK(InsertAclop2KernelDesc(op, desc)); ACL_LOG_DEBUG("To invoke select func, opType = %s", op.opType.c_str()); - const auto ret = func(op.numInputs, op.inputDesc, op.numOutputs, op.outputDesc, op.opAttr, desc.get()); + auto ret = func(op.numInputs, op.inputDesc, op.numOutputs, op.outputDesc, op.opAttr, desc.get()); if (ret != ACL_SUCCESS) { ACL_LOG_INNER_ERROR("[Call][Compile]Failed to call op compile, errorCode = %d", ret); return ret; } ACL_LOG_DEBUG("selecting kernel succeeded. kernelId = %s", desc->kernelId.c_str()); - desc->stubFunc = OpKernelRegistry::GetInstance().GetStubFunc(op.opType, desc->kernelId); - if (desc->stubFunc == nullptr) { - ACL_LOG_INNER_ERROR("Stub function not registered. kernelId = %s", desc->kernelId.c_str()); + ret = OpKernelRegistry::GetInstance().GetKernelInfo(op.opType, desc->kernelId, + desc->binHandle, desc->kernelName); + if (ret != ACL_ERROR_NONE) { + ACL_LOG_INNER_ERROR("kernel not registered. kernelId = %s", desc->kernelId.c_str()); return ACL_ERROR_KERNEL_NOT_FOUND; } desc->timestamp = attr_utils::GetCurrentTimestamp(); diff --git a/single_op/compile/op_kernel_selector.h b/single_op/compile/op_kernel_selector.h index ae5af3228ceae01ec570a247538c76d244fb30fb..f3772cd32c37507c1b50e8070bb994c7cd779a38 100644 --- a/single_op/compile/op_kernel_selector.h +++ b/single_op/compile/op_kernel_selector.h @@ -23,7 +23,8 @@ struct aclopKernelDesc { std::string kernelId; - const void *stubFunc = nullptr; // no need for deallocating + aclrtBinHandle binHandle = nullptr; + const void *kernelName = nullptr; uint32_t blockDim = 0U; std::vector workspaceSizes; std::string extendArgs; diff --git a/single_op/executor/op_task.cpp b/single_op/executor/op_task.cpp index 14ed333c8f51691701197f082aea999b9467810f..b5fda9e3f11109bda714d1945f1721db2acefa92 100755 --- a/single_op/executor/op_task.cpp +++ b/single_op/executor/op_task.cpp @@ -10,11 +10,12 @@ #include "op_task.h" #include "runtime/rt.h" +#include "runtime/rts/rts_kernel.h" #include "error_codes_inner.h" namespace acl { -TbeOpTask::TbeOpTask(const void *const stubFunction, const uint32_t block) : OpTask(), - stubFunc_(stubFunction), blockDim_(block) +TbeOpTask::TbeOpTask(aclrtBinHandle binHandle, const void *kernelName, const uint32_t block) : OpTask(), + binHandle_(binHandle), kernelName_(kernelName), blockDim_(block) { } @@ -37,11 +38,17 @@ aclError TbeOpTask::ExecuteAsync(const int32_t numInputs, } // launch kernel - ACL_LOG_DEBUG("To launch kernel, stubFunc = %s, block dim = %u, arg size = %u", - reinterpret_cast(stubFunc_), blockDim_, argSize_); - const auto ret = rtKernelLaunch(stubFunc_, blockDim_, const_cast(args_.get()), - argSize_, nullptr, stream); - return ACL_GET_ERRCODE_RTS(ret); + ACL_LOG_DEBUG("To launch kernel, binHandle = %p, kernelName = %s, block dim = %u, arg size = %u", + binHandle_, reinterpret_cast(kernelName_), blockDim_, argSize_); + aclrtFuncHandle funcHandle = nullptr; + auto ret = aclrtBinaryGetFunction(binHandle_, static_cast(kernelName_), &funcHandle); + if (ret != ACL_ERROR_NONE) { + ACL_LOG_ERROR("Fail to get func from binHandle = %p, kernelName = %p", + binHandle_, static_cast(kernelName_)); + return ret; + } + return rtsLaunchKernelWithHostArgs(funcHandle, blockDim_, stream, nullptr, const_cast(args_.get()), + argSize_, nullptr, 0U); } void OpTask::SetArgs(std::unique_ptr &&args, const uint32_t argSize) diff --git a/single_op/executor/op_task.h b/single_op/executor/op_task.h index f605b09eba2e301cb967c9742eeb03315aca8eee..1dc8ec8df6063963ded4c5def1c99e0bf1403162 100755 --- a/single_op/executor/op_task.h +++ b/single_op/executor/op_task.h @@ -31,7 +31,7 @@ protected: class TbeOpTask : public OpTask { public: - TbeOpTask(const void *const stubFunction, const uint32_t block); + TbeOpTask(aclrtBinHandle binHandle, const void *kernelName, const uint32_t block); ~TbeOpTask() = default; aclError ExecuteAsync(const int32_t numInputs, @@ -41,7 +41,8 @@ public: const aclrtStream stream) override; private: - const void *stubFunc_; + aclrtBinHandle binHandle_; + const void *kernelName_; uint32_t blockDim_; }; } // namespace acl diff --git a/single_op/executor/stream_executor.cpp b/single_op/executor/stream_executor.cpp index f3fbb4b7d45c396b1c515fdb6f1ae815d18d6ac4..d3a278f4ad6c6a446b2ed4b4100a6c8d6079e061 100755 --- a/single_op/executor/stream_executor.cpp +++ b/single_op/executor/stream_executor.cpp @@ -69,7 +69,7 @@ aclError StreamExecutor::ExecuteAsync(OpKernelDesc &kernelDesc, { ACL_LOG_DEBUG("Start to execute op by dynamic kernel"); kernelDesc.timestamp = attr_utils::GetCurrentTimestamp(); - TbeOpTask task(kernelDesc.stubFunc, kernelDesc.blockDim); + TbeOpTask task(kernelDesc.binHandle, kernelDesc.kernelName, kernelDesc.blockDim); ACL_REQUIRES_OK(InitTbeTask(kernelDesc, numInputs, numOutputs, task)); ACL_REQUIRES_OK(task.ExecuteAsync(numInputs, inputs, numOutputs, outputs, stream_)); return ACL_SUCCESS;