diff --git a/frameworks/native/neural_network_core/cpp_type.h b/frameworks/native/neural_network_core/cpp_type.h
index 82183790444a2fe89c8b77c27aad39245cbc2f22..18f2a3f01b2bad56bb711ebe1850b66af0ad6062 100644
--- a/frameworks/native/neural_network_core/cpp_type.h
+++ b/frameworks/native/neural_network_core/cpp_type.h
@@ -56,6 +56,7 @@ struct ExtensionConfig {
     std::vector<std::vector<int32_t>> inputDims;
     std::vector<std::vector<int32_t>> dynamicDims;
     bool isNpuFmShared = false;
+    bool isExceedRamLimit = false;
 };
 
 struct ModelConfig {
diff --git a/frameworks/native/neural_network_core/neural_network_core.cpp b/frameworks/native/neural_network_core/neural_network_core.cpp
index 6e689f02fbed85726ca8548fdcad4d335f972fdf..276aa820b98db4c97e6f750999b05ed137da0414 100644
--- a/frameworks/native/neural_network_core/neural_network_core.cpp
+++ b/frameworks/native/neural_network_core/neural_network_core.cpp
@@ -510,7 +510,9 @@ OH_NN_ReturnCode CheckExceedRamLimit(const Compilation* compilation, bool& isExc
     } else if (compilation->offlineModelPath != nullptr) {
         ret = nnrtService.CheckModelSizeFromPath(compilation->offlineModelPath, isExceedRamLimit);
     } else if (compilation->cachePath != nullptr) {
-        ret = nnrtService.CheckModelSizeFromPath(compilation->cachePath, isExceedRamLimit);
+        std::string modelName;
+        compilation->compiler->GetModelName(modelName);
+        ret = nnrtService.CheckModelSizeFromCache(compilation->cachePath, modelName, isExceedRamLimit);
     } else if ((compilation->offlineModelBuffer.first != nullptr) && \
         (compilation->offlineModelBuffer.second != size_t(0))) {
         ret = nnrtService.CheckModelSizeFromBuffer(
@@ -532,9 +534,8 @@ OH_NN_ReturnCode CheckExceedRamLimit(const Compilation* compilation, bool& isExc
     return OH_NN_SUCCESS;
 }
 
-OH_NN_ReturnCode AuthenticateModel(const Compilation* compilation)
+OH_NN_ReturnCode AuthenticateModel(const Compilation* compilation, bool &isExceedRamLimit)
 {
-    bool isExceedRamLimit = false;
     OH_NN_ReturnCode retCode = CheckExceedRamLimit(compilation, isExceedRamLimit);
     if (retCode != OH_NN_SUCCESS) {
         LOGE("AuthenticateModel failed, fail to check if model exceed ram limit.");
@@ -582,7 +583,7 @@ OH_NN_ReturnCode AuthenticateModel(const Compilation* compilation)
     return OH_NN_SUCCESS;
 }
 
-OH_NN_ReturnCode Authentication(Compilation** compilation)
+OH_NN_ReturnCode Authentication(Compilation** compilation, bool &isExceedRamLimit)
 {
     if (compilation == nullptr) {
         LOGE("Authentication failed, compilation is nullptr.");
@@ -601,7 +602,7 @@ OH_NN_ReturnCode Authentication(Compilation** compilation)
         return OH_NN_SUCCESS;
     }
 
-    OH_NN_ReturnCode ret = AuthenticateModel(compilationImpl);
+    OH_NN_ReturnCode ret = AuthenticateModel(compilationImpl, isExceedRamLimit);
     if (ret != OH_NN_SUCCESS) {
         LOGE("Authentication failed, fail to authenticate model.");
         return ret;
@@ -732,12 +733,26 @@ NNRT_API OH_NN_ReturnCode OH_NNCompilation_Build(OH_NNCompilation *compilation)
         return ret;
     }
 
-    ret = Authentication(&compilationImpl);
+    bool isExceedRamLimit = false;
+    ret = Authentication(&compilationImpl, isExceedRamLimit);
     if (ret != OH_NN_SUCCESS) {
         LOGE("OH_NNCompilation_Build failed, fail to create compiler.");
         return ret;
     }
 
+    std::unordered_map<std::string, std::vector<char>> configs;
+    LOGI("[OH_NNCompilation_Build] model isExceedRamLimit: %{public}d", static_cast<int>(isExceedRamLimit));
+
+    std::vector<char> configContents;
+    if (isExceedRamLimit) {
+        configContents.push_back('1');
+    } else {
+        configContents.push_back('0');
+    }
+
+    configs["isExceedRamLimit"] = configContents;
+    compilationImpl->compiler->SetExtensionConfig(configs);
+
     bool isBuild = compilationImpl->compiler->IsBuild();
     if (isBuild) {
         LOGE("OH_NNCompilation_Build failed, compilation has been built, don't build again.");
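Note on the OH_NNCompilation_Build() change above: the RAM-limit verdict is handed to the compiler through the generic extension-config map, encoded as a one-character value ('1' = exceeds the limit, '0' = within it), rather than through a dedicated setter. Below is a minimal sketch of that convention; the helper names are illustrative and not part of the patch:

    #include <string>
    #include <unordered_map>
    #include <vector>

    // The flag travels as a single char in the extension-config map, matching
    // the "isExceedRamLimit" key and the '1'/'0' encoding used in the hunk above.
    using ExtensionConfigs = std::unordered_map<std::string, std::vector<char>>;

    ExtensionConfigs PackExceedRamLimit(bool isExceedRamLimit)
    {
        ExtensionConfigs configs;
        configs["isExceedRamLimit"] = {isExceedRamLimit ? '1' : '0'};
        return configs;
    }

    bool UnpackExceedRamLimit(const ExtensionConfigs& configs)
    {
        auto it = configs.find("isExceedRamLimit");
        if (it == configs.end() || it->second.empty()) {
            return false;  // absent or empty value: treat as "within the limit"
        }
        return it->second[0] == '1';
    }

The decode side of this convention is what the new branch in NNCompiler::SetExtensionConfig (nncompiler.cpp, further down) implements.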
diff --git a/frameworks/native/neural_network_core/nnrt_client.cpp b/frameworks/native/neural_network_core/nnrt_client.cpp
index c5e188397d1538812757bea47a74dc53b529dc7a..618efe1e5f28f06d642087651618ac9d5cf50fd1 100644
--- a/frameworks/native/neural_network_core/nnrt_client.cpp
+++ b/frameworks/native/neural_network_core/nnrt_client.cpp
@@ -58,6 +58,7 @@ NNRtServiceApi& NNRtServiceApi::GetInstance()
     }
 
     LoadFunction(libNNRtService, "CheckModelSizeFromPath", &nnrtService.CheckModelSizeFromPath);
+    LoadFunction(libNNRtService, "CheckModelSizeFromCache", &nnrtService.CheckModelSizeFromCache);
     LoadFunction(libNNRtService, "CheckModelSizeFromBuffer", &nnrtService.CheckModelSizeFromBuffer);
     LoadFunction(libNNRtService, "CheckModelSizeFromModel", &nnrtService.CheckModelSizeFromModel);
     LoadFunction(libNNRtService, "GetNNRtModelIDFromPath", &nnrtService.GetNNRtModelIDFromPath);
diff --git a/frameworks/native/neural_network_core/nnrt_client.h b/frameworks/native/neural_network_core/nnrt_client.h
index 628ab63339f71a4631e7446ca239b55ff02b53a6..8b87c900b4d4410b2df1609183630b038d55d3b1 100644
--- a/frameworks/native/neural_network_core/nnrt_client.h
+++ b/frameworks/native/neural_network_core/nnrt_client.h
@@ -27,6 +27,7 @@ public:
     bool IsServiceAvaliable() const;
 
     int (*CheckModelSizeFromPath)(const char* path, bool& exceedLimit) = nullptr;
+    int (*CheckModelSizeFromCache)(const char* path, const std::string& modelName, bool& exceedLimit) = nullptr;
     int (*CheckModelSizeFromBuffer)(const void* buffer, size_t size, bool& exceedLimit) = nullptr;
     int (*CheckModelSizeFromModel)(void* model, bool& exceedLimit) = nullptr;
     size_t (*GetNNRtModelIDFromPath)(const char*) = nullptr;
diff --git a/frameworks/native/neural_network_runtime/neural_network_runtime.cpp b/frameworks/native/neural_network_runtime/neural_network_runtime.cpp
index 93202275243c72981d37fdb623df17c8bfb2c426..111cce2dc477d8d327a080416e0f71e6f76bc960 100644
--- a/frameworks/native/neural_network_runtime/neural_network_runtime.cpp
+++ b/frameworks/native/neural_network_runtime/neural_network_runtime.cpp
@@ -42,6 +42,7 @@ const std::string EXTENSION_KEY_OP_LAYOUT = "opLayout";
 const std::string EXTENSION_KEY_INPUT_DIMS = "InputDims";
 const std::string EXTENSION_KEY_DYNAMIC_DIMS = "DynamicDims";
 const std::string EXTENSION_KEY_FM_SHARED = "NPU_FM_SHARED";
+const std::string EXTENSION_KEY_IS_EXCEED_RAMLIMIT = "isExceedRamLimit";
 const std::string NULL_HARDWARE_NAME = "default";
 const std::string HARDWARE_NAME = "const.ai.nnrt_deivce";
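Note on the client changes: the new CheckModelSizeFromCache entry point is resolved from the NNRt service library at runtime, like the existing ones. LoadFunction itself is not shown in this diff; the sketch below is an assumed dlsym-based equivalent, and the library name is illustrative only:

    #include <dlfcn.h>
    #include <string>

    // Resolve a symbol from an already-opened shared library and store it in a
    // typed function pointer; leaves the pointer untouched if the symbol is missing.
    template <typename T>
    bool LoadSymbol(void* libHandle, const char* name, T* funcPtr)
    {
        void* sym = dlsym(libHandle, name);
        if (sym == nullptr) {
            return false;
        }
        *funcPtr = reinterpret_cast<T>(sym);
        return true;
    }

    // Usage, matching the pointer declared in nnrt_client.h above:
    // int (*CheckModelSizeFromCache)(const char*, const std::string&, bool&) = nullptr;
    // void* lib = dlopen("libnnrt_service.z.so", RTLD_NOW);  // assumed library name
    // if (lib != nullptr) {
    //     LoadSymbol(lib, "CheckModelSizeFromCache", &CheckModelSizeFromCache);
    // }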
diff --git a/frameworks/native/neural_network_runtime/nncompiled_cache.cpp b/frameworks/native/neural_network_runtime/nncompiled_cache.cpp
index 23ed62c8ca539e903b39f3912497f2bc07ef6aa3..ef71bbf0e3e48c89a8f3a00f84a22c03198a5ac8 100644
--- a/frameworks/native/neural_network_runtime/nncompiled_cache.cpp
+++ b/frameworks/native/neural_network_runtime/nncompiled_cache.cpp
@@ -27,9 +27,9 @@
 
 namespace OHOS {
 namespace NeuralNetworkRuntime {
-constexpr int32_t MAX_MODEL_SIZE = 500 * 1024 * 1024; // 200MB
 constexpr int32_t NULL_PTR_LENGTH = 0;
 constexpr int32_t NUMBER_CACHE_INFO_MEMBERS = 3;
+constexpr int32_t NUMBER_CACHE_INFO_EXTENSION_MEMBERS = 2;
 constexpr int32_t HEX_UNIT = 16;
 constexpr char ROOT_DIR_STR = '/';
 constexpr char DOUBLE_SLASH_STR[] = "//";
@@ -41,6 +41,7 @@ OH_NN_ReturnCode NNCompiledCache::Save(const std::vector<Buffer>& caches,
                                        const std::string& cacheDir,
                                        uint32_t version)
 {
+    LOGI("[NNCompiledCache] Save isExceedRamLimit: %{public}d", static_cast<int>(m_isExceedRamLimit));
     if (caches.empty()) {
         LOGE("[NNCompiledCache] Save failed, caches is empty.");
         return OH_NN_INVALID_PARAMETER;
@@ -156,12 +157,17 @@ void NNCompiledCache::SetModelName(const std::string& modelName)
     m_modelName = modelName;
 }
 
+void NNCompiledCache::SetIsExceedRamLimit(const bool isExceedRamLimit)
+{
+    m_isExceedRamLimit = isExceedRamLimit;
+}
+
 OH_NN_ReturnCode NNCompiledCache::GenerateCacheFiles(const std::vector<Buffer>& caches,
                                                      const std::string& cacheDir,
                                                      uint32_t version) const
 {
     const size_t cacheNumber = caches.size();
-    uint32_t cacheSize = NUMBER_CACHE_INFO_MEMBERS + cacheNumber + 1;
+    uint32_t cacheSize = NUMBER_CACHE_INFO_MEMBERS + cacheNumber + NUMBER_CACHE_INFO_EXTENSION_MEMBERS;
     std::unique_ptr<int64_t[]> cacheInfo = CreateUniquePtr<int64_t[]>(cacheSize);
     if (cacheInfo == nullptr) {
         LOGE("[NNCompiledCache] GenerateCacheFiles failed, fail to create cacheInfo instance.");
@@ -249,6 +255,13 @@ OH_NN_ReturnCode NNCompiledCache::GenerateCacheModel(const std::vector<Buffer>& caches,
+    LOGI("[NNCompiledCache] GenerateCacheModel isExceedRamLimit: %{public}d", static_cast<int>(m_isExceedRamLimit));
+    if (m_isExceedRamLimit) {
+        *cacheInfoPtr++ = 1;
+    } else {
+        *cacheInfoPtr++ = 0;
+    }
+
     return OH_NN_SUCCESS;
 }
 
@@ -426,7 +439,7 @@ OH_NN_ReturnCode NNCompiledCache::GetCacheFileLength(FILE* pFile, long& fileSize
         return OH_NN_INVALID_FILE;
     }
 
-    if ((handleValue > MAX_MODEL_SIZE) || (handleValue == NULL_PTR_LENGTH)) {
+    if (handleValue == NULL_PTR_LENGTH) {
         LOGE("[NNCompiledCache] GetCacheFileLength failed, unable to read huge or empty input stream, "
              "get cache file size=%{public}ld",
              handleValue);
diff --git a/frameworks/native/neural_network_runtime/nncompiled_cache.h b/frameworks/native/neural_network_runtime/nncompiled_cache.h
index fff2c1f50ef1315156401ba6c949065b45e8e337..3c59c23c9d6e8c0e4ca1ef37e7db2bcb910c2d4d 100644
--- a/frameworks/native/neural_network_runtime/nncompiled_cache.h
+++ b/frameworks/native/neural_network_runtime/nncompiled_cache.h
@@ -35,6 +35,7 @@ struct NNCompiledCacheInfo {
     int64_t deviceId{0};
     std::vector<int64_t> modelCheckSum;
     int64_t opVersion{0};
+    int64_t isExceedRamLimit{0};
 };
 
 class NNCompiledCache {
@@ -51,6 +52,7 @@ public:
     OH_NN_ReturnCode SetBackend(size_t backendID);
     void SetModelName(const std::string& modelName);
+    void SetIsExceedRamLimit(const bool isExceedRamLimit);
 
     OH_NN_ReturnCode WriteCacheInfo(uint32_t cacheSize,
                                     std::unique_ptr<int64_t[]>& cacheInfo,
                                     const std::string& cacheDir) const;
@@ -73,6 +75,7 @@ private:
     size_t m_backendID {0};
     std::string m_modelName;
     std::shared_ptr<Device> m_device {nullptr};
+    bool m_isExceedRamLimit {false};
 };
 
 } // namespace NeuralNetworkRuntime
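Note on the cache-info changes: the info block is a flat array of int64_t words, and NUMBER_CACHE_INFO_EXTENSION_MEMBERS = 2 counts the opVersion word plus the new isExceedRamLimit word appended after the per-file checksums (which is why the old "+ 1" is replaced). A sketch of the resulting layout, assuming the three fixed header words are the file count, cache version and device id as suggested by NNCompiledCacheInfo; that ordering is inferred, not spelled out in this diff:

    #include <cstdint>
    #include <vector>

    // cache_info layout:
    // [fileNumber, version, deviceId, checksum_0 .. checksum_{N-1}, opVersion, isExceedRamLimit]
    std::vector<int64_t> BuildCacheInfo(int64_t fileNumber,
                                        int64_t version,
                                        int64_t deviceId,
                                        const std::vector<int64_t>& checkSums,
                                        int64_t opVersion,
                                        bool isExceedRamLimit)
    {
        constexpr int32_t kFixedMembers = 3;      // NUMBER_CACHE_INFO_MEMBERS
        constexpr int32_t kExtensionMembers = 2;  // NUMBER_CACHE_INFO_EXTENSION_MEMBERS

        std::vector<int64_t> cacheInfo;
        cacheInfo.reserve(kFixedMembers + checkSums.size() + kExtensionMembers);
        cacheInfo.push_back(fileNumber);
        cacheInfo.push_back(version);
        cacheInfo.push_back(deviceId);
        cacheInfo.insert(cacheInfo.end(), checkSums.begin(), checkSums.end());
        cacheInfo.push_back(opVersion);
        cacheInfo.push_back(isExceedRamLimit ? 1 : 0);  // same 1/0 encoding as GenerateCacheModel
        return cacheInfo;
    }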
diff --git a/frameworks/native/neural_network_runtime/nncompiler.cpp b/frameworks/native/neural_network_runtime/nncompiler.cpp
index ed72e0808baf5e378f1c376b1c70d070b7b54871..76e817e4f5167641404cf690db76e7d0b0e44f19 100644
--- a/frameworks/native/neural_network_runtime/nncompiler.cpp
+++ b/frameworks/native/neural_network_runtime/nncompiler.cpp
@@ -30,8 +30,10 @@ namespace {
 const int CACHE_INPUT_TENSORDESC_OFFSET = 2;
 const int CACHE_OUTPUT_TENSORDESC_OFFSET = 1;
 constexpr int32_t NUMBER_CACHE_INFO_MEMBERS = 3;
+constexpr int32_t NUMBER_CACHE_INFO_EXTENSION_MEMBERS = 2;
 const std::string EXTENSION_KEY_MODEL_NAME = "ModelName";
 const std::string EXTENSION_KEY_FM_SHARED = "NPU_FM_SHARED";
+const std::string EXTENSION_KEY_IS_EXCEED_RAMLIMIT = "isExceedRamLimit";
 const int OPVERSION_SUBSTR_NUM = 2;
 const std::string CURRENT_VERSION = "0x00000000";
 const std::string HIAI_VERSION_PATH = "/data/data/hiai/version";
@@ -565,6 +567,7 @@ OH_NN_ReturnCode NNCompiler::SaveToCacheFile() const
     compiledCache.SetModelName(m_extensionConfig.modelName);
+    compiledCache.SetIsExceedRamLimit(m_extensionConfig.isExceedRamLimit);
 
     ret = compiledCache.Save(caches, m_cachePath, m_cacheVersion);
     if (ret != OH_NN_SUCCESS) {
         LOGE("[NNCompiler] SaveToCacheFile failed, error happened when saving model cache.");
         ReleaseBuffer(tensorBuffers);
@@ -668,7 +671,7 @@ OH_NN_ReturnCode NNCompiler::RestoreFromCacheFile()
 
     if (currentOpVersion > modelCacheInfo.opVersion) {
         const size_t cacheNumber = caches.size();
-        uint32_t cacheSize = NUMBER_CACHE_INFO_MEMBERS + cacheNumber + 1;
+        uint32_t cacheSize = NUMBER_CACHE_INFO_MEMBERS + cacheNumber + NUMBER_CACHE_INFO_EXTENSION_MEMBERS;
         uint32_t infoCharNumber = cacheSize * sizeof(int64_t);
 
         std::unique_ptr<int64_t[]> cacheInfo = CreateUniquePtr<int64_t[]>(cacheSize);
@@ -731,6 +734,21 @@ OH_NN_ReturnCode NNCompiler::SetExtensionConfig(const std::unordered_map<std::string, std::vector<char>>& configs)
+    if (configs.find(EXTENSION_KEY_IS_EXCEED_RAMLIMIT) != configs.end()) {
+        std::vector<char> value = configs.at(EXTENSION_KEY_IS_EXCEED_RAMLIMIT);
+        if (value.empty()) {
+            LOGE("[NNCompiler] SetExtensionConfig get empty isExceedRamLimit value from configs");
+            return OH_NN_INVALID_PARAMETER;
+        }
+
+        if (value[0] == '1') {
+            m_extensionConfig.isExceedRamLimit = true;
+        } else {
+            m_extensionConfig.isExceedRamLimit = false;
+        }
+
+        LOGI("[NNCompiler] SetExtensionConfig isExceedRamLimit: %{public}d", static_cast<int>(m_extensionConfig.isExceedRamLimit));
+    }
     return OH_NN_SUCCESS;
 }
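One ordering note on SaveToCacheFile(): the flag has to reach NNCompiledCache before Save() runs, because Save() serializes the cache-info block that now carries the field; the hunk above therefore places SetIsExceedRamLimit() ahead of Save(). A minimal sketch of the intended call sequence, assuming the repository headers and types (the include paths and the wrapper function are illustrative, not part of the patch):

    #include <cstdint>
    #include <string>
    #include <vector>

    #include "cpp_type.h"          // ExtensionConfig, Buffer (assumed repo header path)
    #include "nncompiled_cache.h"  // NNCompiledCache, OH_NN_ReturnCode

    namespace OHOS {
    namespace NeuralNetworkRuntime {
    // Configure the cache object fully, then save, so the isExceedRamLimit word
    // ends up in the serialized cache info instead of defaulting to 0.
    OH_NN_ReturnCode SaveCompiledCaches(NNCompiledCache& compiledCache,
                                        const std::vector<Buffer>& caches,
                                        const ExtensionConfig& extensionConfig,
                                        const std::string& cachePath,
                                        uint32_t cacheVersion)
    {
        compiledCache.SetModelName(extensionConfig.modelName);
        compiledCache.SetIsExceedRamLimit(extensionConfig.isExceedRamLimit);  // must precede Save()
        return compiledCache.Save(caches, cachePath, cacheVersion);
    }
    } // namespace NeuralNetworkRuntime
    } // namespace OHOS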