diff --git a/frameworks/native/neural_network_core/cpp_type.h b/frameworks/native/neural_network_core/cpp_type.h
index 82183790444a2fe89c8b77c27aad39245cbc2f22..18f2a3f01b2bad56bb711ebe1850b66af0ad6062 100644
--- a/frameworks/native/neural_network_core/cpp_type.h
+++ b/frameworks/native/neural_network_core/cpp_type.h
@@ -56,6 +56,7 @@ struct ExtensionConfig {
     std::vector<std::vector<int32_t>> inputDims;
     std::vector<std::vector<int32_t>> dynamicDims;
     bool isNpuFmShared = false;
+    bool isExceedRamLimit = false;
 };
 
 struct ModelConfig {
diff --git a/frameworks/native/neural_network_core/neural_network_core.cpp b/frameworks/native/neural_network_core/neural_network_core.cpp
index 6e689f02fbed85726ca8548fdcad4d335f972fdf..0ca899af08b6493adfc411d23647d1d82a106cb7 100644
--- a/frameworks/native/neural_network_core/neural_network_core.cpp
+++ b/frameworks/native/neural_network_core/neural_network_core.cpp
@@ -510,7 +510,9 @@ OH_NN_ReturnCode CheckExceedRamLimit(const Compilation* compilation, bool& isExc
     } else if (compilation->offlineModelPath != nullptr) {
         ret = nnrtService.CheckModelSizeFromPath(compilation->offlineModelPath, isExceedRamLimit);
     } else if (compilation->cachePath != nullptr) {
-        ret = nnrtService.CheckModelSizeFromPath(compilation->cachePath, isExceedRamLimit);
+        std::string modelName;
+        compilation->compiler->GetModelName(modelName);
+        ret = nnrtService.CheckModelSizeFromCache(compilation->cachePath, modelName, isExceedRamLimit);
     } else if ((compilation->offlineModelBuffer.first != nullptr) && \
         (compilation->offlineModelBuffer.second != size_t(0))) {
         ret = nnrtService.CheckModelSizeFromBuffer(
@@ -532,7 +534,6 @@ OH_NN_ReturnCode CheckExceedRamLimit(const Compilation* compilation, bool& isExc
     return OH_NN_SUCCESS;
 }
 
-OH_NN_ReturnCode AuthenticateModel(const Compilation* compilation)
+OH_NN_ReturnCode AuthenticateModel(const Compilation* compilation, bool &isExceedRamLimit)
 {
-    bool isExceedRamLimit = false;
     OH_NN_ReturnCode retCode = CheckExceedRamLimit(compilation, isExceedRamLimit);
@@ -582,7 +584,7 @@ OH_NN_ReturnCode AuthenticateModel(const Compilation* compilation)
     return OH_NN_SUCCESS;
 }
 
-OH_NN_ReturnCode Authentication(Compilation** compilation)
+OH_NN_ReturnCode Authentication(Compilation** compilation, bool &isExceedRamLimit)
 {
     if (compilation == nullptr) {
         LOGE("Authentication failed, compilation is nullptr.");
@@ -601,7 +603,7 @@ OH_NN_ReturnCode Authentication(Compilation** compilation)
         return OH_NN_SUCCESS;
     }
 
-    OH_NN_ReturnCode ret = AuthenticateModel(compilationImpl);
+    OH_NN_ReturnCode ret = AuthenticateModel(compilationImpl, isExceedRamLimit);
     if (ret != OH_NN_SUCCESS) {
         LOGE("Authentication failed, fail to authenticate model.");
         return ret;
@@ -732,12 +734,26 @@ NNRT_API OH_NN_ReturnCode OH_NNCompilation_Build(OH_NNCompilation *compilation)
         return ret;
     }
 
-    ret = Authentication(&compilationImpl);
+    bool isExceedRamLimit = false;
+    ret = Authentication(&compilationImpl, isExceedRamLimit);
     if (ret != OH_NN_SUCCESS) {
         LOGE("OH_NNCompilation_Build failed, fail to create compiler.");
         return ret;
     }
 
+    std::unordered_map<std::string, std::vector<char>> configs;
+    LOGI("[OH_NNCompilation_Build] isExceedRamLimit: %{public}d", static_cast<int>(isExceedRamLimit));
+
+    std::vector<char> configContents;
+    if (isExceedRamLimit) {
+        configContents.push_back('1');
+    } else {
+        configContents.push_back('0');
+    }
+
+    configs["isExceedRamLimit"] = configContents;
+    compilationImpl->compiler->SetExtensionConfig(configs);
+
     bool isBuild = compilationImpl->compiler->IsBuild();
     if (isBuild) {
         LOGE("OH_NNCompilation_Build failed, compilation has been built, don't build again.");
again."); diff --git a/frameworks/native/neural_network_core/nnrt_client.cpp b/frameworks/native/neural_network_core/nnrt_client.cpp index c5e188397d1538812757bea47a74dc53b529dc7a..618efe1e5f28f06d642087651618ac9d5cf50fd1 100644 --- a/frameworks/native/neural_network_core/nnrt_client.cpp +++ b/frameworks/native/neural_network_core/nnrt_client.cpp @@ -58,6 +58,7 @@ NNRtServiceApi& NNRtServiceApi::GetInstance() } LoadFunction(libNNRtService, "CheckModelSizeFromPath", &nnrtService.CheckModelSizeFromPath); + LoadFunction(libNNRtService, "CheckModelSizeFromCache", &nnrtService.CheckModelSizeFromCache); LoadFunction(libNNRtService, "CheckModelSizeFromBuffer", &nnrtService.CheckModelSizeFromBuffer); LoadFunction(libNNRtService, "CheckModelSizeFromModel", &nnrtService.CheckModelSizeFromModel); LoadFunction(libNNRtService, "GetNNRtModelIDFromPath", &nnrtService.GetNNRtModelIDFromPath); diff --git a/frameworks/native/neural_network_core/nnrt_client.h b/frameworks/native/neural_network_core/nnrt_client.h index 628ab63339f71a4631e7446ca239b55ff02b53a6..8b87c900b4d4410b2df1609183630b038d55d3b1 100644 --- a/frameworks/native/neural_network_core/nnrt_client.h +++ b/frameworks/native/neural_network_core/nnrt_client.h @@ -27,6 +27,7 @@ public: bool IsServiceAvaliable() const; int (*CheckModelSizeFromPath)(const char* path, bool& exceedLimit) = nullptr; + int (*CheckModelSizeFromCache)(const char* path, const std::string& modelName, bool& exceedLimit) = nullptr; int (*CheckModelSizeFromBuffer)(const void* buffer, size_t size, bool& exceedLimit) = nullptr; int (*CheckModelSizeFromModel)(void* model, bool& exceedLimit) = nullptr; size_t (*GetNNRtModelIDFromPath)(const char*) = nullptr; diff --git a/frameworks/native/neural_network_runtime/neural_network_runtime.cpp b/frameworks/native/neural_network_runtime/neural_network_runtime.cpp index 4ab5d6a09080aaf449d05629452b5763a336c696..6c63925aa9d949e3262ff2f052f510301949135b 100644 --- a/frameworks/native/neural_network_runtime/neural_network_runtime.cpp +++ b/frameworks/native/neural_network_runtime/neural_network_runtime.cpp @@ -41,6 +41,7 @@ const std::string EXTENSION_KEY_OP_LAYOUT = "opLayout"; const std::string EXTENSION_KEY_INPUT_DIMS = "InputDims"; const std::string EXTENSION_KEY_DYNAMIC_DIMS = "DynamicDims"; const std::string EXTENSION_KEY_FM_SHARED = "NPU_FM_SHARED"; +const std::string EXTENSION_KEY_IS_EXCEED_RAMLIMIT = "isExceedRamLimit"; const std::string NULL_HARDWARE_NAME = "default"; const std::string HARDWARE_NAME = "const.ai.nnrt_deivce"; diff --git a/frameworks/native/neural_network_runtime/nncompiled_cache.cpp b/frameworks/native/neural_network_runtime/nncompiled_cache.cpp index 498fc5ac7c0cd5751567c7a60484d0812eb20898..68e50586dd9b66f7239bd7cc465dbf8bee8f43a6 100644 --- a/frameworks/native/neural_network_runtime/nncompiled_cache.cpp +++ b/frameworks/native/neural_network_runtime/nncompiled_cache.cpp @@ -27,9 +27,9 @@ namespace OHOS { namespace NeuralNetworkRuntime { -constexpr int32_t MAX_MODEL_SIZE = 500 * 1024 * 1024; // 200MB constexpr int32_t NULL_PTR_LENGTH = 0; constexpr int32_t NUMBER_CACHE_INFO_MEMBERS = 3; +constexpr int32_t NUMBER_CACHE_INFO_EXTENSION_MEMBERS = 2; constexpr int32_t HEX_UNIT = 16; constexpr char ROOT_DIR_STR = '/'; constexpr char DOUBLE_SLASH_STR[] = "//"; @@ -41,6 +41,7 @@ OH_NN_ReturnCode NNCompiledCache::Save(const std::vector(m_isExceedRamLimit)); if (caches.empty()) { LOGE("[NNCompiledCache] Save failed, caches is empty."); return OH_NN_INVALID_PARAMETER; @@ -156,12 +157,17 @@ void 
     m_modelName = modelName;
 }
 
+void NNCompiledCache::SetIsExceedRamLimit(const bool isExceedRamLimit)
+{
+    m_isExceedRamLimit = isExceedRamLimit;
+}
+
 OH_NN_ReturnCode NNCompiledCache::GenerateCacheFiles(const std::vector<Buffer>& caches,
                                                      const std::string& cacheDir,
                                                      uint32_t version) const
 {
     const size_t cacheNumber = caches.size();
-    uint32_t cacheSize = NUMBER_CACHE_INFO_MEMBERS + cacheNumber + 1;
+    uint32_t cacheSize = NUMBER_CACHE_INFO_MEMBERS + cacheNumber + NUMBER_CACHE_INFO_EXTENSION_MEMBERS;
     std::unique_ptr<int64_t[]> cacheInfo = CreateUniquePtr<int64_t[]>(cacheSize);
     if (cacheInfo == nullptr) {
         LOGE("[NNCompiledCache] GenerateCacheFiles failed, fail to create cacheInfo instance.");
@@ -245,6 +251,13 @@ OH_NN_ReturnCode NNCompiledCache::GenerateCacheModel(const std::vector<Buffer>& caches,
+    LOGI("[NNCompiledCache] GenerateCacheModel isExceedRamLimit: %{public}d", static_cast<int>(m_isExceedRamLimit));
+    if (m_isExceedRamLimit) {
+        *cacheInfoPtr++ = 1;
+    } else {
+        *cacheInfoPtr++ = 0;
+    }
+
     return OH_NN_SUCCESS;
 }
 
@@ -422,7 +435,7 @@ OH_NN_ReturnCode NNCompiledCache::GetCacheFileLength(FILE* pFile, long& fileSize
         return OH_NN_INVALID_FILE;
     }
 
-    if ((handleValue > MAX_MODEL_SIZE) || (handleValue == NULL_PTR_LENGTH)) {
+    if (handleValue == NULL_PTR_LENGTH) {
         LOGE("[NNCompiledCache] GetCacheFileLength failed, unable to read huge or empty input stream, "
              "get cache file size=%{public}ld",
              handleValue);
diff --git a/frameworks/native/neural_network_runtime/nncompiled_cache.h b/frameworks/native/neural_network_runtime/nncompiled_cache.h
index df84886a564a9bf78200ca8afa4699dbb2ae21e5..1cbcdc9d3e736af002c5b40847b589dd580cf0e6 100644
--- a/frameworks/native/neural_network_runtime/nncompiled_cache.h
+++ b/frameworks/native/neural_network_runtime/nncompiled_cache.h
@@ -34,6 +34,7 @@ struct NNCompiledCacheInfo {
     int64_t deviceId{0};
     std::vector<unsigned short> modelCheckSum;
     int64_t opVersion{0};
+    int64_t isExceedRamLimit{0};
 };
 
 class NNCompiledCache {
@@ -50,6 +51,7 @@ public:
 
     OH_NN_ReturnCode SetBackend(size_t backendID);
     void SetModelName(const std::string& modelName);
+    void SetIsExceedRamLimit(const bool isExceedRamLimit);
     OH_NN_ReturnCode WriteCacheInfo(uint32_t cacheSize,
                                     std::unique_ptr<int64_t[]>& cacheInfo,
                                     const std::string& cacheDir) const;
@@ -72,6 +74,7 @@ private:
     size_t m_backendID {0};
     std::string m_modelName;
     std::shared_ptr<Device> m_device {nullptr};
+    bool m_isExceedRamLimit {false};
 };
 
 } // namespace NeuralNetworkRuntime
diff --git a/frameworks/native/neural_network_runtime/nncompiler.cpp b/frameworks/native/neural_network_runtime/nncompiler.cpp
index d1aa30e03e8e4e2887f2940d952587462874baa3..fbd5cbfc1f5a42c6c2a4618c20c996fd433de2e3 100644
--- a/frameworks/native/neural_network_runtime/nncompiler.cpp
+++ b/frameworks/native/neural_network_runtime/nncompiler.cpp
@@ -32,6 +32,7 @@ const int CACHE_OUTPUT_TENSORDESC_OFFSET = 1;
 constexpr int32_t NUMBER_CACHE_INFO_MEMBERS = 3;
 const std::string EXTENSION_KEY_MODEL_NAME = "ModelName";
 const std::string EXTENSION_KEY_FM_SHARED = "NPU_FM_SHARED";
+const std::string EXTENSION_KEY_IS_EXCEED_RAMLIMIT = "isExceedRamLimit";
 const int OPVERSION_SUBSTR_NUM = 2;
 const std::string CURRENT_VERSION = "0x00000000";
 const std::string HIAI_VERSION_PATH = "/data/data/hiai/version";
@@ -558,6 +559,7 @@ OH_NN_ReturnCode NNCompiler::SaveToCacheFile() const
     tensorBuffers.emplace_back(outputTensorDescBuffer);
 
     compiledCache.SetModelName(m_extensionConfig.modelName);
+    compiledCache.SetIsExceedRamLimit(m_extensionConfig.isExceedRamLimit);
     ret = compiledCache.Save(caches, m_cachePath, m_cacheVersion);
     if (ret != OH_NN_SUCCESS) {
         LOGE("[NNCompiler] SaveToCacheFile failed, error happened when saving model cache.");
@@ -725,6 +727,21 @@ OH_NN_ReturnCode NNCompiler::SetExtensionConfig(const std::unordered_map<std::string, std::vector<char>>& configs)
+    if (configs.find(EXTENSION_KEY_IS_EXCEED_RAMLIMIT) != configs.end()) {
+        const std::vector<char> value = configs.at(EXTENSION_KEY_IS_EXCEED_RAMLIMIT);
+        if (value.empty()) {
+            LOGE("[NNCompiler] SetExtensionConfig get empty model name from configs");
+            return OH_NN_INVALID_PARAMETER;
+        }
+
+        if (value[0] == '1') {
+            m_extensionConfig.isExceedRamLimit = true;
+        } else {
+            m_extensionConfig.isExceedRamLimit = false;
+        }
+
+        LOGI("[NNCompiler] SetExtensionConfig isExceedRamLimit enabled.");
+    }
     return OH_NN_SUCCESS;
 }
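
Note (not part of the patch): a minimal sketch of how the extended cache-info block written by this change could be read back. It assumes the info file is a flat array of int64_t entries laid out in NNCompiledCacheInfo order, that the three fixed members are the cache file count, cache version, and device id, and that the two extension members (opVersion and the new isExceedRamLimit flag) follow the per-file checksums; the ParsedCacheInfo/ParseCacheInfo names below are hypothetical and only illustrate the layout implied by the cacheSize computation in GenerateCacheFiles.

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical mirror of NNCompiledCacheInfo, using int64_t for every slot
// exactly as the entries are written into the cache-info buffer.
struct ParsedCacheInfo {
    int64_t fileNumber {0};        // assumed first fixed member
    int64_t version {0};           // assumed second fixed member
    int64_t deviceId {0};
    std::vector<int64_t> modelCheckSum;
    int64_t opVersion {0};
    int64_t isExceedRamLimit {0};  // new extension member: 1 if the model exceeds the RAM limit
};

constexpr std::size_t NUMBER_CACHE_INFO_MEMBERS = 3;
constexpr std::size_t NUMBER_CACHE_INFO_EXTENSION_MEMBERS = 2;

// Parses a cache-info buffer of the size written by GenerateCacheFiles:
// NUMBER_CACHE_INFO_MEMBERS + cacheNumber + NUMBER_CACHE_INFO_EXTENSION_MEMBERS.
bool ParseCacheInfo(const std::vector<int64_t>& buffer, ParsedCacheInfo& out)
{
    if (buffer.size() < NUMBER_CACHE_INFO_MEMBERS) {
        return false;
    }
    out.fileNumber = buffer[0];
    out.version = buffer[1];
    out.deviceId = buffer[2];

    if (out.fileNumber < 0) {
        return false;
    }
    const std::size_t expected = NUMBER_CACHE_INFO_MEMBERS +
        static_cast<std::size_t>(out.fileNumber) + NUMBER_CACHE_INFO_EXTENSION_MEMBERS;
    if (buffer.size() < expected) {
        return false;
    }

    // One checksum entry per cache file, then the two extension slots.
    out.modelCheckSum.assign(buffer.begin() + NUMBER_CACHE_INFO_MEMBERS,
                             buffer.begin() + NUMBER_CACHE_INFO_MEMBERS + out.fileNumber);
    out.opVersion = buffer[expected - 2];
    out.isExceedRamLimit = buffer[expected - 1];  // written as 1 or 0 by GenerateCacheModel
    return true;
}
```

Within the runtime itself the real NNCompiledCacheInfo type would be used instead; the sketch is only meant to make the new on-disk slot for isExceedRamLimit explicit.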