From f113b86a8f0214f2d50c5b7546401be678167f77 Mon Sep 17 00:00:00 2001
From: w30052974
Date: Fri, 10 Jan 2025 17:57:48 +0800
Subject: [PATCH 1/3] Merge the authentication requirement into master
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: w30052974
---
 .../native/neural_network_core/cpp_type.h     |  1 +
 .../neural_network_core.cpp                   | 27 ++++++++++++++-----
 .../neural_network_core/nnrt_client.cpp       |  1 +
 .../native/neural_network_core/nnrt_client.h  |  1 +
 .../neural_network_runtime.cpp                |  1 +
 .../nncompiled_cache.cpp                      | 19 ++++++++++---
 .../neural_network_runtime/nncompiled_cache.h |  3 +++
 .../neural_network_runtime/nncompiler.cpp     | 20 +++++++++++++-
 8 files changed, 63 insertions(+), 10 deletions(-)

diff --git a/frameworks/native/neural_network_core/cpp_type.h b/frameworks/native/neural_network_core/cpp_type.h
index 8218379..18f2a3f 100644
--- a/frameworks/native/neural_network_core/cpp_type.h
+++ b/frameworks/native/neural_network_core/cpp_type.h
@@ -56,6 +56,7 @@ struct ExtensionConfig {
     std::vector<std::vector<int32_t>> inputDims;
     std::vector<std::vector<int32_t>> dynamicDims;
     bool isNpuFmShared = false;
+    bool isExceedRamLimit = false;
 };

 struct ModelConfig {
diff --git a/frameworks/native/neural_network_core/neural_network_core.cpp b/frameworks/native/neural_network_core/neural_network_core.cpp
index 6e689f0..88a9191 100644
--- a/frameworks/native/neural_network_core/neural_network_core.cpp
+++ b/frameworks/native/neural_network_core/neural_network_core.cpp
@@ -510,7 +510,9 @@ OH_NN_ReturnCode CheckExceedRamLimit(const Compilation* compilation, bool& isExceedRamLimit)
     } else if (compilation->offlineModelPath != nullptr) {
         ret = nnrtService.CheckModelSizeFromPath(compilation->offlineModelPath, isExceedRamLimit);
     } else if (compilation->cachePath != nullptr) {
-        ret = nnrtService.CheckModelSizeFromPath(compilation->cachePath, isExceedRamLimit);
+        std::string modelName;
+        compilation->compiler->GetModelName(modelName);
+        ret = nnrtService.CheckModelSizeFromPath(compilation->cachePath, modelName, isExceedRamLimit);
     } else if ((compilation->offlineModelBuffer.first != nullptr) && \
         (compilation->offlineModelBuffer.second != size_t(0))) {
         ret = nnrtService.CheckModelSizeFromBuffer(
@@ -532,9 +534,8 @@ OH_NN_ReturnCode CheckExceedRamLimit(const Compilation* compilation, bool& isExceedRamLimit)
     return OH_NN_SUCCESS;
 }

-OH_NN_ReturnCode AuthenticateModel(const Compilation* compilation)
+OH_NN_ReturnCode AuthenticateModel(const Compilation* compilation, bool &isExceedRamLimit)
 {
-    bool isExceedRamLimit = false;
     OH_NN_ReturnCode retCode = CheckExceedRamLimit(compilation, isExceedRamLimit);
     if (retCode != OH_NN_SUCCESS) {
         LOGE("AuthenticateModel failed, fail to check if model exceed ram limit.");
@@ -582,7 +583,7 @@ OH_NN_ReturnCode AuthenticateModel(const Compilation* compilation)
     return OH_NN_SUCCESS;
 }

-OH_NN_ReturnCode Authentication(Compilation** compilation)
+OH_NN_ReturnCode Authentication(Compilation** compilation, bool &isExceedRamLimit)
 {
     if (compilation == nullptr) {
         LOGE("Authentication failed, compilation is nullptr.");
@@ -601,7 +602,7 @@ OH_NN_ReturnCode Authentication(Compilation** compilation)
         return OH_NN_SUCCESS;
     }

-    OH_NN_ReturnCode ret = AuthenticateModel(compilationImpl);
+    OH_NN_ReturnCode ret = AuthenticateModel(compilationImpl, isExceedRamLimit);
     if (ret != OH_NN_SUCCESS) {
         LOGE("Authentication failed, fail to authenticate model.");
         return ret;
@@ -732,12 +733,26 @@ NNRT_API OH_NN_ReturnCode OH_NNCompilation_Build(OH_NNCompilation *compilation)
         return ret;
     }

-    ret = Authentication(&compilationImpl);
+    bool isExceedRamLimit = false;
+    ret = Authentication(&compilationImpl, isExceedRamLimit);
     if (ret != OH_NN_SUCCESS) {
         LOGE("OH_NNCompilation_Build failed, fail to create compiler.");
         return ret;
     }

+    std::unordered_map<std::string, std::vector<char>> configs;
+    LOGI("[OH_NNCompilation_Build] isExceedRamLimit: %{public}d", static_cast<int>(isExceedRamLimit));
+
+    std::vector<char> configContents;
+    if (isExceedRamLimit) {
+        configContents.push_back('1');
+    } else {
+        configContents.push_back('0');
+    }
+
+    configs["isExceedRamLimit"] = configContents;
+    compilationImpl->compiler->SetExtensionConfig(configs);
+
     bool isBuild = compilationImpl->compiler->IsBuild();
     if (isBuild) {
         LOGE("OH_NNCompilation_Build failed, compilation has been built, don't build again.");
diff --git a/frameworks/native/neural_network_core/nnrt_client.cpp b/frameworks/native/neural_network_core/nnrt_client.cpp
index c5e1883..618efe1 100644
--- a/frameworks/native/neural_network_core/nnrt_client.cpp
+++ b/frameworks/native/neural_network_core/nnrt_client.cpp
@@ -58,6 +58,7 @@ NNRtServiceApi& NNRtServiceApi::GetInstance()
     }

     LoadFunction(libNNRtService, "CheckModelSizeFromPath", &nnrtService.CheckModelSizeFromPath);
+    LoadFunction(libNNRtService, "CheckModelSizeFromCache", &nnrtService.CheckModelSizeFromCache);
     LoadFunction(libNNRtService, "CheckModelSizeFromBuffer", &nnrtService.CheckModelSizeFromBuffer);
     LoadFunction(libNNRtService, "CheckModelSizeFromModel", &nnrtService.CheckModelSizeFromModel);
     LoadFunction(libNNRtService, "GetNNRtModelIDFromPath", &nnrtService.GetNNRtModelIDFromPath);
diff --git a/frameworks/native/neural_network_core/nnrt_client.h b/frameworks/native/neural_network_core/nnrt_client.h
index 628ab63..8b87c90 100644
--- a/frameworks/native/neural_network_core/nnrt_client.h
+++ b/frameworks/native/neural_network_core/nnrt_client.h
@@ -27,6 +27,7 @@ public:
     bool IsServiceAvaliable() const;

     int (*CheckModelSizeFromPath)(const char* path, bool& exceedLimit) = nullptr;
+    int (*CheckModelSizeFromCache)(const char* path, const std::string& modelName, bool& exceedLimit) = nullptr;
     int (*CheckModelSizeFromBuffer)(const void* buffer, size_t size, bool& exceedLimit) = nullptr;
     int (*CheckModelSizeFromModel)(void* model, bool& exceedLimit) = nullptr;
     size_t (*GetNNRtModelIDFromPath)(const char*) = nullptr;
diff --git a/frameworks/native/neural_network_runtime/neural_network_runtime.cpp b/frameworks/native/neural_network_runtime/neural_network_runtime.cpp
index 9320227..111cce2 100644
--- a/frameworks/native/neural_network_runtime/neural_network_runtime.cpp
+++ b/frameworks/native/neural_network_runtime/neural_network_runtime.cpp
@@ -42,6 +42,7 @@ const std::string EXTENSION_KEY_OP_LAYOUT = "opLayout";
 const std::string EXTENSION_KEY_INPUT_DIMS = "InputDims";
 const std::string EXTENSION_KEY_DYNAMIC_DIMS = "DynamicDims";
 const std::string EXTENSION_KEY_FM_SHARED = "NPU_FM_SHARED";
+const std::string EXTENSION_KEY_IS_EXCEED_RAMLIMIT = "isExceedRamLimit";

 const std::string NULL_HARDWARE_NAME = "default";
 const std::string HARDWARE_NAME = "const.ai.nnrt_deivce";
diff --git a/frameworks/native/neural_network_runtime/nncompiled_cache.cpp b/frameworks/native/neural_network_runtime/nncompiled_cache.cpp
index 23ed62c..3fbac13 100644
--- a/frameworks/native/neural_network_runtime/nncompiled_cache.cpp
+++ b/frameworks/native/neural_network_runtime/nncompiled_cache.cpp
@@ -27,9 +27,9 @@
 namespace OHOS {
 namespace NeuralNetworkRuntime {
-constexpr int32_t MAX_MODEL_SIZE = 500 * 1024 * 1024; // 200MB
 constexpr int32_t NULL_PTR_LENGTH = 0;
 constexpr int32_t NUMBER_CACHE_INFO_MEMBERS = 3;
+constexpr int32_t NUMBER_CACHE_INFO_EXTENSION_MEMBERS = 2;
 constexpr int32_t HEX_UNIT = 16;
 constexpr char ROOT_DIR_STR = '/';
 constexpr char DOUBLE_SLASH_STR[] = "//";
@@ -41,6 +41,7 @@ OH_NN_ReturnCode NNCompiledCache::Save(const std::vector<Buffer>& caches,
+    LOGI("[NNCompiledCache] Save isExceedRamLimit: %{public}d", static_cast<int>(m_isExceedRamLimit));
     if (caches.empty()) {
         LOGE("[NNCompiledCache] Save failed, caches is empty.");
         return OH_NN_INVALID_PARAMETER;
     }
@@ -156,12 +157,17 @@ void NNCompiledCache::SetModelName(const std::string& modelName)
     m_modelName = modelName;
 }

+void NNCompiledCache::SetIsExceedRamLimit(const bool isExceedRamLimit)
+{
+    m_isExceedRamLimit = isExceedRamLimit;
+}
+
 OH_NN_ReturnCode NNCompiledCache::GenerateCacheFiles(const std::vector<Buffer>& caches,
                                                      const std::string& cacheDir,
                                                      uint32_t version) const
 {
     const size_t cacheNumber = caches.size();
-    uint32_t cacheSize = NUMBER_CACHE_INFO_MEMBERS + cacheNumber + 1;
+    uint32_t cacheSize = NUMBER_CACHE_INFO_MEMBERS + cacheNumber + NUMBER_CACHE_INFO_EXTENSION_MEMBERS;
     std::unique_ptr<int64_t[]> cacheInfo = CreateUniquePtr<int64_t[]>(cacheSize);
     if (cacheInfo == nullptr) {
         LOGE("[NNCompiledCache] GenerateCacheFiles failed, fail to create cacheInfo instance.");
@@ -249,6 +255,13 @@ OH_NN_ReturnCode NNCompiledCache::GenerateCacheModel(const std::vector<Buffer>& caches,
+    LOGI("[NNCompiledCache] GenerateCacheModel isExceedRamLimit: %{public}d", static_cast<int>(m_isExceedRamLimit));
+    if (m_isExceedRamLimit) {
+        *cacheInfoPtr++ = 1;
+    } else {
+        *cacheInfoPtr++ = 0;
+    }
+
     return OH_NN_SUCCESS;
 }
@@ -426,7 +439,7 @@ OH_NN_ReturnCode NNCompiledCache::GetCacheFileLength(FILE* pFile, long& fileSize)
         return OH_NN_INVALID_FILE;
     }

-    if ((handleValue > MAX_MODEL_SIZE) || (handleValue == NULL_PTR_LENGTH)) {
+    if ((handleValue == NULL_PTR_LENGTH)) {
         LOGE("[NNCompiledCache] GetCacheFileLength failed, unable to read huge or empty input stream, "
              "get cache file size=%{public}ld",
              handleValue);
diff --git a/frameworks/native/neural_network_runtime/nncompiled_cache.h b/frameworks/native/neural_network_runtime/nncompiled_cache.h
index fff2c1f..3c59c23 100644
--- a/frameworks/native/neural_network_runtime/nncompiled_cache.h
+++ b/frameworks/native/neural_network_runtime/nncompiled_cache.h
@@ -35,6 +35,7 @@ struct NNCompiledCacheInfo {
     int64_t deviceId{0};
     std::vector<int64_t> modelCheckSum;
     int64_t opVersion{0};
+    int64_t isExceedRamLimit{0};
 };

 class NNCompiledCache {
@@ -51,6 +52,7 @@ public:
     OH_NN_ReturnCode SetBackend(size_t backendID);
     void SetModelName(const std::string& modelName);
+    void SetIsExceedRamLimit(const bool isExceedRamLimit);
     OH_NN_ReturnCode WriteCacheInfo(uint32_t cacheSize,
                                     std::unique_ptr<int64_t[]>& cacheInfo,
                                     const std::string& cacheDir) const;
@@ -73,6 +75,7 @@ private:
     size_t m_backendID {0};
     std::string m_modelName;
     std::shared_ptr<Device> m_device {nullptr};
+    bool m_isExceedRamLimit {false};
 };
 } // namespace NeuralNetworkRuntime
diff --git a/frameworks/native/neural_network_runtime/nncompiler.cpp b/frameworks/native/neural_network_runtime/nncompiler.cpp
index ed72e08..76e817e 100644
--- a/frameworks/native/neural_network_runtime/nncompiler.cpp
+++ b/frameworks/native/neural_network_runtime/nncompiler.cpp
@@ -30,8 +30,10 @@ namespace {
 const int CACHE_INPUT_TENSORDESC_OFFSET = 2;
 const int CACHE_OUTPUT_TENSORDESC_OFFSET = 1;
 constexpr int32_t NUMBER_CACHE_INFO_MEMBERS = 3;
+constexpr int32_t NUMBER_CACHE_INFO_EXTENSION_MEMBERS = 2;
 const std::string EXTENSION_KEY_MODEL_NAME = "ModelName";
 const std::string EXTENSION_KEY_FM_SHARED = "NPU_FM_SHARED";
+const std::string EXTENSION_KEY_IS_EXCEED_RAMLIMIT = "isExceedRamLimit";
 const int OPVERSION_SUBSTR_NUM = 2;
 const std::string CURRENT_VERSION = "0x00000000";
 const std::string HIAI_VERSION_PATH = "/data/data/hiai/version";
@@ -565,6 +567,7 @@ OH_NN_ReturnCode NNCompiler::SaveToCacheFile() const
     compiledCache.SetModelName(m_extensionConfig.modelName);
+    compiledCache.SetIsExceedRamLimit(m_extensionConfig.isExceedRamLimit);

     ret = compiledCache.Save(caches, m_cachePath, m_cacheVersion);
     if (ret != OH_NN_SUCCESS) {
         LOGE("[NNCompiler] SaveToCacheFile failed, error happened when saving model cache.");
         ReleaseBuffer(tensorBuffers);
@@ -668,7 +671,7 @@ OH_NN_ReturnCode NNCompiler::RestoreFromCacheFile()

     if (currentOpVersion > modelCacheInfo.opVersion) {
         const size_t cacheNumber = caches.size();
-        uint32_t cacheSize = NUMBER_CACHE_INFO_MEMBERS + cacheNumber + 1;
+        uint32_t cacheSize = NUMBER_CACHE_INFO_MEMBERS + cacheNumber + NUMBER_CACHE_INFO_EXTENSION_MEMBERS;
         uint32_t infoCharNumber = cacheSize * sizeof(int64_t);

         std::unique_ptr<int64_t[]> cacheInfo = CreateUniquePtr<int64_t[]>(cacheSize);
@@ -731,6 +734,21 @@ OH_NN_ReturnCode NNCompiler::SetExtensionConfig(const std::unordered_map<std::string, std::vector<char>>& configs)
+    if (configs.find(EXTENSION_KEY_IS_EXCEED_RAMLIMIT) != configs.end()) {
+        std::vector<char> value = configs.at(EXTENSION_KEY_IS_EXCEED_RAMLIMIT);
+        if (value.empty()) {
+            LOGE("[NNCompiler] SetExtensionConfig get empty isExceedRamLimit value from configs");
+            return OH_NN_INVALID_PARAMETER;
+        }
+
+        if (value[0] == '1') {
+            m_extensionConfig.isExceedRamLimit = true;
+        } else {
+            m_extensionConfig.isExceedRamLimit = false;
+        }
+
+        LOGI("[NNCompiler] SetExtensionConfig isExceedRamLimit set.");
+    }
     return OH_NN_SUCCESS;
 }
--
Gitee
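For context on how patch 1 threads the flag end to end: OH_NNCompilation_Build serializes isExceedRamLimit as a one-character std::vector<char> under the "isExceedRamLimit" key, and NNCompiler::SetExtensionConfig parses it back into ExtensionConfig. A minimal standalone sketch of that round trip, with simplified stand-in types rather than the real OHOS classes:

    // Standalone sketch of the extension-config round trip added in patch 1.
    // The struct and key names mirror the patch; everything else is simplified.
    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <vector>

    struct ExtensionConfig {
        bool isExceedRamLimit = false;  // new field introduced by patch 1
    };

    // Producer side, as in OH_NNCompilation_Build: encode the flag as '0'/'1'.
    std::unordered_map<std::string, std::vector<char>> MakeConfigs(bool isExceedRamLimit)
    {
        std::unordered_map<std::string, std::vector<char>> configs;
        configs["isExceedRamLimit"] = { isExceedRamLimit ? '1' : '0' };
        return configs;
    }

    // Consumer side, as in NNCompiler::SetExtensionConfig: parse the flag back.
    bool ParseConfigs(const std::unordered_map<std::string, std::vector<char>>& configs,
                      ExtensionConfig& extensionConfig)
    {
        auto it = configs.find("isExceedRamLimit");
        if (it == configs.end() || it->second.empty()) {
            return false;  // key absent or empty: keep the default
        }
        extensionConfig.isExceedRamLimit = (it->second[0] == '1');
        return true;
    }

    int main()
    {
        ExtensionConfig config;
        ParseConfigs(MakeConfigs(true), config);
        std::cout << "isExceedRamLimit: " << config.isExceedRamLimit << '\n';  // prints 1
        return 0;
    }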
From d81d99e88f8852ca56ec7a70bb22a154cd5f03f4 Mon Sep 17 00:00:00 2001
From: w30052974
Date: Fri, 10 Jan 2025 18:04:21 +0800
Subject: [PATCH 2/3] Merge the authentication requirement into master
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: w30052974
---
 frameworks/native/neural_network_core/neural_network_core.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/frameworks/native/neural_network_core/neural_network_core.cpp b/frameworks/native/neural_network_core/neural_network_core.cpp
index 88a9191..276aa82 100644
--- a/frameworks/native/neural_network_core/neural_network_core.cpp
+++ b/frameworks/native/neural_network_core/neural_network_core.cpp
@@ -512,7 +512,7 @@ OH_NN_ReturnCode CheckExceedRamLimit(const Compilation* compilation, bool& isExceedRamLimit)
     } else if (compilation->cachePath != nullptr) {
         std::string modelName;
         compilation->compiler->GetModelName(modelName);
-        ret = nnrtService.CheckModelSizeFromPath(compilation->cachePath, modelName, isExceedRamLimit);
+        ret = nnrtService.CheckModelSizeFromCache(compilation->cachePath, modelName, isExceedRamLimit);
     } else if ((compilation->offlineModelBuffer.first != nullptr) && \
         (compilation->offlineModelBuffer.second != size_t(0))) {
         ret = nnrtService.CheckModelSizeFromBuffer(
@@ -741,7 +741,7 @@ NNRT_API OH_NN_ReturnCode OH_NNCompilation_Build(OH_NNCompilation *compilation)
     }

     std::unordered_map<std::string, std::vector<char>> configs;
-    LOGI("[OH_NNCompilation_Build] isExceedRamLimit: %{public}d", static_cast<int>(isExceedRamLimit));
+    LOGI("[OH_NNCompilation_Build] model isExceedRamLimit: %{public}d", static_cast<int>(isExceedRamLimit));

     std::vector<char> configContents;
     if (isExceedRamLimit) {
--
Gitee
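Patch 2 works because every CheckModelSizeFrom* entry point is resolved from the NNRt service library at runtime; the one-line fix swaps which resolved symbol is invoked for the cache path. A hedged sketch of that dlopen/dlsym loading pattern (POSIX-only; "libexample.so" and the error handling are placeholders, not the real libNNRtService interface):

    // Sketch of runtime symbol loading in the style of NNRtServiceApi::GetInstance().
    // The library name is a stand-in; only the pattern is taken from the patch.
    #include <dlfcn.h>
    #include <iostream>
    #include <string>

    struct ServiceApi {
        // Function-pointer member, matching the signature added in nnrt_client.h.
        int (*CheckModelSizeFromCache)(const char* path, const std::string& modelName,
                                       bool& exceedLimit) = nullptr;
    };

    template <typename Fn>
    bool LoadFunction(void* handle, const char* name, Fn** target)
    {
        // dlsym returns void*; casting it to a typed function pointer is the
        // conventional POSIX idiom for plugin-style APIs.
        *target = reinterpret_cast<Fn*>(dlsym(handle, name));
        return *target != nullptr;
    }

    int main()
    {
        void* handle = dlopen("libexample.so", RTLD_LAZY);
        if (handle == nullptr) {
            std::cerr << "dlopen failed: " << dlerror() << '\n';
            return 1;
        }
        ServiceApi api;
        if (!LoadFunction(handle, "CheckModelSizeFromCache", &api.CheckModelSizeFromCache)) {
            std::cerr << "dlsym failed: " << dlerror() << '\n';
            dlclose(handle);
            return 1;
        }
        bool exceedLimit = false;
        int ret = api.CheckModelSizeFromCache("/data/cache", "model", exceedLimit);
        std::cout << "ret=" << ret << " exceedLimit=" << exceedLimit << '\n';
        dlclose(handle);
        return 0;
    }

Keeping each symbol behind its own function pointer is what makes the patch-2 fix a one-line change: the wrong pointer was being called, not the wrong library.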
From e15a12fc7f280dabb5a4449717c17e4d61957403 Mon Sep 17 00:00:00 2001
From: wangyifan
Date: Sat, 11 Jan 2025 06:42:43 +0000
Subject: [PATCH 3/3] update
 frameworks/native/neural_network_runtime/nncompiled_cache.cpp.

Signed-off-by: wangyifan
---
 frameworks/native/neural_network_runtime/nncompiled_cache.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frameworks/native/neural_network_runtime/nncompiled_cache.cpp b/frameworks/native/neural_network_runtime/nncompiled_cache.cpp
index 3fbac13..ef71bbf 100644
--- a/frameworks/native/neural_network_runtime/nncompiled_cache.cpp
+++ b/frameworks/native/neural_network_runtime/nncompiled_cache.cpp
@@ -439,7 +439,7 @@ OH_NN_ReturnCode NNCompiledCache::GetCacheFileLength(FILE* pFile, long& fileSize)
         return OH_NN_INVALID_FILE;
     }

-    if ((handleValue == NULL_PTR_LENGTH)) {
+    if (handleValue == NULL_PTR_LENGTH) {
         LOGE("[NNCompiledCache] GetCacheFileLength failed, unable to read huge or empty input stream, "
              "get cache file size=%{public}ld",
              handleValue);
--
Gitee
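Taken together, the series changes the on-disk cache-info layout: the header grows from the fixed members plus per-file checksums plus one opVersion slot to the fixed members plus checksums plus two extension slots (opVersion and isExceedRamLimit), which is why both NUMBER_CACHE_INFO_EXTENSION_MEMBERS constants are 2. A sketch of that size arithmetic and serialization order (the names of the three fixed slots are assumptions inferred from NNCompiledCacheInfo; only deviceId is visible in the nncompiled_cache.h hunk):

    // Sketch of the cache-info buffer layout after this series.
    // Slot names for the three fixed members are assumed, not confirmed by the diff.
    #include <cstdint>
    #include <iostream>
    #include <vector>

    constexpr uint32_t NUMBER_CACHE_INFO_MEMBERS = 3;
    constexpr uint32_t NUMBER_CACHE_INFO_EXTENSION_MEMBERS = 2;

    std::vector<int64_t> BuildCacheInfo(const std::vector<int64_t>& checkSums,
                                        int64_t deviceId, int64_t opVersion,
                                        bool isExceedRamLimit)
    {
        const uint32_t cacheSize =
            NUMBER_CACHE_INFO_MEMBERS + static_cast<uint32_t>(checkSums.size()) +
            NUMBER_CACHE_INFO_EXTENSION_MEMBERS;

        std::vector<int64_t> cacheInfo;
        cacheInfo.reserve(cacheSize);
        cacheInfo.push_back(static_cast<int64_t>(checkSums.size()));  // file count (assumed slot)
        cacheInfo.push_back(1);                                       // cache version (assumed slot)
        cacheInfo.push_back(deviceId);                                // deviceId, as in the header
        for (int64_t checkSum : checkSums) {                          // one slot per cache file
            cacheInfo.push_back(checkSum);
        }
        cacheInfo.push_back(opVersion);                               // extension slot 1
        cacheInfo.push_back(isExceedRamLimit ? 1 : 0);                // extension slot 2, new here
        return cacheInfo;
    }

    int main()
    {
        std::vector<int64_t> info = BuildCacheInfo({0x1234, 0x5678}, 0, 1, true);
        std::cout << "cache info slots: " << info.size() << '\n';  // 3 + 2 + 2 = 7
        return 0;
    }

Because old caches lack the second extension slot, readers comparing sizes computed with the old "+ 1" formula will treat them as stale, which matches the RestoreFromCacheFile path that rebuilds the info block when versions differ.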