From 21038303a59410a036872c1498f95c8ab00d4bde Mon Sep 17 00:00:00 2001
From: zengxianghuai <zengxianghuai@h-partners.com>
Date: Mon, 10 Mar 2025 16:03:01 +0800
Subject: [PATCH 1/4] change bolt to adapt ai4c_onnxrunner.so

---
 0012-AArch64-Adapt-to-ai4c_onnxrunner.patch | 123 ++++++++++++++++++++
 llvm-bolt.spec                              |   6 +-
 2 files changed, 128 insertions(+), 1 deletion(-)
 create mode 100644 0012-AArch64-Adapt-to-ai4c_onnxrunner.patch
diff --git a/0012-AArch64-Adapt-to-ai4c_onnxrunner.patch b/0012-AArch64-Adapt-to-ai4c_onnxrunner.patch
new file mode 100644
index 0000000..1f632ce
--- /dev/null
+++ b/0012-AArch64-Adapt-to-ai4c_onnxrunner.patch
@@ -0,0 +1,123 @@
+From ac79496aa1f07c7506e4abc1b17be762961b783d Mon Sep 17 00:00:00 2001
+From: zengxianghuai <zengxianghuai@h-partners.com>
+Date: Tue, 15 Apr 2025 20:51:02 +0800
+Subject: [PATCH] adapt to ai4c_onnxrunner.so
+
+---
+ bolt/include/bolt/Profile/DataReader.h | 67 +++++++++++++++++---------
+ 1 file changed, 44 insertions(+), 23 deletions(-)
+
+diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h
+index bf732d47c..d7104eed7 100644
+--- a/bolt/include/bolt/Profile/DataReader.h
++++ b/bolt/include/bolt/Profile/DataReader.h
+@@ -46,12 +46,12 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LBREntry &LBR) {
+ }
+ 
+ extern "C" {
+-typedef void *(*CreateONNXRunnerFunc)(const char *);
+-typedef void (*DeleteONNXRunnerFunc)(void *);
+-typedef std::vector<float> (*RunONNXModelFunc)(void *,
+-                                               const std::vector<std::string> &,
+-                                               const std::vector<int64_t> &,
+-                                               const std::vector<float> &, int);
++typedef void (*initEngineFunc)(const char*);
++typedef void (*addInt64InputFunc)(int64_t*, int);
++typedef int (*inferenceFunc)();
++typedef void (*clearEngineFunc)();
++typedef void (*freeEngineFunc)();
++typedef std::vector<float> (*getProbabilityFunc)(int);
+ }
+ 
+ struct Location {
+@@ -295,10 +295,14 @@ public:
+ 
+   ~DataReader() {
+     // delete onnxrunner;
+-    if (onnxRunner && libHandle && handleOnnxRuntime) {
+-      DeleteONNXRunnerFunc deleteONNXRunner =
+-          (DeleteONNXRunnerFunc)dlsym(libHandle, "deleteONNXRunner");
+-      deleteONNXRunner(onnxRunner);
++    if (libHandle && handleOnnxRuntime) {
++      freeEngineFunc freeEngine =
++	  (freeEngineFunc)dlsym(libHandle, "free_engine");
++      if (!freeEngine) {
++        outs() << "error: llvm-bolt failed during loading free_engine.\n";
++        exit(1);
++      }
++      freeEngine();
+       dlclose(libHandle);
+       dlclose(handleOnnxRuntime);
+     }
+@@ -306,21 +310,24 @@ public:
+ 
+   /// Initialize the onnxruntime model.
+   void initializeONNXRunner(const std::string &modelPath) {
+-    if (!onnxRunner && !libHandle && !handleOnnxRuntime) {
++    if (!libHandle && !handleOnnxRuntime) {
+       handleOnnxRuntime =
+           dlopen("libonnxruntime.so", RTLD_LAZY | RTLD_GLOBAL);
+       if (handleOnnxRuntime == nullptr) {
+         outs() << "error: llvm-bolt failed during loading onnxruntime.so.\n";
+         exit(1);
+       }
+-      libHandle = dlopen("libONNXRunner.so", RTLD_LAZY);
++      libHandle = dlopen("ai4c_onnxrunner.so", RTLD_LAZY);
+       if (libHandle == nullptr) {
+-        outs() << "error: llvm-bolt failed during loading libONNXRunner.so.\n";
++        outs() << "error: llvm-bolt failed during loading ai4c_onnxrunner.so.\n";
++        exit(1);
++      }
++      initEngineFunc initialize = (initEngineFunc)dlsym(libHandle, "initialize");
++      if (!initialize) {
++        outs() << "error: llvm-bolt failed during loading initialize.\n";
+         exit(1);
+       }
+-      CreateONNXRunnerFunc createONNXRunner =
+-          (CreateONNXRunnerFunc)dlsym(libHandle, "createONNXRunner");
+-      onnxRunner = createONNXRunner(modelPath.c_str());
++      initialize(modelPath.c_str());
+     }
+   }
+ 
+@@ -328,16 +335,30 @@ public:
+   float ONNXInference(const std::vector<std::string> &input_string,
+                       const std::vector<int64_t> &input_int64,
+                       const std::vector<float> &input_float, int batch_size = 1) {
+-    if (onnxRunner && libHandle) {
+-      RunONNXModelFunc runONNXModel =
+-          (RunONNXModelFunc)dlsym(libHandle, "runONNXModel");
+-      std::vector<float> model_preds = runONNXModel(
+-          onnxRunner, input_string, input_int64, input_float, batch_size);
+-      if (model_preds.size() <= 0) {
++    if (libHandle) {
++      addInt64InputFunc addInt64Input =
++          (addInt64InputFunc)dlsym(libHandle, "add_int64_input");
++      inferenceFunc inference = (inferenceFunc)dlsym(libHandle, "inference");
++      getProbabilityFunc getProbability =
++          (getProbabilityFunc)dlsym(libHandle, "get_probability");
++      clearEngineFunc clearEngine =
++          (clearEngineFunc)dlsym(libHandle, "clear_engine");
++      
++      if (!addInt64Input || !inference || !getProbability || !clearEngine) {
++        outs() << "error: llvm-bolt failed during loading ai4c_onnxrunner.so.\n";
++        exit(1);
++      }
++      
++      std::vector<int64_t> temp(input_int64.begin(), input_int64.end());
++      addInt64Input(temp.data(), temp.size());
++      inference();
++      std::vector<float> preds = getProbability(batch_size);
++      if (preds.size() <= 0) {
+         outs() << "error: llvm-bolt model prediction result cannot be empty.\n";
+         exit(1);
+       }
+-      float pred = model_preds[0];
++      float pred = preds[0];
++      clearEngine();
+       return pred;
+     }
+     return -1.0;
+-- 
+2.33.0
+
diff --git a/llvm-bolt.spec b/llvm-bolt.spec
index c3a748a..0bf0392 100644
--- a/llvm-bolt.spec
+++ b/llvm-bolt.spec
@@ -27,7 +27,7 @@
 
 Name:           %{pkg_name}
 Version:        %{bolt_version}
-Release:        5
+Release:        6
 Summary:        BOLT is a post-link optimizer developed to speed up large applications
 License:        Apache-2.0
 URL:            https://github.com/llvm/llvm-project/tree/main/bolt
@@ -46,6 +46,7 @@ Patch8:         0008-merge-fdata-Support-process-no_lbr-profile-file.patch
 Patch9:         0009-support-aarch64-instrumentation.patch
 Patch10:        0010-AArch64-Add-hybrid-guess-approach-for-edge-weight-estimation.patch
 Patch11:        0011-support-D-FOT-addrs-data-parsing-for-optimized-binary.patch
+Patch12:        0012-AArch64-Adapt-to-ai4c_onnxrunner.patch
 
 BuildRequires:  gcc
 BuildRequires:  gcc-c++
@@ -159,6 +160,9 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{__cmake_builddir}/%{_lib}/lib*.
 %doc %{install_docdir}
 
 %changelog
+* Tue Apr 15 2025 zengxianghuai <zengxianghuai@h-partners.com> 17.0.6-6
+- adapt to ai4c_onnxrunner.so
+
 * Tue Feb 11 2025 rfwang07 <wangrufeng5@huawei.com> 17.0.6-5
 - Type:backport
 - ID:NA
-- 
Gitee


From bdbf47c10541b9af39fdc99c541154464f4d5e41 Mon Sep 17 00:00:00 2001
From: zengxianghuai <zengxianghuai@h-partners.com>
Date: Mon, 28 Apr 2025 20:38:09 +0800
Subject: [PATCH 2/4] fix DataReader.h

---
 ...013-AArch64-Adapt-to-ai4c_onnxrunner.patch | 83 +++++++++++--------
 llvm-bolt.spec                                |  2 +-
 2 files changed, 51 insertions(+), 34 deletions(-)
 rename 0012-AArch64-Adapt-to-ai4c_onnxrunner.patch => 0013-AArch64-Adapt-to-ai4c_onnxrunner.patch (64%)

diff --git a/0012-AArch64-Adapt-to-ai4c_onnxrunner.patch b/0013-AArch64-Adapt-to-ai4c_onnxrunner.patch
similarity index 64%
rename from 0012-AArch64-Adapt-to-ai4c_onnxrunner.patch
rename to 0013-AArch64-Adapt-to-ai4c_onnxrunner.patch
index 1f632ce..f8d711e 100644
--- a/0012-AArch64-Adapt-to-ai4c_onnxrunner.patch
+++ b/0013-AArch64-Adapt-to-ai4c_onnxrunner.patch
@@ -1,17 +1,17 @@
-From ac79496aa1f07c7506e4abc1b17be762961b783d Mon Sep 17 00:00:00 2001
+From bf71c96ed790ae7d0d5c85e22f6c3fc09fd949e4 Mon Sep 17 00:00:00 2001
 From: zengxianghuai <zengxianghuai@h-partners.com>
-Date: Tue, 15 Apr 2025 20:51:02 +0800
-Subject: [PATCH] adapt to ai4c_onnxrunner.so
+Date: Mon, 28 Apr 2025 20:28:45 +0800
+Subject: [PATCH] update DataReader.h
 
 ---
- bolt/include/bolt/Profile/DataReader.h | 67 +++++++++++++++++---------
- 1 file changed, 44 insertions(+), 23 deletions(-)
+ bolt/include/bolt/Profile/DataReader.h | 83 ++++++++++++++++----------
+ 1 file changed, 53 insertions(+), 30 deletions(-)
 
 diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h
-index bf732d47c..d7104eed7 100644
+index bf732d47c..9c35c78a2 100644
 --- a/bolt/include/bolt/Profile/DataReader.h
 +++ b/bolt/include/bolt/Profile/DataReader.h
-@@ -46,12 +46,12 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LBREntry &LBR) {
+@@ -46,12 +46,13 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LBREntry &LBR) {
  }
  
  extern "C" {
@@ -21,16 +21,17 @@ index bf732d47c..d7104eed7 100644
 -                                               const std::vector<std::string> &,
 -                                               const std::vector<int64_t> &,
 -                                               const std::vector<float> &, int);
-+typedef void (*initEngineFunc)(const char*);
++typedef void (*CreateONNXRunnerFunc)(const char*);
 +typedef void (*addInt64InputFunc)(int64_t*, int);
 +typedef int (*inferenceFunc)();
 +typedef void (*clearEngineFunc)();
 +typedef void (*freeEngineFunc)();
-+typedef std::vector<float> (*getProbabilityFunc)(int);
++typedef float *(*getProbabilityFunc)(int, int);
++typedef void (*freeProbabilityFunc)(float*);
  }
  
  struct Location {
-@@ -295,10 +295,14 @@ public:
+@@ -295,32 +296,33 @@ public:
  
    ~DataReader() {
      // delete onnxrunner;
@@ -38,37 +39,37 @@ index bf732d47c..d7104eed7 100644
 -      DeleteONNXRunnerFunc deleteONNXRunner =
 -          (DeleteONNXRunnerFunc)dlsym(libHandle, "deleteONNXRunner");
 -      deleteONNXRunner(onnxRunner);
-+    if (libHandle && handleOnnxRuntime) {
++    if (libHandle) {
 +      freeEngineFunc freeEngine =
-+	  (freeEngineFunc)dlsym(libHandle, "free_engine");
++          (freeEngineFunc)dlsym(libHandle, "free_engine");
 +      if (!freeEngine) {
 +        outs() << "error: llvm-bolt failed during loading free_engine.\n";
 +        exit(1);
 +      }
 +      freeEngine();
        dlclose(libHandle);
-       dlclose(handleOnnxRuntime);
+-      dlclose(handleOnnxRuntime);
      }
-@@ -306,21 +310,24 @@ public:
+   }
  
    /// Initialize the onnxruntime model.
    void initializeONNXRunner(const std::string &modelPath) {
 -    if (!onnxRunner && !libHandle && !handleOnnxRuntime) {
-+    if (!libHandle && !handleOnnxRuntime) {
-       handleOnnxRuntime =
-           dlopen("libonnxruntime.so", RTLD_LAZY | RTLD_GLOBAL);
-       if (handleOnnxRuntime == nullptr) {
-         outs() << "error: llvm-bolt failed during loading onnxruntime.so.\n";
+-      handleOnnxRuntime =
+-          dlopen("libonnxruntime.so", RTLD_LAZY | RTLD_GLOBAL);
+-      if (handleOnnxRuntime == nullptr) {
+-        outs() << "error: llvm-bolt failed during loading onnxruntime.so.\n";
++    if (!libHandle) {
++      libHandle = dlopen("ai4c_onnxrunner.so", RTLD_LAZY);
++      if (!libHandle) {
++        outs() << "error: llvm-bolt failed during loading ai4c_onnxrunner.so.\n";
          exit(1);
        }
 -      libHandle = dlopen("libONNXRunner.so", RTLD_LAZY);
-+      libHandle = dlopen("ai4c_onnxrunner.so", RTLD_LAZY);
-       if (libHandle == nullptr) {
+-      if (libHandle == nullptr) {
 -        outs() << "error: llvm-bolt failed during loading libONNXRunner.so.\n";
-+        outs() << "error: llvm-bolt failed during loading ai4c_onnxrunner.so.\n";
-+        exit(1);
-+      }
-+      initEngineFunc initialize = (initEngineFunc)dlsym(libHandle, "initialize");
++      CreateONNXRunnerFunc initialize =
++          (CreateONNXRunnerFunc)dlsym(libHandle, "initialize");
 +      if (!initialize) {
 +        outs() << "error: llvm-bolt failed during loading initialize.\n";
          exit(1);
@@ -80,7 +81,7 @@ index bf732d47c..d7104eed7 100644
      }
    }
  
-@@ -328,16 +335,30 @@ public:
+@@ -328,16 +330,37 @@ public:
    float ONNXInference(const std::vector<std::string> &input_string,
                        const std::vector<int64_t> &input_int64,
                        const std::vector<float> &input_float, int batch_size = 1) {
@@ -95,29 +96,45 @@ index bf732d47c..d7104eed7 100644
 +          (addInt64InputFunc)dlsym(libHandle, "add_int64_input");
 +      inferenceFunc inference = (inferenceFunc)dlsym(libHandle, "inference");
 +      getProbabilityFunc getProbability =
-+          (getProbabilityFunc)dlsym(libHandle, "get_probability");
++          (getProbabilityFunc)dlsym(libHandle, "get_label_probability");
++      freeProbabilityFunc freeProbability =
++          (freeProbabilityFunc)dlsym(libHandle, "free_label_probability");
 +      clearEngineFunc clearEngine =
 +          (clearEngineFunc)dlsym(libHandle, "clear_engine");
 +      
-+      if (!addInt64Input || !inference || !getProbability || !clearEngine) {
++      if (!addInt64Input || !inference || !getProbability || !freeProbability ||
++          !clearEngine) {
 +        outs() << "error: llvm-bolt failed during loading ai4c_onnxrunner.so.\n";
 +        exit(1);
 +      }
-+      
-+      std::vector<int64_t> temp(input_int64.begin(), input_int64.end());
-+      addInt64Input(temp.data(), temp.size());
++
++      std::vector<int64_t> modelInput(input_int64.begin(), input_int64.end());
++      addInt64Input(modelInput.data(), modelInput.size());
 +      inference();
-+      std::vector<float> preds = getProbability(batch_size);
-+      if (preds.size() <= 0) {
++
++      // output tensors: {label, label_prob}, label_prob=sequence<map<int64, float32>>
++      float* preds = getProbability(1, 1);
++      if (preds == nullptr) {
          outs() << "error: llvm-bolt model prediction result cannot be empty.\n";
          exit(1);
        }
 -      float pred = model_preds[0];
++
 +      float pred = preds[0];
++      freeProbability(preds);
 +      clearEngine();
        return pred;
      }
      return -1.0;
+@@ -356,7 +379,7 @@ protected:
+   void *libHandle;
+ 
+   /// The library handle of the onnxruntime.
+-  void *handleOnnxRuntime;
++  /// void *handleOnnxRuntime;
+ 
+   /// The annotating threshold for the model prediction.
+   float threshold;
 -- 
 2.33.0
 
diff --git a/llvm-bolt.spec b/llvm-bolt.spec
index 0bf0392..39c3033 100644
--- a/llvm-bolt.spec
+++ b/llvm-bolt.spec
@@ -46,7 +46,7 @@ Patch8:         0008-merge-fdata-Support-process-no_lbr-profile-file.patch
 Patch9:         0009-support-aarch64-instrumentation.patch
 Patch10:        0010-AArch64-Add-hybrid-guess-approach-for-edge-weight-estimation.patch
 Patch11:        0011-support-D-FOT-addrs-data-parsing-for-optimized-binary.patch
-Patch12:        0012-AArch64-Adapt-to-ai4c_onnxrunner.patch
+Patch13:        0013-AArch64-Adapt-to-ai4c_onnxrunner.patch
 
 BuildRequires:  gcc
 BuildRequires:  gcc-c++
-- 
Gitee


From 0f7d43b75fe1d8553db72d33f61241f7e8a16f37 Mon Sep 17 00:00:00 2001
From: zengxianghuai <zengxianghuai@h-partners.com>
Date: Mon, 10 Mar 2025 16:03:01 +0800
Subject: [PATCH 3/4] change bolt to adapt ai4c_onnxrunner.so

---
 0013-AArch64-Adapt-to-ai4c_onnxrunner.patch | 140 ++++++++++++++++++++
 llvm-bolt.spec                              |   6 +-
 2 files changed, 145 insertions(+), 1 deletion(-)
 create mode 100644 0013-AArch64-Adapt-to-ai4c_onnxrunner.patch

diff --git a/0013-AArch64-Adapt-to-ai4c_onnxrunner.patch b/0013-AArch64-Adapt-to-ai4c_onnxrunner.patch
new file mode 100644
index 0000000..f8d711e
--- /dev/null
+++ b/0013-AArch64-Adapt-to-ai4c_onnxrunner.patch
@@ -0,0 +1,140 @@
+From bf71c96ed790ae7d0d5c85e22f6c3fc09fd949e4 Mon Sep 17 00:00:00 2001
+From: zengxianghuai <zengxianghuai@h-partners.com>
+Date: Mon, 28 Apr 2025 20:28:45 +0800
+Subject: [PATCH] update DataReader.h
+
+---
+ bolt/include/bolt/Profile/DataReader.h | 83 ++++++++++++++++----------
+ 1 file changed, 53 insertions(+), 30 deletions(-)
+
+diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h
+index bf732d47c..9c35c78a2 100644
+--- a/bolt/include/bolt/Profile/DataReader.h
++++ b/bolt/include/bolt/Profile/DataReader.h
+@@ -46,12 +46,13 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LBREntry &LBR) {
+ }
+ 
+ extern "C" {
+-typedef void *(*CreateONNXRunnerFunc)(const char *);
+-typedef void (*DeleteONNXRunnerFunc)(void *);
+-typedef std::vector<float> (*RunONNXModelFunc)(void *,
+-                                               const std::vector<std::string> &,
+-                                               const std::vector<int64_t> &,
+-                                               const std::vector<float> &, int);
++typedef void (*CreateONNXRunnerFunc)(const char*);
++typedef void (*addInt64InputFunc)(int64_t*, int);
++typedef int (*inferenceFunc)();
++typedef void (*clearEngineFunc)();
++typedef void (*freeEngineFunc)();
++typedef float *(*getProbabilityFunc)(int, int);
++typedef void (*freeProbabilityFunc)(float*);
+ }
+ 
+ struct Location {
+@@ -295,32 +296,33 @@ public:
+ 
+   ~DataReader() {
+     // delete onnxrunner;
+-    if (onnxRunner && libHandle && handleOnnxRuntime) {
+-      DeleteONNXRunnerFunc deleteONNXRunner =
+-          (DeleteONNXRunnerFunc)dlsym(libHandle, "deleteONNXRunner");
+-      deleteONNXRunner(onnxRunner);
++    if (libHandle) {
++      freeEngineFunc freeEngine =
++          (freeEngineFunc)dlsym(libHandle, "free_engine");
++      if (!freeEngine) {
++        outs() << "error: llvm-bolt failed during loading free_engine.\n";
++        exit(1);
++      }
++      freeEngine();
+       dlclose(libHandle);
+-      dlclose(handleOnnxRuntime);
+     }
+   }
+ 
+   /// Initialize the onnxruntime model.
+   void initializeONNXRunner(const std::string &modelPath) {
+-    if (!onnxRunner && !libHandle && !handleOnnxRuntime) {
+-      handleOnnxRuntime =
+-          dlopen("libonnxruntime.so", RTLD_LAZY | RTLD_GLOBAL);
+-      if (handleOnnxRuntime == nullptr) {
+-        outs() << "error: llvm-bolt failed during loading onnxruntime.so.\n";
++    if (!libHandle) {
++      libHandle = dlopen("ai4c_onnxrunner.so", RTLD_LAZY);
++      if (!libHandle) {
++        outs() << "error: llvm-bolt failed during loading ai4c_onnxrunner.so.\n";
+         exit(1);
+       }
+-      libHandle = dlopen("libONNXRunner.so", RTLD_LAZY);
+-      if (libHandle == nullptr) {
+-        outs() << "error: llvm-bolt failed during loading libONNXRunner.so.\n";
+      CreateONNXRunnerFunc initialize =
+          (CreateONNXRunnerFunc)dlsym(libHandle, "initialize");
++      if (!initialize) {
++        outs() << "error: llvm-bolt failed during loading initialize.\n";
+         exit(1);
+       }
+-      CreateONNXRunnerFunc createONNXRunner =
+-          (CreateONNXRunnerFunc)dlsym(libHandle, "createONNXRunner");
+-      onnxRunner = createONNXRunner(modelPath.c_str());
++      initialize(modelPath.c_str());
+     }
+   }
+ 
+@@ -328,16 +330,37 @@ public:
+   float ONNXInference(const std::vector<std::string> &input_string,
+                       const std::vector<int64_t> &input_int64,
+                       const std::vector<float> &input_float, int batch_size = 1) {
+-    if (onnxRunner && libHandle) {
+-      RunONNXModelFunc runONNXModel =
+-          (RunONNXModelFunc)dlsym(libHandle, "runONNXModel");
+-      std::vector<float> model_preds = runONNXModel(
+-          onnxRunner, input_string, input_int64, input_float, batch_size);
+-      if (model_preds.size() <= 0) {
++    if (libHandle) {
++      addInt64InputFunc addInt64Input =
++          (addInt64InputFunc)dlsym(libHandle, "add_int64_input");
++      inferenceFunc inference = (inferenceFunc)dlsym(libHandle, "inference");
++      getProbabilityFunc getProbability =
++          (getProbabilityFunc)dlsym(libHandle, "get_label_probability");
++      freeProbabilityFunc freeProbability =
++          (freeProbabilityFunc)dlsym(libHandle, "free_label_probability");
++      clearEngineFunc clearEngine =
++          (clearEngineFunc)dlsym(libHandle, "clear_engine");
++      
++      if (!addInt64Input || !inference || !getProbability || !freeProbability ||
++          !clearEngine) {
++        outs() << "error: llvm-bolt failed during loading ai4c_onnxrunner.so.\n";
++        exit(1);
++      }
++
++      std::vector<int64_t> modelInput(input_int64.begin(), input_int64.end());
++      addInt64Input(modelInput.data(), modelInput.size());
++      inference();
++
++      // output tensors: {label, label_prob}, label_prob=sequence<map<int64, float32>>
++      float* preds = getProbability(1, 1);
++      if (preds == nullptr) {
+         outs() << "error: llvm-bolt model prediction result cannot be empty.\n";
+         exit(1);
+       }
+-      float pred = model_preds[0];
++
++      float pred = preds[0];
++      freeProbability(preds);
++      clearEngine();
+       return pred;
+     }
+     return -1.0;
+@@ -356,7 +379,7 @@ protected:
+   void *libHandle;
+ 
+   /// The library handle of the onnxruntime.
+-  void *handleOnnxRuntime;
++  /// void *handleOnnxRuntime;
+ 
+   /// The annotating threshold for the model prediction.
+   float threshold;
+-- 
+2.33.0
+
diff --git a/llvm-bolt.spec b/llvm-bolt.spec
index c3a748a..39c3033 100644
--- a/llvm-bolt.spec
+++ b/llvm-bolt.spec
@@ -27,7 +27,7 @@
 
 Name:           %{pkg_name}
 Version:        %{bolt_version}
-Release:        5
+Release:        6
 Summary:        BOLT is a post-link optimizer developed to speed up large applications
 License:        Apache-2.0
 URL:            https://github.com/llvm/llvm-project/tree/main/bolt
@@ -46,6 +46,7 @@ Patch8:         0008-merge-fdata-Support-process-no_lbr-profile-file.patch
 Patch9:         0009-support-aarch64-instrumentation.patch
 Patch10:        0010-AArch64-Add-hybrid-guess-approach-for-edge-weight-estimation.patch
 Patch11:        0011-support-D-FOT-addrs-data-parsing-for-optimized-binary.patch
+Patch13:        0013-AArch64-Adapt-to-ai4c_onnxrunner.patch
 
 BuildRequires:  gcc
 BuildRequires:  gcc-c++
@@ -159,6 +160,9 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{__cmake_builddir}/%{_lib}/lib*.
 %doc %{install_docdir}
 
 %changelog
+* Tue Apr 15 2025 zengxianghuai <zengxianghuai@h-partners.com> 17.0.6-6
+- adapt to ai4c_onnxrunner.so
+
 * Tue Feb 11 2025 rfwang07 <wangrufeng5@huawei.com> 17.0.6-5
 - Type:backport
 - ID:NA
-- 
Gitee


From 6aa549bec0133a494d7f2531500370305d4d87a6 Mon Sep 17 00:00:00 2001
From: rfwang07 <wangrufeng5@huawei.com>
Date: Tue, 22 Apr 2025 19:46:53 +0800
Subject: [PATCH 4/4] Add Om for Kunpeng Opts

---
 0012-Add-Om-for-Kunpeng-Opts.patch | 380 +++++++++++++++++++++++++++++
 llvm-bolt.spec                     |   9 +
 2 files changed, 389 insertions(+)
 create mode 100644 0012-Add-Om-for-Kunpeng-Opts.patch

diff --git a/0012-Add-Om-for-Kunpeng-Opts.patch b/0012-Add-Om-for-Kunpeng-Opts.patch
new file mode 100644
index 0000000..0f8a8ee
--- /dev/null
+++ b/0012-Add-Om-for-Kunpeng-Opts.patch
@@ -0,0 +1,380 @@
+From f4757a1bff16b44a329e3f70973ca6c623518291 Mon Sep 17 00:00:00 2001
+From: rfwang07 <wangrufeng5@huawei.com>
+Date: Tue, 22 Apr 2025 19:33:52 +0800
+Subject: [PATCH] Add Om for Kunpeng Opts
+
+---
+ bolt/include/bolt/Passes/BinaryPasses.h   |  8 +++
+ bolt/include/bolt/Passes/SplitFunctions.h | 19 +++++++
+ bolt/lib/Core/BinaryFunctionProfile.cpp   |  6 +--
+ bolt/lib/Passes/BinaryPasses.cpp          | 10 +---
+ bolt/lib/Passes/IndirectCallPromotion.cpp |  2 +-
+ bolt/lib/Passes/Inliner.cpp               |  2 +-
+ bolt/lib/Passes/MCF.cpp                   |  2 +-
+ bolt/lib/Passes/ReorderFunctions.cpp      |  2 +-
+ bolt/lib/Passes/SplitFunctions.cpp        | 23 +--------
+ bolt/lib/Passes/VeneerElimination.cpp     |  2 +-
+ bolt/lib/Rewrite/BinaryPassManager.cpp    |  6 +--
+ bolt/lib/Rewrite/RewriteInstance.cpp      |  2 +-
+ bolt/tools/driver/llvm-bolt.cpp           | 61 +++++++++++++++++++++++
+ 13 files changed, 104 insertions(+), 41 deletions(-)
+
+diff --git a/bolt/include/bolt/Passes/BinaryPasses.h b/bolt/include/bolt/Passes/BinaryPasses.h
+index dace07e..5a2fe3b 100644
+--- a/bolt/include/bolt/Passes/BinaryPasses.h
++++ b/bolt/include/bolt/Passes/BinaryPasses.h
+@@ -23,6 +23,14 @@
+ #include <string>
+ #include <unordered_set>
+ 
++namespace opts {
++enum SctcModes : char {
++  SctcAlways,
++  SctcPreserveDirection,
++  SctcHeuristic
++};
++}
++
+ namespace llvm {
+ namespace bolt {
+ 
+diff --git a/bolt/include/bolt/Passes/SplitFunctions.h b/bolt/include/bolt/Passes/SplitFunctions.h
+index 4058f33..a8c3a14 100644
+--- a/bolt/include/bolt/Passes/SplitFunctions.h
++++ b/bolt/include/bolt/Passes/SplitFunctions.h
+@@ -15,6 +15,25 @@
+ #include "llvm/Support/CommandLine.h"
+ #include <atomic>
+ 
++using namespace llvm;
++
++class DeprecatedSplitFunctionOptionParser : public cl::parser<bool> {
++public:
++  explicit DeprecatedSplitFunctionOptionParser(cl::Option &O)
++      : cl::parser<bool>(O) {}
++
++  bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) {
++    if (Arg == "2" || Arg == "3") {
++      Value = true;
++      errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" "
++                        "for option -{1} is deprecated\n",
++                        Arg, ArgName);
++      return false;
++    }
++    return cl::parser<bool>::parse(O, ArgName, Arg, Value);
++  }
++};
++
+ namespace llvm {
+ namespace bolt {
+ 
+diff --git a/bolt/lib/Core/BinaryFunctionProfile.cpp b/bolt/lib/Core/BinaryFunctionProfile.cpp
+index 0d705cd..c062f8d 100644
+--- a/bolt/lib/Core/BinaryFunctionProfile.cpp
++++ b/bolt/lib/Core/BinaryFunctionProfile.cpp
+@@ -44,17 +44,17 @@ static cl::alias ICPAlias("icp",
+ 
+ extern cl::opt<JumpTableSupportLevel> JumpTables;
+ 
+-static cl::opt<bool> FixFuncCounts(
++cl::opt<bool> FixFuncCounts(
+     "fix-func-counts",
+     cl::desc("adjust function counts based on basic blocks execution count"),
+     cl::Hidden, cl::cat(BoltOptCategory));
+ 
+-static cl::opt<bool> FixBlockCounts(
++cl::opt<bool> FixBlockCounts(
+     "fix-block-counts",
+     cl::desc("adjust block counts based on outgoing branch counts"),
+     cl::init(true), cl::Hidden, cl::cat(BoltOptCategory));
+ 
+-static cl::opt<bool>
++cl::opt<bool>
+     InferFallThroughs("infer-fall-throughs",
+                       cl::desc("infer execution count for fall-through blocks"),
+                       cl::Hidden, cl::cat(BoltOptCategory));
+diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
+index a674fb4..1db25ff 100644
+--- a/bolt/lib/Passes/BinaryPasses.cpp
++++ b/bolt/lib/Passes/BinaryPasses.cpp
+@@ -88,7 +88,7 @@ static cl::opt<bool> MinBranchClusters(
+              "branches"),
+     cl::Hidden, cl::cat(BoltOptCategory));
+ 
+-static cl::list<Peepholes::PeepholeOpts> Peepholes(
++cl::list<Peepholes::PeepholeOpts> Peepholes(
+     "peepholes", cl::CommaSeparated, cl::desc("enable peephole optimizations"),
+     cl::value_desc("opt1,opt2,opt3,..."),
+     cl::values(clEnumValN(Peepholes::PEEP_NONE, "none", "disable peepholes"),
+@@ -176,13 +176,7 @@ static cl::opt<bool>
+                      cl::desc("print the list of functions with stale profile"),
+                      cl::Hidden, cl::cat(BoltOptCategory));
+ 
+-enum SctcModes : char {
+-  SctcAlways,
+-  SctcPreserveDirection,
+-  SctcHeuristic
+-};
+-
+-static cl::opt<SctcModes>
++cl::opt<SctcModes>
+ SctcMode("sctc-mode",
+   cl::desc("mode for simplify conditional tail calls"),
+   cl::init(SctcAlways),
+diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp
+index ea80194..4ff6837 100644
+--- a/bolt/lib/Passes/IndirectCallPromotion.cpp
++++ b/bolt/lib/Passes/IndirectCallPromotion.cpp
+@@ -72,7 +72,7 @@ static cl::alias ICPMispredictThresholdAlias(
+     cl::desc("alias for --indirect-call-promotion-mispredict-threshold"),
+     cl::aliasopt(ICPMispredictThreshold));
+ 
+-static cl::opt<bool> ICPUseMispredicts(
++cl::opt<bool> ICPUseMispredicts(
+     "indirect-call-promotion-use-mispredicts",
+     cl::desc("use misprediction frequency for determining whether or not ICP "
+              "should be applied at a callsite.  The "
+diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
+index 67dd294..7fdd780 100644
+--- a/bolt/lib/Passes/Inliner.cpp
++++ b/bolt/lib/Passes/Inliner.cpp
+@@ -50,7 +50,7 @@ ForceInlineFunctions("force-inline",
+   cl::Hidden,
+   cl::cat(BoltOptCategory));
+ 
+-static cl::opt<bool> InlineAll("inline-all", cl::desc("inline all functions"),
++cl::opt<bool> InlineAll("inline-all", cl::desc("inline all functions"),
+                                cl::cat(BoltOptCategory));
+ 
+ static cl::opt<bool> InlineIgnoreLeafCFI(
+diff --git a/bolt/lib/Passes/MCF.cpp b/bolt/lib/Passes/MCF.cpp
+index a6455bb..bd98286 100644
+--- a/bolt/lib/Passes/MCF.cpp
++++ b/bolt/lib/Passes/MCF.cpp
+@@ -31,7 +31,7 @@ extern cl::OptionCategory BoltOptCategory;
+ 
+ extern cl::opt<bool> TimeOpts;
+ 
+-static cl::opt<bool> IterativeGuess(
++cl::opt<bool> IterativeGuess(
+     "iterative-guess",
+     cl::desc("in non-LBR mode, guess edge counts using iterative technique"),
+     cl::Hidden, cl::cat(BoltOptCategory));
+diff --git a/bolt/lib/Passes/ReorderFunctions.cpp b/bolt/lib/Passes/ReorderFunctions.cpp
+index 2fc99f6..359cd05 100644
+--- a/bolt/lib/Passes/ReorderFunctions.cpp
++++ b/bolt/lib/Passes/ReorderFunctions.cpp
+@@ -98,7 +98,7 @@ static cl::opt<bool> CgIgnoreRecursiveCalls(
+     cl::desc("ignore recursive calls when constructing the call graph"),
+     cl::init(true), cl::cat(BoltOptCategory));
+ 
+-static cl::opt<bool>
++cl::opt<bool>
+ CgUseSplitHotSize("cg-use-split-hot-size",
+   cl::desc("use hot/cold data on basic blocks to determine hot sizes for "
+            "call graph functions"),
+diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp
+index 34973ce..e934b75 100644
+--- a/bolt/lib/Passes/SplitFunctions.cpp
++++ b/bolt/lib/Passes/SplitFunctions.cpp
+@@ -34,25 +34,6 @@
+ using namespace llvm;
+ using namespace bolt;
+ 
+-namespace {
+-class DeprecatedSplitFunctionOptionParser : public cl::parser<bool> {
+-public:
+-  explicit DeprecatedSplitFunctionOptionParser(cl::Option &O)
+-      : cl::parser<bool>(O) {}
+-
+-  bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) {
+-    if (Arg == "2" || Arg == "3") {
+-      Value = true;
+-      errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" "
+-                        "for option -{1} is deprecated\n",
+-                        Arg, ArgName);
+-      return false;
+-    }
+-    return cl::parser<bool>::parse(O, ArgName, Arg, Value);
+-  }
+-};
+-} // namespace
+-
+ namespace opts {
+ 
+ extern cl::OptionCategory BoltOptCategory;
+@@ -61,7 +42,7 @@ extern cl::opt<bool> SplitEH;
+ extern cl::opt<unsigned> ExecutionCountThreshold;
+ extern cl::opt<uint32_t> RandomSeed;
+ 
+-static cl::opt<bool> AggressiveSplitting(
++cl::opt<bool> AggressiveSplitting(
+     "split-all-cold", cl::desc("outline as many cold basic blocks as possible"),
+     cl::cat(BoltOptCategory));
+ 
+@@ -74,7 +55,7 @@ static cl::opt<unsigned> SplitAlignThreshold(
+ 
+     cl::Hidden, cl::cat(BoltOptCategory));
+ 
+-static cl::opt<bool, false, DeprecatedSplitFunctionOptionParser>
++cl::opt<bool, false, DeprecatedSplitFunctionOptionParser>
+     SplitFunctions("split-functions",
+                    cl::desc("split functions into fragments"),
+                    cl::cat(BoltOptCategory));
+diff --git a/bolt/lib/Passes/VeneerElimination.cpp b/bolt/lib/Passes/VeneerElimination.cpp
+index eadbfc1..611d027 100644
+--- a/bolt/lib/Passes/VeneerElimination.cpp
++++ b/bolt/lib/Passes/VeneerElimination.cpp
+@@ -20,7 +20,7 @@ namespace opts {
+ 
+ extern cl::OptionCategory BoltOptCategory;
+ 
+-static llvm::cl::opt<bool>
++llvm::cl::opt<bool>
+     EliminateVeneers("elim-link-veneers",
+                      cl::desc("run veneer elimination pass"), cl::init(true),
+                      cl::Hidden, cl::cat(BoltOptCategory));
+diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
+index 517984d..c231037 100644
+--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
++++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
+@@ -58,7 +58,7 @@ DynoStatsAll("dyno-stats-all",
+   cl::desc("print dyno stats after each stage"),
+   cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
+ 
+-static cl::opt<bool>
++cl::opt<bool>
+     EliminateUnreachable("eliminate-unreachable",
+                          cl::desc("eliminate unreachable code"), cl::init(true),
+                          cl::cat(BoltOptCategory));
+@@ -207,12 +207,12 @@ static cl::opt<bool> RegReAssign(
+         "reassign registers so as to avoid using REX prefixes in hot code"),
+     cl::cat(BoltOptCategory));
+ 
+-static cl::opt<bool> SimplifyConditionalTailCalls(
++cl::opt<bool> SimplifyConditionalTailCalls(
+     "simplify-conditional-tail-calls",
+     cl::desc("simplify conditional tail calls by removing unnecessary jumps"),
+     cl::init(true), cl::cat(BoltOptCategory));
+ 
+-static cl::opt<bool> SimplifyRODataLoads(
++cl::opt<bool> SimplifyRODataLoads(
+     "simplify-rodata-loads",
+     cl::desc("simplify loads from read-only sections by replacing the memory "
+              "operand with the constant found in the corresponding section"),
+diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
+index 255c1ae..7a931db 100644
+--- a/bolt/lib/Rewrite/RewriteInstance.cpp
++++ b/bolt/lib/Rewrite/RewriteInstance.cpp
+@@ -234,7 +234,7 @@ static cl::opt<std::string> DWPPathName("dwp",
+                                         cl::Hidden, cl::init(""),
+                                         cl::cat(BoltCategory));
+ 
+-static cl::opt<bool>
++cl::opt<bool>
+ UseGnuStack("use-gnu-stack",
+   cl::desc("use GNU_STACK program header for new segment (workaround for "
+            "issues with strip/objcopy)"),
+diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp
+index 5a3af6a..8cba4d2 100644
+--- a/bolt/tools/driver/llvm-bolt.cpp
++++ b/bolt/tools/driver/llvm-bolt.cpp
+@@ -26,6 +26,10 @@
+ #include "llvm/Support/PrettyStackTrace.h"
+ #include "llvm/Support/Signals.h"
+ #include "llvm/Support/TargetSelect.h"
++#include "bolt/Passes/BinaryPasses.h"
++#include "bolt/Passes/ReorderFunctions.h"
++#include "bolt/Passes/SplitFunctions.h"
++#include "bolt/Passes/TailDuplication.h"
+ 
+ #define DEBUG_TYPE "bolt"
+ 
+@@ -35,6 +39,28 @@ using namespace bolt;
+ 
+ namespace opts {
+ 
++extern cl::opt<ReorderBasicBlocks::LayoutType> ReorderBlocks;
++extern cl::opt<ReorderFunctions::ReorderType> ReorderFunctions;
++extern cl::opt<bool, false, DeprecatedSplitFunctionOptionParser> SplitFunctions;
++extern cl::opt<bool> AggressiveSplitting;
++extern cl::opt<bool> ICF;
++extern cl::opt<bool> UseGnuStack;
++extern cl::opt<bool> InlineAll;
++extern cl::opt<bool> InferFallThroughs;
++extern cl::opt<bool> SimplifyConditionalTailCalls;
++extern cl::opt<bool> SimplifyRODataLoads;
++extern cl::opt<bool> ICPUseMispredicts;
++extern cl::opt<bool> EliminateVeneers;
++extern cl::opt<bool> EliminateUnreachable;
++extern cl::opt<bool> FixBlockCounts;
++extern cl::opt<bool> FixFuncCounts;
++extern cl::opt<SctcModes> SctcMode;
++extern cl::opt<bool> AlignBlocks;
++extern cl::opt<bool> CgUseSplitHotSize;
++extern cl::opt<TailDuplication::DuplicationMode> TailDuplicationMode;
++extern cl::opt<bool> IterativeGuess;
++extern cl::opt<bool> AssumeABI;
++
+ static cl::OptionCategory *BoltCategories[] = {&BoltCategory,
+                                                &BoltOptCategory,
+                                                &BoltRelocCategory,
+@@ -69,6 +95,12 @@ InputDataFilename2("data2",
+   cl::Optional,
+   cl::cat(BoltCategory));
+ 
++static cl::opt<bool>
++Om("Om",
++  cl::desc("Kunpeng optimization"),
++  cl::ZeroOrMore,
++  cl::cat(BoltOptCategory));
++
+ static cl::opt<std::string>
+ InputFilename2(
+   cl::Positional,
+@@ -152,6 +184,34 @@ void boltDiffMode(int argc, char **argv) {
+   opts::DiffOnly = true;
+ }
+ 
+void handleOptionOm() {
+  if (!opts::Om) {
+    return;
+  }
+  // -Om preset: applied after command-line parsing, so each value below replaces any explicit flag.
+  opts::ReorderBlocks = ReorderBasicBlocks::LT_OPTIMIZE_EXT_TSP; // -reorder-blocks=ext-tsp
+  opts::ReorderFunctions = ReorderFunctions::RT_HFSORT_PLUS;     // -reorder-functions=hfsort+
+  opts::SplitFunctions = true;                                   // -split-functions
+  opts::AggressiveSplitting = true;                              // -split-all-cold
+  opts::ICF = true;                                              // -icf=1
+  opts::UseGnuStack = true;                                      // -use-gnu-stack
+  opts::InlineAll = true;                                        // --inline-all
+  opts::InferFallThroughs = true;                                // --infer-fall-throughs
+  opts::SimplifyConditionalTailCalls = true;                     // --simplify-conditional-tail-calls
+  opts::SimplifyRODataLoads = true;                              // --simplify-rodata-loads
+  opts::ICPUseMispredicts = true;                                // --indirect-call-promotion-use-mispredicts
+  opts::EliminateVeneers = true;                                 // --elim-link-veneers
+  opts::EliminateUnreachable = true;                             // --eliminate-unreachable
+  opts::FixBlockCounts = true;                                   // --fix-block-counts
+  opts::FixFuncCounts = true;                                    // --fix-func-counts
+  opts::SctcMode = opts::SctcModes::SctcPreserveDirection;       // --sctc-mode=preserve
+  opts::AlignBlocks = true;                                      // --align-blocks
+  opts::CgUseSplitHotSize = true;                                // --cg-use-split-hot-size
+  opts::TailDuplicationMode = TailDuplication::TD_AGGRESSIVE;    // --tail-duplication=aggressive
+  opts::IterativeGuess = true;                                   // --iterative-guess
+  opts::AssumeABI = true;                                        // --assume-abi
+}
++
+ void boltMode(int argc, char **argv) {
+   cl::HideUnrelatedOptions(ArrayRef(opts::BoltCategories));
+   // Register the target printer for --version.
+@@ -160,6 +220,7 @@ void boltMode(int argc, char **argv) {
+ 
+   cl::ParseCommandLineOptions(argc, argv,
+                               "BOLT - Binary Optimization and Layout Tool\n");
++  handleOptionOm();
+ 
+   if (opts::OutputFilename.empty()) {
+     errs() << ToolName << ": expected -o=<output file> option.\n";
+-- 
+2.33.0
+
diff --git a/llvm-bolt.spec b/llvm-bolt.spec
index 39c3033..651f0ac 100644
--- a/llvm-bolt.spec
+++ b/llvm-bolt.spec
@@ -46,6 +46,7 @@ Patch8:         0008-merge-fdata-Support-process-no_lbr-profile-file.patch
 Patch9:         0009-support-aarch64-instrumentation.patch
 Patch10:        0010-AArch64-Add-hybrid-guess-approach-for-edge-weight-estimation.patch
 Patch11:        0011-support-D-FOT-addrs-data-parsing-for-optimized-binary.patch
+Patch12:        0012-Add-Om-for-Kunpeng-Opts.patch
 Patch13:        0013-AArch64-Adapt-to-ai4c_onnxrunner.patch
 
 BuildRequires:  gcc
@@ -160,8 +161,14 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{__cmake_builddir}/%{_lib}/lib*.
 %doc %{install_docdir}
 
 %changelog
+* Fri Apr 25 2025 rfwang07 <wangrufeng5@huawei.com> 17.0.6-6
+- Type:backport
+- ID:NA
+- SUG:NA
+- DESC: Add Om for Kunpeng Opts
+
 * Tue Apr 15 2025 zengxianghuai <zengxianghuai@h-partners.com> 17.0.6-6
 - adapt to ai4c_onnxrunner.so
 
 * Tue Feb 11 2025 rfwang07 <wangrufeng5@huawei.com> 17.0.6-5
 - Type:backport
-- 
Gitee