From 21038303a59410a036872c1498f95c8ab00d4bde Mon Sep 17 00:00:00 2001 From: zengxianghuai Date: Mon, 10 Mar 2025 16:03:01 +0800 Subject: [PATCH 1/4] change bolt to adapt ai4c_onnxrunner.so --- 0012-AArch64-Adapt-to-ai4c_onnxrunner.patch | 123 ++++++++++++++++++++ llvm-bolt.spec | 6 +- 2 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 0012-AArch64-Adapt-to-ai4c_onnxrunner.patch diff --git a/0012-AArch64-Adapt-to-ai4c_onnxrunner.patch b/0012-AArch64-Adapt-to-ai4c_onnxrunner.patch new file mode 100644 index 0000000..1f632ce --- /dev/null +++ b/0012-AArch64-Adapt-to-ai4c_onnxrunner.patch @@ -0,0 +1,123 @@ +From ac79496aa1f07c7506e4abc1b17be762961b783d Mon Sep 17 00:00:00 2001 +From: zengxianghuai +Date: Tue, 15 Apr 2025 20:51:02 +0800 +Subject: [PATCH] adapt to ai4c_onnxrunner.so + +--- + bolt/include/bolt/Profile/DataReader.h | 67 +++++++++++++++++--------- + 1 file changed, 44 insertions(+), 23 deletions(-) + +diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h +index bf732d47c..d7104eed7 100644 +--- a/bolt/include/bolt/Profile/DataReader.h ++++ b/bolt/include/bolt/Profile/DataReader.h +@@ -46,12 +46,12 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LBREntry &LBR) { + } + + extern "C" { +-typedef void *(*CreateONNXRunnerFunc)(const char *); +-typedef void (*DeleteONNXRunnerFunc)(void *); +-typedef std::vector (*RunONNXModelFunc)(void *, +- const std::vector &, +- const std::vector &, +- const std::vector &, int); ++typedef void (*initEngineFunc)(const char*); ++typedef void (*addInt64InputFunc)(int64_t*, int); ++typedef int (*inferenceFunc)(); ++typedef void (*clearEngineFunc)(); ++typedef void (*freeEngineFunc)(); ++typedef std::vector (*getProbabilityFunc)(int); + } + + struct Location { +@@ -295,10 +295,14 @@ public: + + ~DataReader() { + // delete onnxrunner; +- if (onnxRunner && libHandle && handleOnnxRuntime) { +- DeleteONNXRunnerFunc deleteONNXRunner = +- (DeleteONNXRunnerFunc)dlsym(libHandle, "deleteONNXRunner"); +- deleteONNXRunner(onnxRunner); ++ if (libHandle && handleOnnxRuntime) { ++ freeEngineFunc freeEngine = ++ (freeEngineFunc)dlsym(libHandle, "free_engine"); ++ if (!freeEngine) { ++ outs() << "error: llvm-bolt failed during loading free_engine.\n"; ++ exit(1); ++ } ++ freeEngine(); + dlclose(libHandle); + dlclose(handleOnnxRuntime); + } +@@ -306,21 +310,24 @@ public: + + /// Initialize the onnxruntime model. + void initializeONNXRunner(const std::string &modelPath) { +- if (!onnxRunner && !libHandle && !handleOnnxRuntime) { ++ if (!libHandle && !handleOnnxRuntime) { + handleOnnxRuntime = + dlopen("libonnxruntime.so", RTLD_LAZY | RTLD_GLOBAL); + if (handleOnnxRuntime == nullptr) { + outs() << "error: llvm-bolt failed during loading onnxruntime.so.\n"; + exit(1); + } +- libHandle = dlopen("libONNXRunner.so", RTLD_LAZY); ++ libHandle = dlopen("ai4c_onnxrunner.so", RTLD_LAZY); + if (libHandle == nullptr) { +- outs() << "error: llvm-bolt failed during loading libONNXRunner.so.\n"; ++ outs() << "error: llvm-bolt failed during loading ai4c_onnxrunner.so.\n"; ++ exit(1); ++ } ++ initEngineFunc initialize = (initEngineFunc)dlsym(libHandle, "initialize"); ++ if (!initialize) { ++ outs() << "error: llvm-bolt failed during loading initialize.\n"; + exit(1); + } +- CreateONNXRunnerFunc createONNXRunner = +- (CreateONNXRunnerFunc)dlsym(libHandle, "createONNXRunner"); +- onnxRunner = createONNXRunner(modelPath.c_str()); ++ initialize(modelPath.c_str()); + } + } + +@@ -328,16 +335,30 @@ public: + float ONNXInference(const std::vector &input_string, + const std::vector &input_int64, + const std::vector &input_float, int batch_size = 1) { +- if (onnxRunner && libHandle) { +- RunONNXModelFunc runONNXModel = +- (RunONNXModelFunc)dlsym(libHandle, "runONNXModel"); +- std::vector model_preds = runONNXModel( +- onnxRunner, input_string, input_int64, input_float, batch_size); +- if (model_preds.size() <= 0) { ++ if (libHandle) { ++ addInt64InputFunc addInt64Input = ++ (addInt64InputFunc)dlsym(libHandle, "add_int64_input"); ++ inferenceFunc inference = (inferenceFunc)dlsym(libHandle, "inference"); ++ getProbabilityFunc getProbability = ++ (getProbabilityFunc)dlsym(libHandle, "get_probability"); ++ clearEngineFunc clearEngine = ++ (clearEngineFunc)dlsym(libHandle, "clear_engine"); ++ ++ if (!addInt64Input || !inference || !getProbability || !clearEngine) { ++ outs() << "error: llvm-bolt failed during loading ai4c_onnxrunner.so.\n"; ++ exit(1); ++ } ++ ++ std::vector temp(input_int64.begin(), input_int64.end()); ++ addInt64Input(temp.data(), temp.size()); ++ inference(); ++ std::vector preds = getProbability(batch_size); ++ if (preds.size() <= 0) { + outs() << "error: llvm-bolt model prediction result cannot be empty.\n"; + exit(1); + } +- float pred = model_preds[0]; ++ float pred = preds[0]; ++ clearEngine(); + return pred; + } + return -1.0; +-- +2.33.0 + diff --git a/llvm-bolt.spec b/llvm-bolt.spec index c3a748a..0bf0392 100644 --- a/llvm-bolt.spec +++ b/llvm-bolt.spec @@ -27,7 +27,7 @@ Name: %{pkg_name} Version: %{bolt_version} -Release: 5 +Release: 6 Summary: BOLT is a post-link optimizer developed to speed up large applications License: Apache-2.0 URL: https://github.com/llvm/llvm-project/tree/main/bolt @@ -46,6 +46,7 @@ Patch8: 0008-merge-fdata-Support-process-no_lbr-profile-file.patch Patch9: 0009-support-aarch64-instrumentation.patch Patch10: 0010-AArch64-Add-hybrid-guess-approach-for-edge-weight-estimation.patch Patch11: 0011-support-D-FOT-addrs-data-parsing-for-optimized-binary.patch +Patch12: 0012-AArch64-Adapt-to-ai4c_onnxrunner.patch BuildRequires: gcc BuildRequires: gcc-c++ @@ -159,6 +160,9 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{__cmake_builddir}/%{_lib}/lib*. %doc %{install_docdir} %changelog +* Tue Apr 15 2025 zengxianghuai 17.0.6-6 +- adapt to ai4c_onnxrunner.so + * Tue Feb 11 2025 rfwang07 17.0.6-5 - Type:backport - ID:NA -- Gitee From bdbf47c10541b9af39fdc99c541154464f4d5e41 Mon Sep 17 00:00:00 2001 From: zengxianghuai Date: Mon, 28 Apr 2025 20:38:09 +0800 Subject: [PATCH 2/4] fix DataReader.h --- ...013-AArch64-Adapt-to-ai4c_onnxrunner.patch | 83 +++++++++++-------- llvm-bolt.spec | 2 +- 2 files changed, 51 insertions(+), 34 deletions(-) rename 0012-AArch64-Adapt-to-ai4c_onnxrunner.patch => 0013-AArch64-Adapt-to-ai4c_onnxrunner.patch (64%) diff --git a/0012-AArch64-Adapt-to-ai4c_onnxrunner.patch b/0013-AArch64-Adapt-to-ai4c_onnxrunner.patch similarity index 64% rename from 0012-AArch64-Adapt-to-ai4c_onnxrunner.patch rename to 0013-AArch64-Adapt-to-ai4c_onnxrunner.patch index 1f632ce..f8d711e 100644 --- a/0012-AArch64-Adapt-to-ai4c_onnxrunner.patch +++ b/0013-AArch64-Adapt-to-ai4c_onnxrunner.patch @@ -1,17 +1,17 @@ -From ac79496aa1f07c7506e4abc1b17be762961b783d Mon Sep 17 00:00:00 2001 +From bf71c96ed790ae7d0d5c85e22f6c3fc09fd949e4 Mon Sep 17 00:00:00 2001 From: zengxianghuai -Date: Tue, 15 Apr 2025 20:51:02 +0800 -Subject: [PATCH] adapt to ai4c_onnxrunner.so +Date: Mon, 28 Apr 2025 20:28:45 +0800 +Subject: [PATCH] update DataReader.h --- - bolt/include/bolt/Profile/DataReader.h | 67 +++++++++++++++++--------- - 1 file changed, 44 insertions(+), 23 deletions(-) + bolt/include/bolt/Profile/DataReader.h | 83 ++++++++++++++++---------- + 1 file changed, 53 insertions(+), 30 deletions(-) diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h -index bf732d47c..d7104eed7 100644 +index bf732d47c..9c35c78a2 100644 --- a/bolt/include/bolt/Profile/DataReader.h +++ b/bolt/include/bolt/Profile/DataReader.h -@@ -46,12 +46,12 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LBREntry &LBR) { +@@ -46,12 +46,13 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LBREntry &LBR) { } extern "C" { @@ -21,16 +21,17 @@ index bf732d47c..d7104eed7 100644 - const std::vector &, - const std::vector &, - const std::vector &, int); -+typedef void (*initEngineFunc)(const char*); ++typedef void (*CreateONNXRunnerFunc)(const char*); +typedef void (*addInt64InputFunc)(int64_t*, int); +typedef int (*inferenceFunc)(); +typedef void (*clearEngineFunc)(); +typedef void (*freeEngineFunc)(); -+typedef std::vector (*getProbabilityFunc)(int); ++typedef float *(*getProbabilityFunc)(int, int); ++typedef void (*freeProbabilityFunc)(float*); } struct Location { -@@ -295,10 +295,14 @@ public: +@@ -295,32 +296,33 @@ public: ~DataReader() { // delete onnxrunner; @@ -38,37 +39,37 @@ index bf732d47c..d7104eed7 100644 - DeleteONNXRunnerFunc deleteONNXRunner = - (DeleteONNXRunnerFunc)dlsym(libHandle, "deleteONNXRunner"); - deleteONNXRunner(onnxRunner); -+ if (libHandle && handleOnnxRuntime) { ++ if (libHandle) { + freeEngineFunc freeEngine = -+ (freeEngineFunc)dlsym(libHandle, "free_engine"); ++ (freeEngineFunc)dlsym(libHandle, "free_engine"); + if (!freeEngine) { + outs() << "error: llvm-bolt failed during loading free_engine.\n"; + exit(1); + } + freeEngine(); dlclose(libHandle); - dlclose(handleOnnxRuntime); +- dlclose(handleOnnxRuntime); } -@@ -306,21 +310,24 @@ public: + } /// Initialize the onnxruntime model. void initializeONNXRunner(const std::string &modelPath) { - if (!onnxRunner && !libHandle && !handleOnnxRuntime) { -+ if (!libHandle && !handleOnnxRuntime) { - handleOnnxRuntime = - dlopen("libonnxruntime.so", RTLD_LAZY | RTLD_GLOBAL); - if (handleOnnxRuntime == nullptr) { - outs() << "error: llvm-bolt failed during loading onnxruntime.so.\n"; +- handleOnnxRuntime = +- dlopen("libonnxruntime.so", RTLD_LAZY | RTLD_GLOBAL); +- if (handleOnnxRuntime == nullptr) { +- outs() << "error: llvm-bolt failed during loading onnxruntime.so.\n"; ++ if (!libHandle) { ++ libHandle = dlopen("ai4c_onnxrunner.so", RTLD_LAZY); ++ if (!libHandle) { ++ outs() << "error: llvm-bolt failed during loading ai4c_onnxrunner.so.\n"; exit(1); } - libHandle = dlopen("libONNXRunner.so", RTLD_LAZY); -+ libHandle = dlopen("ai4c_onnxrunner.so", RTLD_LAZY); - if (libHandle == nullptr) { +- if (libHandle == nullptr) { - outs() << "error: llvm-bolt failed during loading libONNXRunner.so.\n"; -+ outs() << "error: llvm-bolt failed during loading ai4c_onnxrunner.so.\n"; -+ exit(1); -+ } -+ initEngineFunc initialize = (initEngineFunc)dlsym(libHandle, "initialize"); ++ initEngineFunc initialize = ++ (initEngineFunc)dlsym(libHandle, "initialize"); + if (!initialize) { + outs() << "error: llvm-bolt failed during loading initialize.\n"; exit(1); @@ -80,7 +81,7 @@ index bf732d47c..d7104eed7 100644 } } -@@ -328,16 +335,30 @@ public: +@@ -328,16 +330,37 @@ public: float ONNXInference(const std::vector &input_string, const std::vector &input_int64, const std::vector &input_float, int batch_size = 1) { @@ -95,29 +96,45 @@ index bf732d47c..d7104eed7 100644 + (addInt64InputFunc)dlsym(libHandle, "add_int64_input"); + inferenceFunc inference = (inferenceFunc)dlsym(libHandle, "inference"); + getProbabilityFunc getProbability = -+ (getProbabilityFunc)dlsym(libHandle, "get_probability"); ++ (getProbabilityFunc)dlsym(libHandle, "get_label_probability"); ++ freeProbabilityFunc freeProbability = ++ (freeProbabilityFunc)dlsym(libHandle, "free_label_probability"); + clearEngineFunc clearEngine = + (clearEngineFunc)dlsym(libHandle, "clear_engine"); + -+ if (!addInt64Input || !inference || !getProbability || !clearEngine) { ++ if (!addInt64Input || !inference || !getProbability || !freeProbability || ++ !clearEngine) { + outs() << "error: llvm-bolt failed during loading ai4c_onnxrunner.so.\n"; + exit(1); + } -+ -+ std::vector temp(input_int64.begin(), input_int64.end()); -+ addInt64Input(temp.data(), temp.size()); ++ ++ std::vector modelInput(input_int64.begin(), input_int64.end()); ++ addInt64Input(modelInput.data(), modelInput.size()); + inference(); -+ std::vector preds = getProbability(batch_size); -+ if (preds.size() <= 0) { ++ ++ // output tensors: {label, label_prob}, label_prob=sequence> ++ float* preds = getProbability(1, 1); ++ if (preds == nullptr) { outs() << "error: llvm-bolt model prediction result cannot be empty.\n"; exit(1); } - float pred = model_preds[0]; ++ + float pred = preds[0]; ++ freeProbability(preds); + clearEngine(); return pred; } return -1.0; +@@ -356,7 +379,7 @@ protected: + void *libHandle; + + /// The library handle of the onnxruntime. +- void *handleOnnxRuntime; ++ /// void *handleOnnxRuntime; + + /// The annotating threshold for the model prediction. + float threshold; -- 2.33.0 diff --git a/llvm-bolt.spec b/llvm-bolt.spec index 0bf0392..39c3033 100644 --- a/llvm-bolt.spec +++ b/llvm-bolt.spec @@ -46,7 +46,7 @@ Patch8: 0008-merge-fdata-Support-process-no_lbr-profile-file.patch Patch9: 0009-support-aarch64-instrumentation.patch Patch10: 0010-AArch64-Add-hybrid-guess-approach-for-edge-weight-estimation.patch Patch11: 0011-support-D-FOT-addrs-data-parsing-for-optimized-binary.patch -Patch12: 0012-AArch64-Adapt-to-ai4c_onnxrunner.patch +Patch13: 0013-AArch64-Adapt-to-ai4c_onnxrunner.patch BuildRequires: gcc BuildRequires: gcc-c++ -- Gitee From 0f7d43b75fe1d8553db72d33f61241f7e8a16f37 Mon Sep 17 00:00:00 2001 From: zengxianghuai Date: Mon, 10 Mar 2025 16:03:01 +0800 Subject: [PATCH 3/4] change bolt to adapt ai4c_onnxrunner.so --- 0013-AArch64-Adapt-to-ai4c_onnxrunner.patch | 140 ++++++++++++++++++++ llvm-bolt.spec | 6 +- 2 files changed, 145 insertions(+), 1 deletion(-) create mode 100644 0013-AArch64-Adapt-to-ai4c_onnxrunner.patch diff --git a/0013-AArch64-Adapt-to-ai4c_onnxrunner.patch b/0013-AArch64-Adapt-to-ai4c_onnxrunner.patch new file mode 100644 index 0000000..f8d711e --- /dev/null +++ b/0013-AArch64-Adapt-to-ai4c_onnxrunner.patch @@ -0,0 +1,140 @@ +From bf71c96ed790ae7d0d5c85e22f6c3fc09fd949e4 Mon Sep 17 00:00:00 2001 +From: zengxianghuai +Date: Mon, 28 Apr 2025 20:28:45 +0800 +Subject: [PATCH] update DataReader.h + +--- + bolt/include/bolt/Profile/DataReader.h | 83 ++++++++++++++++---------- + 1 file changed, 53 insertions(+), 30 deletions(-) + +diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h +index bf732d47c..9c35c78a2 100644 +--- a/bolt/include/bolt/Profile/DataReader.h ++++ b/bolt/include/bolt/Profile/DataReader.h +@@ -46,12 +46,13 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LBREntry &LBR) { + } + + extern "C" { +-typedef void *(*CreateONNXRunnerFunc)(const char *); +-typedef void (*DeleteONNXRunnerFunc)(void *); +-typedef std::vector (*RunONNXModelFunc)(void *, +- const std::vector &, +- const std::vector &, +- const std::vector &, int); ++typedef void (*CreateONNXRunnerFunc)(const char*); ++typedef void (*addInt64InputFunc)(int64_t*, int); ++typedef int (*inferenceFunc)(); ++typedef void (*clearEngineFunc)(); ++typedef void (*freeEngineFunc)(); ++typedef float *(*getProbabilityFunc)(int, int); ++typedef void (*freeProbabilityFunc)(float*); + } + + struct Location { +@@ -295,32 +296,33 @@ public: + + ~DataReader() { + // delete onnxrunner; +- if (onnxRunner && libHandle && handleOnnxRuntime) { +- DeleteONNXRunnerFunc deleteONNXRunner = +- (DeleteONNXRunnerFunc)dlsym(libHandle, "deleteONNXRunner"); +- deleteONNXRunner(onnxRunner); ++ if (libHandle) { ++ freeEngineFunc freeEngine = ++ (freeEngineFunc)dlsym(libHandle, "free_engine"); ++ if (!freeEngine) { ++ outs() << "error: llvm-bolt failed during loading free_engine.\n"; ++ exit(1); ++ } ++ freeEngine(); + dlclose(libHandle); +- dlclose(handleOnnxRuntime); + } + } + + /// Initialize the onnxruntime model. + void initializeONNXRunner(const std::string &modelPath) { +- if (!onnxRunner && !libHandle && !handleOnnxRuntime) { +- handleOnnxRuntime = +- dlopen("libonnxruntime.so", RTLD_LAZY | RTLD_GLOBAL); +- if (handleOnnxRuntime == nullptr) { +- outs() << "error: llvm-bolt failed during loading onnxruntime.so.\n"; ++ if (!libHandle) { ++ libHandle = dlopen("ai4c_onnxrunner.so", RTLD_LAZY); ++ if (!libHandle) { ++ outs() << "error: llvm-bolt failed during loading ai4c_onnxrunner.so.\n"; + exit(1); + } +- libHandle = dlopen("libONNXRunner.so", RTLD_LAZY); +- if (libHandle == nullptr) { +- outs() << "error: llvm-bolt failed during loading libONNXRunner.so.\n"; ++ initEngineFunc initialize = ++ (initEngineFunc)dlsym(libHandle, "initialize"); ++ if (!initialize) { ++ outs() << "error: llvm-bolt failed during loading initialize.\n"; + exit(1); + } +- CreateONNXRunnerFunc createONNXRunner = +- (CreateONNXRunnerFunc)dlsym(libHandle, "createONNXRunner"); +- onnxRunner = createONNXRunner(modelPath.c_str()); ++ initialize(modelPath.c_str()); + } + } + +@@ -328,16 +330,37 @@ public: + float ONNXInference(const std::vector &input_string, + const std::vector &input_int64, + const std::vector &input_float, int batch_size = 1) { +- if (onnxRunner && libHandle) { +- RunONNXModelFunc runONNXModel = +- (RunONNXModelFunc)dlsym(libHandle, "runONNXModel"); +- std::vector model_preds = runONNXModel( +- onnxRunner, input_string, input_int64, input_float, batch_size); +- if (model_preds.size() <= 0) { ++ if (libHandle) { ++ addInt64InputFunc addInt64Input = ++ (addInt64InputFunc)dlsym(libHandle, "add_int64_input"); ++ inferenceFunc inference = (inferenceFunc)dlsym(libHandle, "inference"); ++ getProbabilityFunc getProbability = ++ (getProbabilityFunc)dlsym(libHandle, "get_label_probability"); ++ freeProbabilityFunc freeProbability = ++ (freeProbabilityFunc)dlsym(libHandle, "free_label_probability"); ++ clearEngineFunc clearEngine = ++ (clearEngineFunc)dlsym(libHandle, "clear_engine"); ++ ++ if (!addInt64Input || !inference || !getProbability || !freeProbability || ++ !clearEngine) { ++ outs() << "error: llvm-bolt failed during loading ai4c_onnxrunner.so.\n"; ++ exit(1); ++ } ++ ++ std::vector modelInput(input_int64.begin(), input_int64.end()); ++ addInt64Input(modelInput.data(), modelInput.size()); ++ inference(); ++ ++ // output tensors: {label, label_prob}, label_prob=sequence> ++ float* preds = getProbability(1, 1); ++ if (preds == nullptr) { + outs() << "error: llvm-bolt model prediction result cannot be empty.\n"; + exit(1); + } +- float pred = model_preds[0]; ++ ++ float pred = preds[0]; ++ freeProbability(preds); ++ clearEngine(); + return pred; + } + return -1.0; +@@ -356,7 +379,7 @@ protected: + void *libHandle; + + /// The library handle of the onnxruntime. +- void *handleOnnxRuntime; ++ /// void *handleOnnxRuntime; + + /// The annotating threshold for the model prediction. + float threshold; +-- +2.33.0 + diff --git a/llvm-bolt.spec b/llvm-bolt.spec index c3a748a..39c3033 100644 --- a/llvm-bolt.spec +++ b/llvm-bolt.spec @@ -27,7 +27,7 @@ Name: %{pkg_name} Version: %{bolt_version} -Release: 5 +Release: 6 Summary: BOLT is a post-link optimizer developed to speed up large applications License: Apache-2.0 URL: https://github.com/llvm/llvm-project/tree/main/bolt @@ -46,6 +46,7 @@ Patch8: 0008-merge-fdata-Support-process-no_lbr-profile-file.patch Patch9: 0009-support-aarch64-instrumentation.patch Patch10: 0010-AArch64-Add-hybrid-guess-approach-for-edge-weight-estimation.patch Patch11: 0011-support-D-FOT-addrs-data-parsing-for-optimized-binary.patch +Patch13: 0013-AArch64-Adapt-to-ai4c_onnxrunner.patch BuildRequires: gcc BuildRequires: gcc-c++ @@ -159,6 +160,9 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{__cmake_builddir}/%{_lib}/lib*. %doc %{install_docdir} %changelog +* Tue Apr 15 2025 zengxianghuai 17.0.6-6 +- adapt to ai4c_onnxrunner.so + * Tue Feb 11 2025 rfwang07 17.0.6-5 - Type:backport - ID:NA -- Gitee From 6aa549bec0133a494d7f2531500370305d4d87a6 Mon Sep 17 00:00:00 2001 From: rfwang07 Date: Tue, 22 Apr 2025 19:46:53 +0800 Subject: [PATCH 4/4] Add Om for Kunpeng Opts --- 0012-Add-Om-for-Kunpeng-Opts.patch | 380 +++++++++++++++++++++++++++++ llvm-bolt.spec | 9 + 2 files changed, 389 insertions(+) create mode 100644 0012-Add-Om-for-Kunpeng-Opts.patch diff --git a/0012-Add-Om-for-Kunpeng-Opts.patch b/0012-Add-Om-for-Kunpeng-Opts.patch new file mode 100644 index 0000000..0f8a8ee --- /dev/null +++ b/0012-Add-Om-for-Kunpeng-Opts.patch @@ -0,0 +1,380 @@ +From f4757a1bff16b44a329e3f70973ca6c623518291 Mon Sep 17 00:00:00 2001 +From: rfwang07 +Date: Tue, 22 Apr 2025 19:33:52 +0800 +Subject: [PATCH] Add Om for Kunpeng Opts + +--- + bolt/include/bolt/Passes/BinaryPasses.h | 8 +++ + bolt/include/bolt/Passes/SplitFunctions.h | 19 +++++++ + bolt/lib/Core/BinaryFunctionProfile.cpp | 6 +-- + bolt/lib/Passes/BinaryPasses.cpp | 10 +--- + bolt/lib/Passes/IndirectCallPromotion.cpp | 2 +- + bolt/lib/Passes/Inliner.cpp | 2 +- + bolt/lib/Passes/MCF.cpp | 2 +- + bolt/lib/Passes/ReorderFunctions.cpp | 2 +- + bolt/lib/Passes/SplitFunctions.cpp | 23 +-------- + bolt/lib/Passes/VeneerElimination.cpp | 2 +- + bolt/lib/Rewrite/BinaryPassManager.cpp | 6 +-- + bolt/lib/Rewrite/RewriteInstance.cpp | 2 +- + bolt/tools/driver/llvm-bolt.cpp | 61 +++++++++++++++++++++++ + 13 files changed, 104 insertions(+), 41 deletions(-) + +diff --git a/bolt/include/bolt/Passes/BinaryPasses.h b/bolt/include/bolt/Passes/BinaryPasses.h +index dace07e..5a2fe3b 100644 +--- a/bolt/include/bolt/Passes/BinaryPasses.h ++++ b/bolt/include/bolt/Passes/BinaryPasses.h +@@ -23,6 +23,14 @@ + #include + #include + ++namespace opts { ++enum SctcModes : char { ++ SctcAlways, ++ SctcPreserveDirection, ++ SctcHeuristic ++}; ++} ++ + namespace llvm { + namespace bolt { + +diff --git a/bolt/include/bolt/Passes/SplitFunctions.h b/bolt/include/bolt/Passes/SplitFunctions.h +index 4058f33..a8c3a14 100644 +--- a/bolt/include/bolt/Passes/SplitFunctions.h ++++ b/bolt/include/bolt/Passes/SplitFunctions.h +@@ -15,6 +15,25 @@ + #include "llvm/Support/CommandLine.h" + #include + ++using namespace llvm; ++ ++class DeprecatedSplitFunctionOptionParser : public cl::parser { ++public: ++ explicit DeprecatedSplitFunctionOptionParser(cl::Option &O) ++ : cl::parser(O) {} ++ ++ bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) { ++ if (Arg == "2" || Arg == "3") { ++ Value = true; ++ errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" " ++ "for option -{1} is deprecated\n", ++ Arg, ArgName); ++ return false; ++ } ++ return cl::parser::parse(O, ArgName, Arg, Value); ++ } ++}; ++ + namespace llvm { + namespace bolt { + +diff --git a/bolt/lib/Core/BinaryFunctionProfile.cpp b/bolt/lib/Core/BinaryFunctionProfile.cpp +index 0d705cd..c062f8d 100644 +--- a/bolt/lib/Core/BinaryFunctionProfile.cpp ++++ b/bolt/lib/Core/BinaryFunctionProfile.cpp +@@ -44,17 +44,17 @@ static cl::alias ICPAlias("icp", + + extern cl::opt JumpTables; + +-static cl::opt FixFuncCounts( ++cl::opt FixFuncCounts( + "fix-func-counts", + cl::desc("adjust function counts based on basic blocks execution count"), + cl::Hidden, cl::cat(BoltOptCategory)); + +-static cl::opt FixBlockCounts( ++cl::opt FixBlockCounts( + "fix-block-counts", + cl::desc("adjust block counts based on outgoing branch counts"), + cl::init(true), cl::Hidden, cl::cat(BoltOptCategory)); + +-static cl::opt ++cl::opt + InferFallThroughs("infer-fall-throughs", + cl::desc("infer execution count for fall-through blocks"), + cl::Hidden, cl::cat(BoltOptCategory)); +diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp +index a674fb4..1db25ff 100644 +--- a/bolt/lib/Passes/BinaryPasses.cpp ++++ b/bolt/lib/Passes/BinaryPasses.cpp +@@ -88,7 +88,7 @@ static cl::opt MinBranchClusters( + "branches"), + cl::Hidden, cl::cat(BoltOptCategory)); + +-static cl::list Peepholes( ++cl::list Peepholes( + "peepholes", cl::CommaSeparated, cl::desc("enable peephole optimizations"), + cl::value_desc("opt1,opt2,opt3,..."), + cl::values(clEnumValN(Peepholes::PEEP_NONE, "none", "disable peepholes"), +@@ -176,13 +176,7 @@ static cl::opt + cl::desc("print the list of functions with stale profile"), + cl::Hidden, cl::cat(BoltOptCategory)); + +-enum SctcModes : char { +- SctcAlways, +- SctcPreserveDirection, +- SctcHeuristic +-}; +- +-static cl::opt ++cl::opt + SctcMode("sctc-mode", + cl::desc("mode for simplify conditional tail calls"), + cl::init(SctcAlways), +diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp +index ea80194..4ff6837 100644 +--- a/bolt/lib/Passes/IndirectCallPromotion.cpp ++++ b/bolt/lib/Passes/IndirectCallPromotion.cpp +@@ -72,7 +72,7 @@ static cl::alias ICPMispredictThresholdAlias( + cl::desc("alias for --indirect-call-promotion-mispredict-threshold"), + cl::aliasopt(ICPMispredictThreshold)); + +-static cl::opt ICPUseMispredicts( ++cl::opt ICPUseMispredicts( + "indirect-call-promotion-use-mispredicts", + cl::desc("use misprediction frequency for determining whether or not ICP " + "should be applied at a callsite. The " +diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp +index 67dd294..7fdd780 100644 +--- a/bolt/lib/Passes/Inliner.cpp ++++ b/bolt/lib/Passes/Inliner.cpp +@@ -50,7 +50,7 @@ ForceInlineFunctions("force-inline", + cl::Hidden, + cl::cat(BoltOptCategory)); + +-static cl::opt InlineAll("inline-all", cl::desc("inline all functions"), ++cl::opt InlineAll("inline-all", cl::desc("inline all functions"), + cl::cat(BoltOptCategory)); + + static cl::opt InlineIgnoreLeafCFI( +diff --git a/bolt/lib/Passes/MCF.cpp b/bolt/lib/Passes/MCF.cpp +index a6455bb..bd98286 100644 +--- a/bolt/lib/Passes/MCF.cpp ++++ b/bolt/lib/Passes/MCF.cpp +@@ -31,7 +31,7 @@ extern cl::OptionCategory BoltOptCategory; + + extern cl::opt TimeOpts; + +-static cl::opt IterativeGuess( ++cl::opt IterativeGuess( + "iterative-guess", + cl::desc("in non-LBR mode, guess edge counts using iterative technique"), + cl::Hidden, cl::cat(BoltOptCategory)); +diff --git a/bolt/lib/Passes/ReorderFunctions.cpp b/bolt/lib/Passes/ReorderFunctions.cpp +index 2fc99f6..359cd05 100644 +--- a/bolt/lib/Passes/ReorderFunctions.cpp ++++ b/bolt/lib/Passes/ReorderFunctions.cpp +@@ -98,7 +98,7 @@ static cl::opt CgIgnoreRecursiveCalls( + cl::desc("ignore recursive calls when constructing the call graph"), + cl::init(true), cl::cat(BoltOptCategory)); + +-static cl::opt ++cl::opt + CgUseSplitHotSize("cg-use-split-hot-size", + cl::desc("use hot/cold data on basic blocks to determine hot sizes for " + "call graph functions"), +diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp +index 34973ce..e934b75 100644 +--- a/bolt/lib/Passes/SplitFunctions.cpp ++++ b/bolt/lib/Passes/SplitFunctions.cpp +@@ -34,25 +34,6 @@ + using namespace llvm; + using namespace bolt; + +-namespace { +-class DeprecatedSplitFunctionOptionParser : public cl::parser { +-public: +- explicit DeprecatedSplitFunctionOptionParser(cl::Option &O) +- : cl::parser(O) {} +- +- bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) { +- if (Arg == "2" || Arg == "3") { +- Value = true; +- errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" " +- "for option -{1} is deprecated\n", +- Arg, ArgName); +- return false; +- } +- return cl::parser::parse(O, ArgName, Arg, Value); +- } +-}; +-} // namespace +- + namespace opts { + + extern cl::OptionCategory BoltOptCategory; +@@ -61,7 +42,7 @@ extern cl::opt SplitEH; + extern cl::opt ExecutionCountThreshold; + extern cl::opt RandomSeed; + +-static cl::opt AggressiveSplitting( ++cl::opt AggressiveSplitting( + "split-all-cold", cl::desc("outline as many cold basic blocks as possible"), + cl::cat(BoltOptCategory)); + +@@ -74,7 +55,7 @@ static cl::opt SplitAlignThreshold( + + cl::Hidden, cl::cat(BoltOptCategory)); + +-static cl::opt ++cl::opt + SplitFunctions("split-functions", + cl::desc("split functions into fragments"), + cl::cat(BoltOptCategory)); +diff --git a/bolt/lib/Passes/VeneerElimination.cpp b/bolt/lib/Passes/VeneerElimination.cpp +index eadbfc1..611d027 100644 +--- a/bolt/lib/Passes/VeneerElimination.cpp ++++ b/bolt/lib/Passes/VeneerElimination.cpp +@@ -20,7 +20,7 @@ namespace opts { + + extern cl::OptionCategory BoltOptCategory; + +-static llvm::cl::opt ++llvm::cl::opt + EliminateVeneers("elim-link-veneers", + cl::desc("run veneer elimination pass"), cl::init(true), + cl::Hidden, cl::cat(BoltOptCategory)); +diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp +index 517984d..c231037 100644 +--- a/bolt/lib/Rewrite/BinaryPassManager.cpp ++++ b/bolt/lib/Rewrite/BinaryPassManager.cpp +@@ -58,7 +58,7 @@ DynoStatsAll("dyno-stats-all", + cl::desc("print dyno stats after each stage"), + cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); + +-static cl::opt ++cl::opt + EliminateUnreachable("eliminate-unreachable", + cl::desc("eliminate unreachable code"), cl::init(true), + cl::cat(BoltOptCategory)); +@@ -207,12 +207,12 @@ static cl::opt RegReAssign( + "reassign registers so as to avoid using REX prefixes in hot code"), + cl::cat(BoltOptCategory)); + +-static cl::opt SimplifyConditionalTailCalls( ++cl::opt SimplifyConditionalTailCalls( + "simplify-conditional-tail-calls", + cl::desc("simplify conditional tail calls by removing unnecessary jumps"), + cl::init(true), cl::cat(BoltOptCategory)); + +-static cl::opt SimplifyRODataLoads( ++cl::opt SimplifyRODataLoads( + "simplify-rodata-loads", + cl::desc("simplify loads from read-only sections by replacing the memory " + "operand with the constant found in the corresponding section"), +diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp +index 255c1ae..7a931db 100644 +--- a/bolt/lib/Rewrite/RewriteInstance.cpp ++++ b/bolt/lib/Rewrite/RewriteInstance.cpp +@@ -234,7 +234,7 @@ static cl::opt DWPPathName("dwp", + cl::Hidden, cl::init(""), + cl::cat(BoltCategory)); + +-static cl::opt ++cl::opt + UseGnuStack("use-gnu-stack", + cl::desc("use GNU_STACK program header for new segment (workaround for " + "issues with strip/objcopy)"), +diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp +index 5a3af6a..8cba4d2 100644 +--- a/bolt/tools/driver/llvm-bolt.cpp ++++ b/bolt/tools/driver/llvm-bolt.cpp +@@ -26,6 +26,10 @@ + #include "llvm/Support/PrettyStackTrace.h" + #include "llvm/Support/Signals.h" + #include "llvm/Support/TargetSelect.h" ++#include "bolt/Passes/BinaryPasses.h" ++#include "bolt/Passes/ReorderFunctions.h" ++#include "bolt/Passes/SplitFunctions.h" ++#include "bolt/Passes/TailDuplication.h" + + #define DEBUG_TYPE "bolt" + +@@ -35,6 +39,28 @@ using namespace bolt; + + namespace opts { + ++extern cl::opt ReorderBlocks; ++extern cl::opt ReorderFunctions; ++extern cl::opt SplitFunctions; ++extern cl::opt AggressiveSplitting; ++extern cl::opt ICF; ++extern cl::opt UseGnuStack; ++extern cl::opt InlineAll; ++extern cl::opt InferFallThroughs; ++extern cl::opt SimplifyConditionalTailCalls; ++extern cl::opt SimplifyRODataLoads; ++extern cl::opt ICPUseMispredicts; ++extern cl::opt EliminateVeneers; ++extern cl::opt EliminateUnreachable; ++extern cl::opt FixBlockCounts; ++extern cl::opt FixFuncCounts; ++extern cl::opt SctcMode; ++extern cl::opt AlignBlocks; ++extern cl::opt CgUseSplitHotSize; ++extern cl::opt TailDuplicationMode; ++extern cl::opt IterativeGuess; ++extern cl::opt AssumeABI; ++ + static cl::OptionCategory *BoltCategories[] = {&BoltCategory, + &BoltOptCategory, + &BoltRelocCategory, +@@ -69,6 +95,12 @@ InputDataFilename2("data2", + cl::Optional, + cl::cat(BoltCategory)); + ++static cl::opt ++Om("Om", ++ cl::desc("Kunpeng optimization"), ++ cl::ZeroOrMore, ++ cl::cat(BoltOptCategory)); ++ + static cl::opt + InputFilename2( + cl::Positional, +@@ -152,6 +184,34 @@ void boltDiffMode(int argc, char **argv) { + opts::DiffOnly = true; + } + ++void handleOptionOm() { ++ if (!opts::Om) { ++ return; ++ } ++ ++ opts::ReorderBlocks = ReorderBasicBlocks::LT_OPTIMIZE_EXT_TSP; // -reorder-blocks=ext-tsp ++ opts::ReorderFunctions = ReorderFunctions::RT_HFSORT_PLUS; // -reorder-functions=hfsort+ ++ opts::SplitFunctions = true; // -split-functions ++ opts::AggressiveSplitting = true; // -split-all-cold ++ opts::ICF = true; // -icf=1 ++ opts::UseGnuStack = true; // -use-gnu-stack ++ opts::InlineAll = true; // --inline-all ++ opts::InferFallThroughs = true; // --infer-fall-throughs ++ opts::SimplifyConditionalTailCalls = true; // --simplify-conditional-tail-calls ++ opts::SimplifyRODataLoads = true; // --simplify-rodata-loads ++ opts::ICPUseMispredicts = true; // --indirect-call-promotion-use-mispredicts ++ opts::EliminateVeneers = true; // --elim-link-veneers ++ opts::EliminateUnreachable = true; // --eliminate-unreachable ++ opts::FixBlockCounts = true; // --fix-block-counts ++ opts::FixFuncCounts = true; // --fix-func-counts ++ opts::SctcMode = opts::SctcModes::SctcPreserveDirection; // --sctc-mode=preserve ++ opts::AlignBlocks = true; // --align-blocks ++ opts::CgUseSplitHotSize = true; // --cg-use-split-hot-size ++ opts::TailDuplicationMode = TailDuplication::TD_AGGRESSIVE; // --tail-duplication=aggressive ++ opts::IterativeGuess = true; // --iterative-guess ++ opts::AssumeABI = true; // --assume-abi ++} ++ + void boltMode(int argc, char **argv) { + cl::HideUnrelatedOptions(ArrayRef(opts::BoltCategories)); + // Register the target printer for --version. +@@ -160,6 +220,7 @@ void boltMode(int argc, char **argv) { + + cl::ParseCommandLineOptions(argc, argv, + "BOLT - Binary Optimization and Layout Tool\n"); ++ handleOptionOm(); + + if (opts::OutputFilename.empty()) { + errs() << ToolName << ": expected -o= option.\n"; +-- +2.33.0 + diff --git a/llvm-bolt.spec b/llvm-bolt.spec index 39c3033..651f0ac 100644 --- a/llvm-bolt.spec +++ b/llvm-bolt.spec @@ -46,6 +46,7 @@ Patch8: 0008-merge-fdata-Support-process-no_lbr-profile-file.patch Patch9: 0009-support-aarch64-instrumentation.patch Patch10: 0010-AArch64-Add-hybrid-guess-approach-for-edge-weight-estimation.patch Patch11: 0011-support-D-FOT-addrs-data-parsing-for-optimized-binary.patch +Patch12: 0012-Add-Om-for-Kunpeng-Opts.patch Patch13: 0013-AArch64-Adapt-to-ai4c_onnxrunner.patch BuildRequires: gcc @@ -160,8 +161,16 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{__cmake_builddir}/%{_lib}/lib*. %doc %{install_docdir} %changelog +<<<<<<< HEAD * Tue Apr 15 2025 zengxianghuai 17.0.6-6 - adapt to ai4c_onnxrunner.so +======= +* Fri Apr 25 2025 rfwang07 17.0.6-6 +- Type:backport +- ID:NA +- SUG:NA +- DESC: Add Om for Kunpeng Opts +>>>>>>> a676ed9 (Add Om for Kunpeng Opts) * Tue Feb 11 2025 rfwang07 17.0.6-5 - Type:backport -- Gitee