From 587731a0c1259ca8943d77cad72a708ad5780e17 Mon Sep 17 00:00:00 2001 From: Mingchuan Wu Date: Fri, 21 Jun 2024 20:53:04 +0800 Subject: [PATCH] Syn patch from openeuler/gcc --- ...in-detecting-abnormal-revisit-for-bb.patch | 24 + ...-reg_set_p-and-add-check-for-global_.patch | 29 + ...-erroneous-pattern-from-gimple-ifcvt.patch | 54 ++ ...o-use-AI-model-to-guide-optimization.patch | 678 ++++++++++++++++++ gcc.spec | 16 +- 5 files changed, 800 insertions(+), 1 deletion(-) create mode 100644 0201-Fix-bug-in-detecting-abnormal-revisit-for-bb.patch create mode 100644 0202-Change-set_of-to-reg_set_p-and-add-check-for-global_.patch create mode 100644 0203-Remove-erroneous-pattern-from-gimple-ifcvt.patch create mode 100644 0204-Try-to-use-AI-model-to-guide-optimization.patch diff --git a/0201-Fix-bug-in-detecting-abnormal-revisit-for-bb.patch b/0201-Fix-bug-in-detecting-abnormal-revisit-for-bb.patch new file mode 100644 index 0000000..0e149fa --- /dev/null +++ b/0201-Fix-bug-in-detecting-abnormal-revisit-for-bb.patch @@ -0,0 +1,24 @@ +From 69cae9164ec85ef416c29baa3dd8b2b11edf8b71 Mon Sep 17 00:00:00 2001 +From: yzyssdd +Date: Tue, 18 Jun 2024 11:16:15 +0800 +Subject: [PATCH] Fix bug in detecting abnormal revisit for bb + +--- + gcc/tree-ssa-llc-allocate.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/gcc/tree-ssa-llc-allocate.c b/gcc/tree-ssa-llc-allocate.c +index 3f6ff3623..a1e7b0eb6 100644 +--- a/gcc/tree-ssa-llc-allocate.c ++++ b/gcc/tree-ssa-llc-allocate.c +@@ -2348,7 +2348,6 @@ revisit_bb_abnormal_p (basic_block bb, std::vector &bb_visited, + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Already visited bb index %d. 
Abort.\n", + bb->index); +- unused_edges.insert (std::make_pair (src_bb_idx, bb->index)); + return true; + } + +-- +2.33.0 + diff --git a/0202-Change-set_of-to-reg_set_p-and-add-check-for-global_.patch b/0202-Change-set_of-to-reg_set_p-and-add-check-for-global_.patch new file mode 100644 index 0000000..2ce8ad9 --- /dev/null +++ b/0202-Change-set_of-to-reg_set_p-and-add-check-for-global_.patch @@ -0,0 +1,29 @@ +From 50812a995ffda86acaab56f94ae99732b2a99c85 Mon Sep 17 00:00:00 2001 +From: Chernonog Viacheslav +Date: Wed, 8 May 2024 19:24:27 +0800 +Subject: [PATCH] Change set_of to reg_set_p, and add check for global_regs + fix for I9JDHE + +--- + gcc/rtl-matcher.h | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/gcc/rtl-matcher.h b/gcc/rtl-matcher.h +index 6aed8d98d..5310f6266 100644 +--- a/gcc/rtl-matcher.h ++++ b/gcc/rtl-matcher.h +@@ -56,8 +56,9 @@ check_def_chain_ref (df_ref ref, rtx reg) + if (!ref || !DF_REF_INSN_INFO (ref)) + return false; + +- return !global_regs[REGNO (reg)] +- || set_of (reg, DF_REF_INSN (ref)); ++ return !(REGNO (reg) < FIRST_PSEUDO_REGISTER ++ && global_regs[REGNO (reg)]) ++ || reg_set_p (reg, DF_REF_INSN (ref)); + } + + /* Get the single def instruction of the reg being used in the insn. 
*/ +-- +2.33.0 + diff --git a/0203-Remove-erroneous-pattern-from-gimple-ifcvt.patch b/0203-Remove-erroneous-pattern-from-gimple-ifcvt.patch new file mode 100644 index 0000000..991c50f --- /dev/null +++ b/0203-Remove-erroneous-pattern-from-gimple-ifcvt.patch @@ -0,0 +1,54 @@ +From 0dcab0809ada30b09f553dbbe5b2df8d830e95f3 Mon Sep 17 00:00:00 2001 +From: Pronin Alexander 00812787 +Date: Thu, 13 Jun 2024 22:04:16 +0800 +Subject: [PATCH] Remove erroneous pattern from gimple ifcvt + +--- + gcc/match.pd | 2 +- + gcc/testsuite/gcc.dg/ifcvt-gimple-1.c | 21 +++++++++++++++++++++ + 2 files changed, 22 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.dg/ifcvt-gimple-1.c + +diff --git a/gcc/match.pd b/gcc/match.pd +index 1097cd926..efd2db209 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -3450,7 +3450,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + + (if (flag_if_conversion_gimple) +- (for simple_op (plus minus bit_and bit_ior bit_xor) ++ (for simple_op (plus minus bit_ior bit_xor) + (simplify + (cond @0 (simple_op @1 INTEGER_CST@2) @1) + (switch +diff --git a/gcc/testsuite/gcc.dg/ifcvt-gimple-1.c b/gcc/testsuite/gcc.dg/ifcvt-gimple-1.c +new file mode 100644 +index 000000000..381a4ad51 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/ifcvt-gimple-1.c +@@ -0,0 +1,21 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fno-inline -fif-conversion-gimple" } */ ++ ++#include <stdlib.h> ++ ++void foo(int a, int *p) { ++ *p = a; ++} ++ ++void verify (int a) { ++ if (a != 3) ++ abort (); ++} ++ ++int main() { ++ int a = 0; ++ foo (3, &a); ++ int tmp = (a > 7) ?
a & 1 : a; ++ verify (tmp); ++ return 0; ++} +-- +2.33.0 + diff --git a/0204-Try-to-use-AI-model-to-guide-optimization.patch b/0204-Try-to-use-AI-model-to-guide-optimization.patch new file mode 100644 index 0000000..3a25dd6 --- /dev/null +++ b/0204-Try-to-use-AI-model-to-guide-optimization.patch @@ -0,0 +1,678 @@ +From 25014ea924bfe3659e88195636ec08f87dd72c07 Mon Sep 17 00:00:00 2001 +From: Mingchuan Wu +Date: Fri, 21 Jun 2024 20:26:04 +0800 +Subject: [PATCH] Try to use AI model to guide optimization. + +--- + gcc/Makefile.in | 1 + + gcc/common.opt | 7 ++ + gcc/config/aarch64/aarch64.c | 130 ++++++++++++++++++++ + gcc/ipa-hardware-detection.c | 228 +++++++++++++++++++++++++++++++++++ + gcc/opts-common.c | 154 +++++++++++++++++++++++ + gcc/opts.c | 11 ++ + gcc/passes.def | 1 + + gcc/timevar.def | 1 + + gcc/tree-pass.h | 2 + + 9 files changed, 535 insertions(+) + create mode 100644 gcc/ipa-hardware-detection.c + +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index aed321d27..f21bc5f9a 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -1398,6 +1398,7 @@ OBJS = \ + inchash.o \ + incpath.o \ + init-regs.o \ ++ ipa-hardware-detection.o \ + internal-fn.o \ + ipa-struct-reorg/ipa-struct-reorg.o \ + ipa-cp.o \ +diff --git a/gcc/common.opt b/gcc/common.opt +index aad6fb281..9b32ea50a 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -188,6 +188,9 @@ const char *main_input_basename + Variable + int main_input_baselength + ++Variable ++bool optimize_machine ++ + ; Which options have been printed by --help. + Variable + char *help_printed +@@ -467,6 +470,10 @@ Ofast + Common Optimization + Optimize for speed disregarding exact standards compliance. + ++Om ++Common Optimization ++Optimize for radical optimization for machines. ++ + Og + Common Optimization + Optimize for debugging experience rather than speed or size. 
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +index 2117326ba..e67e77e6a 100644 +--- a/gcc/config/aarch64/aarch64.c ++++ b/gcc/config/aarch64/aarch64.c +@@ -14416,6 +14416,135 @@ aarch64_sve_adjust_stmt_cost (vect_cost_for_stmt kind, + return stmt_cost; + } + ++/* Check whether in C language or LTO with only C language. */ ++extern bool lang_c_p (void); ++ ++static void ++override_C_optimize_options (struct gcc_options *opts) ++{ ++ opts->x_flag_ipa_reorder_fields = 1; ++ opts->x_flag_ipa_struct_reorg = 6; ++ opts->x_struct_layout_optimize_level = 6; ++ opts->x_flag_gnu89_inline = 1; ++ opts->x_flag_ccmp2 = 1; ++ opts->x_flag_array_widen_compare = 1; ++ opts->x_flag_convert_minmax = 1; ++ opts->x_flag_tree_slp_transpose_vectorize = 1; ++ opts->x_param_max_inline_insns_auto = 64; ++ opts->x_param_inline_unit_growth = 96; ++ opts->x_flag_cmlt_arith = 1; ++} ++ ++/* Check whether in CPP language or LTO with only CPP language. */ ++static bool ++lang_cpp_p (void) ++{ ++ const char *language_string = lang_hooks.name; ++ if (!language_string) ++ { ++ return false; ++ } ++ if (lang_GNU_CXX ()) ++ { ++ return true; ++ } ++ else if (strcmp (language_string, "GNU GIMPLE") == 0) // for LTO check ++ { ++ unsigned i = 0; ++ tree t = NULL_TREE; ++ FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t) ++ { ++ language_string = TRANSLATION_UNIT_LANGUAGE (t); ++ if (language_string == NULL ++ || strncmp (lang_hooks.name, "GNU C++", 7)) ++ { ++ return false; ++ } ++ } ++ return true; ++ } ++ return false; ++} ++ ++static void ++override_CPP_optimize_options (struct gcc_options *opts) ++{ ++ opts->x_flag_finite_loops = 1; ++ opts->x_flag_omit_frame_pointer = 1; ++ opts->x_flag_sized_deallocation = 0; ++ opts->x_flag_loop_elim = 1; ++ opts->x_flag_convert_minmax = 1; ++ opts->x_param_early_inlining_insns = 256; ++ opts->x_param_max_inline_insns_auto = 128; ++ opts->x_param_inline_unit_growth = 256; ++ opts->x_flag_cmlt_arith = 1; ++} ++ ++static void 
++override_optimize_options_1 (struct gcc_options *opts) ++{ ++ opts->x_flag_split_ldp_stp = 1; ++ opts->x_flag_if_conversion_gimple = 1; ++ opts->x_param_tree_forwprop_perm = 1; ++ opts->x_flag_ifcvt_allow_complicated_cmps = 1; ++ opts->x_param_ifcvt_allow_register_renaming = 2; ++ opts->x_param_max_rtl_if_conversion_unpredictable_cost = 48; ++ opts->x_param_max_rtl_if_conversion_predictable_cost = 48; ++} ++ ++static void ++override_Fortran_optimize_options (struct gcc_options *opts) ++{ ++ opts->x_flag_unroll_loops = 1; ++ opts->x_flag_unconstrained_commons = 1; ++ opts->x_param_ipa_cp_eval_threshold = 1; ++ opts->x_param_ipa_cp_unit_growth = 80; ++ opts->x_param_ipa_cp_max_recursive_depth = 8; ++ opts->x_param_large_unit_insns = 30000; ++ opts->x_flag_ira_loop_pressure = 1; ++ opts->x_flag_inline_functions_called_once = 0; ++ opts->x_flag_ira_algorithm = IRA_ALGORITHM_PRIORITY; ++ opts->x_flag_delayed_branch = 1; ++ opts->x_flag_gcse_las = 1; ++ opts->x_flag_gcse_sm = 1; ++ opts->x_flag_ipa_pta = 1; ++ opts->x_flag_reorder_blocks_and_partition = 1; ++ opts->x_flag_reorder_blocks = 1; ++ opts->x_flag_crypto_accel_aes = 1; ++ opts->x_param_flexible_seg_len = 1; ++} ++ ++/* Reset the optimize option. ++ After checking the model result, this function can ++ reset the more appropriate options. */ ++static void ++reset_machine_option (struct gcc_options *opts) ++{ ++ if (!(opts->x_optimize_machine) ++ || strstr (opts->x_aarch64_tune_string, "hip09") == NULL) ++ { ++ return; ++ } ++ ++ const char *ai_infer_level = getenv ("AI_INFER_LEVEL"); ++ if (ai_infer_level) ++ { ++ override_optimize_options_1 (opts); ++ if (lang_c_p ()) ++ { ++ override_C_optimize_options (opts); ++ } ++ else if (lang_cpp_p ()) ++ { ++ override_CPP_optimize_options (opts); ++ } ++ else if (lang_GNU_Fortran ()) ++ { ++ override_Fortran_optimize_options (opts); ++ } ++ } ++} ++ + /* Implement targetm.vectorize.add_stmt_cost. 
*/ + static unsigned + aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, +@@ -15060,6 +15189,7 @@ aarch64_override_options_internal (struct gcc_options *opts) + if (opts->x_aarch64_tune_string == NULL) + opts->x_aarch64_tune_string = selected_tune->name; + ++ reset_machine_option (opts); + aarch64_override_options_after_change_1 (opts); + } + +diff --git a/gcc/ipa-hardware-detection.c b/gcc/ipa-hardware-detection.c +new file mode 100644 +index 000000000..f127ebe2c +--- /dev/null ++++ b/gcc/ipa-hardware-detection.c +@@ -0,0 +1,228 @@ ++/* Hardware Detection. ++ Copyright (C) 2022-2022 Free Software Foundation, Inc. ++This file is part of GCC. ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by the ++Free Software Foundation; either version 3, or (at your option) any ++later version. ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License ++for more details. ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3.  If not see ++. 
*/ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "target.h" ++#include "tree.h" ++#include "gimple.h" ++#include "tree-pass.h" ++#include "gimple-ssa.h" ++#include "tree-pretty-print.h" ++#include "fold-const.h" ++#include "gimplify.h" ++#include "gimple-iterator.h" ++#include "tree-ssa-loop-manip.h" ++#include "tree-ssa-loop.h" ++#include "ssa.h" ++#include "tree-into-ssa.h" ++#include "cfganal.h" ++#include "cfgloop.h" ++#include "gimple-pretty-print.h" ++#include "tree-cfg.h" ++#include "cgraph.h" ++#include "print-tree.h" ++#include "cfghooks.h" ++#include "gimple-fold.h" ++ ++namespace { ++ ++static basic_block ++create_abort_bb (basic_block last_bb) ++{ ++ basic_block bb = create_empty_bb (last_bb); ++ if (last_bb->loop_father != NULL) ++ { ++ add_bb_to_loop (bb, last_bb->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ gimple_stmt_iterator gsi = gsi_last_bb (bb); ++ tree fn = builtin_decl_implicit (BUILT_IN_ABORT); ++ gimple *g = gimple_build_call (fn, 0); ++ gsi_insert_after (&gsi, g, GSI_NEW_STMT); ++ return bb; ++} ++ ++static basic_block ++create_part_bb (basic_block last_bb, tree part_base) ++{ ++ basic_block bb = create_empty_bb (last_bb); ++ if (last_bb->loop_father != NULL) ++ { ++ add_bb_to_loop (bb, last_bb->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ gimple_stmt_iterator gsi = gsi_last_bb (bb); ++ gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT); ++ /* This number is used to efficiently identify the supported part range. 
*/ ++ tree part_cond = gimplify_build2 ( ++ &gsi, PLUS_EXPR, unsigned_type_node, part_base, ++ build_int_cst (unsigned_type_node, 4294963967)); ++ gcond *cond = gimple_build_cond (LE_EXPR, part_cond, ++ build_int_cst (unsigned_type_node, 2), ++ NULL_TREE, NULL_TREE); ++ gimple_set_location (cond, input_location); ++ gsi_insert_before (&gsi, cond, GSI_SAME_STMT); ++ gsi_remove (&gsi, true); ++ return bb; ++} ++ ++static void ++create_detection_bb () ++{ ++ edge old_e = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); ++ basic_block ret_bb = old_e->dest; ++ ++ basic_block detection_bb = create_empty_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); ++ if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father != NULL) ++ { ++ add_bb_to_loop (detection_bb, ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ tree cpuid_decl = build_decl (input_location, VAR_DECL, ++ get_identifier ("cpuid"), unsigned_type_node); ++ add_local_decl (cfun, cpuid_decl); ++ ++ gimple_stmt_iterator gsi = gsi_last_bb (detection_bb); ++ vec<tree, va_gc> *outputs = NULL; ++ tree purpose = build_string (strlen ("=r"), "=r"); ++ tree output = build_tree_list ( ++ build_tree_list (NULL_TREE, purpose), cpuid_decl); ++ vec_safe_push (outputs, output); ++ gasm *asm_stmt = gimple_build_asm_vec ( ++ "mrs %0, MIDR_EL1", NULL, outputs, NULL, NULL); ++ gsi_insert_after (&gsi, asm_stmt, GSI_NEW_STMT); ++ gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT); ++ ++ tree implementer = gimplify_build2 ( ++ &gsi, RSHIFT_EXPR, unsigned_type_node, cpuid_decl, ++ build_int_cst (unsigned_type_node, 24)); ++ tree part_base = gimplify_build2 ( ++ &gsi, RSHIFT_EXPR, unsigned_type_node, cpuid_decl, ++ build_int_cst (unsigned_type_node, 4)); ++ tree part = gimplify_build2 ( ++ &gsi, BIT_AND_EXPR, unsigned_type_node, part_base, ++ build_int_cst (unsigned_type_node, 4095)); ++ gcond *implementer_cond = gimple_build_cond ( ++ EQ_EXPR, implementer, ++ build_int_cst (unsigned_type_node, 72), ++ NULL_TREE, NULL_TREE);
++ gimple_set_location (implementer_cond, input_location); ++ gsi_insert_before (&gsi, implementer_cond, GSI_SAME_STMT); ++ gsi_remove (&gsi, true); ++ ++ basic_block part_bb = create_part_bb (detection_bb, part); ++ basic_block abort_bb = create_abort_bb (part_bb); ++ ++ remove_edge_raw (old_e); ++ make_single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), ++ detection_bb, EDGE_FALLTHRU); ++ edge etrue = make_edge (detection_bb, part_bb, EDGE_TRUE_VALUE); ++ etrue->probability = profile_probability::likely (); ++ edge efalse = make_edge (detection_bb, abort_bb, EDGE_FALSE_VALUE); ++ efalse->probability = profile_probability::unlikely (); ++ edge part_true = make_edge (part_bb, ret_bb, EDGE_TRUE_VALUE); ++ part_true->probability = profile_probability::likely (); ++ edge part_false = make_edge (part_bb, abort_bb, EDGE_FALSE_VALUE); ++ part_false->probability = profile_probability::unlikely (); ++ make_single_succ_edge (abort_bb, ret_bb, EDGE_FALLTHRU); ++ if (dom_info_available_p (CDI_DOMINATORS)) ++ { ++ set_immediate_dominator (CDI_DOMINATORS, part_bb, detection_bb); ++ set_immediate_dominator (CDI_DOMINATORS, ret_bb, detection_bb); ++ set_immediate_dominator (CDI_DOMINATORS, abort_bb, detection_bb); ++ } ++} ++ ++const pass_data pass_data_ipa_hardware_detection = ++{ ++ SIMPLE_IPA_PASS, ++ "hardware_detection", ++ OPTGROUP_NONE, ++ TV_IPA_HARDWARE_DETECTION, ++ (PROP_cfg | PROP_ssa), ++ 0, ++ 0, ++ 0, ++ (TODO_update_ssa | TODO_verify_all) ++}; ++ ++class pass_ipa_hardware_detection : public simple_ipa_opt_pass ++{ ++public: ++ pass_ipa_hardware_detection (gcc::context *ctxt) ++ : simple_ipa_opt_pass (pass_data_ipa_hardware_detection, ctxt) ++ {} ++ ++ virtual bool gate (function *); ++ virtual unsigned int execute (function *); ++}; // class pass_ipa_hardware_detection ++ ++bool ++pass_ipa_hardware_detection::gate (function *) ++{ ++ const char *ai_infer_level = getenv ("AI_INFER_LEVEL"); ++ return (ai_infer_level ++ && optimize_machine > 0 ++ /* Only enable in lto 
or whole_program.  */ ++ && (in_lto_p || flag_whole_program)); ++} ++ ++unsigned int ++pass_ipa_hardware_detection::execute (function *) ++{ ++ unsigned int ret = 0; ++ cgraph_node *cnode; ++ FOR_EACH_FUNCTION (cnode) ++ { ++ if (!cnode->real_symbol_p ()) ++ { ++ continue; ++ } ++ if (cnode->definition) ++ { ++ if (!cnode->has_gimple_body_p () || cnode->inlined_to) ++ continue; ++ ++ cnode->get_body (); ++ function *fn = DECL_STRUCT_FUNCTION (cnode->decl); ++ if (!fn) ++ continue; ++ ++ if (DECL_NAME (cnode->decl) ++ && MAIN_NAME_P (DECL_NAME (cnode->decl))) ++ { ++ push_cfun (fn); ++ calculate_dominance_info (CDI_DOMINATORS); ++ ++ create_detection_bb (); ++ ++ cgraph_edge::rebuild_edges (); ++ free_dominance_info (CDI_DOMINATORS); ++ pop_cfun (); ++ } ++ } ++ } ++ return ret; ++} ++} // anon namespace ++ ++simple_ipa_opt_pass * ++make_pass_ipa_hardware_detection (gcc::context *ctxt) ++{ ++ return new pass_ipa_hardware_detection (ctxt); ++} +diff --git a/gcc/opts-common.c b/gcc/opts-common.c +index bf82b05c8..52e28e2dc 100644 +--- a/gcc/opts-common.c ++++ b/gcc/opts-common.c +@@ -926,6 +926,158 @@ opts_concat (const char *first, ...) + return newstr; + } + ++typedef int64_t (*run_ai_model_func)(int, const char **, ++ const char *, int, int64_t *); ++#define PTR_UNION_TYPE(TOTYPE) union { void *_q; TOTYPE _nq; } ++#define PTR_UNION_AS_VOID_PTR(NAME) (NAME._q) ++#define PTR_UNION_AS_CAST_PTR(NAME) (NAME._nq) ++ ++static int64_t ++ai_infer_optimization (int argc, const char **argv, ++ const char *mcpu_option, ++ int argc_hw, int64_t *argv_hw) ++{ ++ /* Load dependent AI-framework libraries. 
*/ ++ void *onnxruntime_lib_handle = NULL; ++ const char *onnxruntime_lib_path = "libonnxruntime.so"; ++ ++ onnxruntime_lib_handle = dlopen (onnxruntime_lib_path, ++ RTLD_LAZY | RTLD_GLOBAL); ++ if (!onnxruntime_lib_handle) ++ { ++ return -1; ++ } ++ ++ void *ai4c_lib_handle = NULL; ++ const char *ai4c_lib_path = "libONNXRunner.so"; ++ ++ ai4c_lib_handle = dlopen (ai4c_lib_path, RTLD_LAZY | RTLD_GLOBAL); ++ if (!ai4c_lib_handle) ++ { ++ return -1; ++ } ++ ++ /* Clear any existing error. */ ++ dlerror (); ++ ++ /* Run AI4Compiler model. */ ++ if (ai4c_lib_handle == NULL || onnxruntime_lib_handle == NULL) ++ { ++ return -1; ++ } ++ ++ run_ai_model_func run_ai_model; ++ PTR_UNION_TYPE (run_ai_model_func) run_ai_model_func_union; ++ PTR_UNION_AS_VOID_PTR (run_ai_model_func_union) ++ = dlsym (ai4c_lib_handle, "runONNXModelOptimizer"); ++ run_ai_model = PTR_UNION_AS_CAST_PTR (run_ai_model_func_union); ++ if (!run_ai_model) ++ { ++ dlclose (ai4c_lib_handle); ++ dlclose (onnxruntime_lib_handle); ++ return -1; ++ } ++ int64_t model_pred = (*run_ai_model) (argc, argv, ++ mcpu_option, argc_hw, argv_hw); ++ ++ if (ai4c_lib_handle) ++ dlclose (ai4c_lib_handle); ++ ++ if (onnxruntime_lib_handle) ++ dlclose (onnxruntime_lib_handle); ++ ++ if (model_pred == 1) ++ putenv ("AI_INFER_LEVEL=1"); ++ return model_pred; ++} ++ ++static int ++handle_lto_option (unsigned int lang_mask, ++ unsigned int num_decoded_options, ++ unsigned int argc, ++ const char **argv, ++ struct cl_decoded_option *&opt_array) ++{ ++ int ret = 0; ++ char *lan = ""; ++ char *compiler = xstrdup (argv[0]); ++ lan = strrchr (compiler, '/'); ++ if (lan != NULL) ++ lan ++; ++ else ++ lan = compiler; ++ if (strstr (lan, "gcc") != NULL) ++ { ++ opt_array = XRESIZEVEC (struct cl_decoded_option, opt_array, argc + 2); ++ const char* lto_flag = "-flto=8"; ++ decode_cmdline_option (&lto_flag, lang_mask, ++ &opt_array[num_decoded_options]); ++ ret++; ++ const char* ltopartition_flag = "-flto-partition=one"; ++
decode_cmdline_option (&ltopartition_flag, lang_mask, ++ &opt_array[num_decoded_options + 1]); ++ ret++; ++ } ++ else if (strstr (lan, "g++") != NULL ++ || strstr (lan, "gfortran") != NULL) ++ { ++ opt_array = XRESIZEVEC (struct cl_decoded_option, opt_array, argc + 1); ++ const char* lto_flag = "-flto=8"; ++ decode_cmdline_option (&lto_flag, lang_mask, ++ &opt_array[num_decoded_options]); ++ ret++; ++ } ++ if (compiler) ++ free (compiler); ++ return ret; ++} ++ ++static int ++handle_machine_option (unsigned int lang_mask, ++ unsigned int num_decoded_options, ++ unsigned int argc, ++ const char **argv, ++ struct cl_decoded_option *&opt_array) ++{ ++ int ret = 0; ++ bool flag_Om = false; ++ bool flag_hip09 = false; ++ for (unsigned i = 1; i < argc; i ++) ++ { ++ if (strcmp (argv[i], "-Om") == 0) ++ flag_Om = true; ++ if (strstr (argv[i], "mcpu=hip09") != NULL) ++ flag_hip09 = true; ++ } ++ if (!flag_hip09 || !flag_Om) ++ { ++ return ret; ++ } ++ ++ const char *ai_infer_level = getenv ("AI_INFER_LEVEL"); ++ if (ai_infer_level) ++ { ++ return ret; ++ } ++ int argc_hw = 6; ++ int64_t argv_hw[argc_hw] = { ++ global_options.x_param_simultaneous_prefetches, ++ global_options.x_param_l1_cache_size, ++ global_options.x_param_l1_cache_line_size, ++ global_options.x_param_l2_cache_size, ++ global_options.x_param_llc_capacity_per_core, ++ global_options.x_param_ipa_prefetch_distance_factor}; ++ int64_t output_pred = ai_infer_optimization ( ++ argc, argv, "hip09", argc_hw, argv_hw); ++ if (output_pred != 1) ++ { ++ return ret; ++ } ++ ++ return handle_lto_option (lang_mask, num_decoded_options, ++ argc, argv, opt_array); ++} ++ + /* Decode command-line options (ARGC and ARGV being the arguments of + main) into an array, setting *DECODED_OPTIONS to a pointer to that + array and *DECODED_OPTIONS_COUNT to the number of entries in the +@@ -987,6 +1139,8 @@ decode_cmdline_options_to_array (unsigned int argc, const char **argv, + num_decoded_options++; + } + ++ num_decoded_options +=
handle_machine_option (lang_mask, num_decoded_options, ++ argc, argv, opt_array); + *decoded_options = opt_array; + *decoded_options_count = num_decoded_options; + prune_options (decoded_options, decoded_options_count, lang_mask); +diff --git a/gcc/opts.c b/gcc/opts.c +index c0ccd0853..dc61216c0 100644 +--- a/gcc/opts.c ++++ b/gcc/opts.c +@@ -632,6 +632,15 @@ default_options_optimization (struct gcc_options *opts, + opts->x_optimize_debug = 1; + break; + ++ case OPT_Om: ++ /* -Om adds flags to -O3 & -Ofast. */ ++ opts->x_optimize_size = 0; ++ opts->x_optimize = 3; ++ opts->x_optimize_fast = 1; ++ opts->x_optimize_machine = true; ++ opts->x_optimize_debug = 0; ++ break; ++ + case OPT_fopenacc: + if (opt->value) + openacc_mode = true; +@@ -2378,6 +2387,8 @@ common_handle_option (struct gcc_options *opts, + opts->x_flag_sanitize_coverage, value, true); + break; + ++ case OPT_Om: ++ break; + case OPT_O: + case OPT_Os: + case OPT_Ofast: +diff --git a/gcc/passes.def b/gcc/passes.def +index b6006de22..8898b72fc 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -177,6 +177,7 @@ along with GCC; see the file COPYING3. If not see + compiled unit. 
*/ + INSERT_PASSES_AFTER (all_late_ipa_passes) + NEXT_PASS (pass_materialize_all_clones); ++ NEXT_PASS (pass_ipa_hardware_detection); + NEXT_PASS (pass_ipa_pta); + /* FIXME: this should a normal IP pass */ + NEXT_PASS (pass_ipa_struct_reorg); +diff --git a/gcc/timevar.def b/gcc/timevar.def +index 929e9e1d3..66b21f166 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -81,6 +81,7 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP , "ipa cp") + DEFTIMEVAR (TV_IPA_INLINING , "ipa inlining heuristics") + DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting") + DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats") ++DEFTIMEVAR (TV_IPA_HARDWARE_DETECTION, "ipa detection") + DEFTIMEVAR (TV_IPA_PREFETCH , "ipa prefetch") + DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization") + DEFTIMEVAR (TV_IPA_EXTEND_AUTO_PROFILE, "ipa extend auto profile") +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 4d952884d..d3a41d0d5 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -513,6 +513,8 @@ extern ipa_opt_pass_d *make_pass_ipa_odr (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); ++extern simple_ipa_opt_pass *make_pass_ipa_hardware_detection (gcc::context * ++ ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_prefetch (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_extend_auto_profile (gcc::context +-- +2.33.0 + diff --git a/gcc.spec b/gcc.spec index 2429d9d..ec89cd9 100644 --- a/gcc.spec +++ b/gcc.spec @@ -61,7 +61,7 @@ Summary: Various compilers (C, C++, Objective-C, ...) 
Name: gcc Version: %{gcc_version} -Release: 61 +Release: 62 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD URL: https://gcc.gnu.org @@ -308,6 +308,10 @@ Patch197: 0197-Add-hip09-machine-discribtion.patch Patch198: 0198-bugfix-Modify-the-hip09-CPU-information.patch Patch199: 0199-Backport-Improve-hard-reg-preference-propapagation.patch Patch200: 0200-Support-LTO-in-AutoBOLT-mode.patch +Patch201: 0201-Fix-bug-in-detecting-abnormal-revisit-for-bb.patch +Patch202: 0202-Change-set_of-to-reg_set_p-and-add-check-for-global_.patch +Patch203: 0203-Remove-erroneous-pattern-from-gimple-ifcvt.patch +Patch204: 0204-Try-to-use-AI-model-to-guide-optimization.patch %global gcc_target_platform %{_arch}-linux-gnu %if %{build_go} @@ -960,6 +964,10 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch198 -p1 %patch199 -p1 %patch200 -p1 +%patch201 -p1 +%patch202 -p1 +%patch203 -p1 +%patch204 -p1 %build @@ -2994,6 +3002,12 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Fri Jun 21 2024 Mingchuan Wu - 10.3.1-62 +- Type:Sync +- ID:NA +- SUG:NA +- DESC: Sync patch from openeuler/gcc + * Tue Jun 18 2024 chenyuanfeng - 10.3.1-61 - Type: Fix - ID:NA -- Gitee