diff --git a/GCC14-1006-Add-multi-version-lto-symbol-parse-cross-lto-units-i.patch b/GCC14-1006-Add-multi-version-lto-symbol-parse-cross-lto-units-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..c0ba8038ebbc43703c8dbdd9af327b62f5a81991 --- /dev/null +++ b/GCC14-1006-Add-multi-version-lto-symbol-parse-cross-lto-units-i.patch @@ -0,0 +1,715 @@ +From 8eec2221c01ebc677a67f859db8efae5e167bf40 Mon Sep 17 00:00:00 2001 +From: huang-xiaoquan +Date: Tue, 15 Jul 2025 11:20:10 +0800 +Subject: [PATCH 1/2] Add multi-version lto symbol parse, cross lto units + ipa-inline extension, and lto compression algorithm specified. + +--- + gcc/common.opt | 20 ++++ + gcc/doc/tm.texi | 6 ++ + gcc/doc/tm.texi.in | 2 + + gcc/ipa-inline.cc | 107 ++++++++++++++++++- + gcc/lto-compress.cc | 6 +- + gcc/lto-section-in.cc | 5 + + gcc/lto-streamer-out.cc | 7 +- + gcc/lto-streamer.h | 7 +- + gcc/lto-wrapper.cc | 4 + + gcc/optc-save-gen.awk | 57 ++++++++++ + gcc/opth-gen.awk | 3 + + gcc/opts.cc | 46 ++++++++ + gcc/target.def | 10 ++ + gcc/testsuite/gcc.dg/lto/binary-inline-1_0.c | 15 +++ + gcc/testsuite/gcc.dg/lto/binary-inline-1_1.c | 6 ++ + gcc/testsuite/gcc.dg/lto/binary-inline-2_0.c | 15 +++ + gcc/testsuite/gcc.dg/lto/binary-inline-2_1.c | 5 + + gcc/testsuite/gcc.dg/lto/binary-inline-3_0.c | 15 +++ + gcc/testsuite/gcc.dg/lto/binary-inline-3_1.c | 10 ++ + gcc/tree-streamer-in.cc | 44 ++++++++ + 20 files changed, 383 insertions(+), 7 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-1_0.c + create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-1_1.c + create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-2_0.c + create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-2_1.c + create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-3_0.c + create mode 100644 gcc/testsuite/gcc.dg/lto/binary-inline-3_1.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index ad3488447..4179302db 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1962,6 +1962,21 @@ finline-atomics + Common Var(flag_inline_atomics) Init(1) Optimization + Inline __atomic operations when a lock free instruction sequence is available. + ++fmulti-version-lib= ++Common Joined Var(multi_version_lib_string) ++Use specify LTO stream in mode for specified target (object or lib). If there ++are multiple target files, use commas (,) to separate them and without spaces. ++ ++finline-force ++Common Var(flag_inline_force) Init(0) Optimization ++Force perform ipa inline when march options are incompatible between functions. ++ ++finline-force= ++Common Joined Var(force_inline_targets_string) ++Force perform ipa inline specified target(object or lib) when march options are ++incompatible between functions. If there are multiple target files, use commas ++(,) to separate them and without spaces. ++ + finline-stringops + Common RejectNegative Enum(ilsop_fn) Var(flag_inline_stringops, ILSOP_ALL) Enum(ilsop_fn) Init(ILSOP_NONE) Optimization Undocumented + +@@ -2227,6 +2242,11 @@ flto-partition= + Common Joined RejectNegative Enum(lto_partition_model) Var(flag_lto_partition) Init(LTO_PARTITION_BALANCED) + Specify the algorithm to partition symbols and vars at linktime. + ++flto-compression-algorithm= ++Common Joined Var(lto_compression_algorithm) ++-flto-compression-algorithm= Generate lto compression in zlib/zstd ++format . ++ + ; The initial value of -1 comes from Z_DEFAULT_COMPRESSION in zlib.h. + flto-compression-level= + Common Joined RejectNegative UInteger Var(flag_lto_compression_level) Init(-1) IntegerRange(0, 19) +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index cd5007822..6cd41bc71 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -10807,6 +10807,12 @@ information in the @code{struct cl_target_option} structure for + function-specific options. + @end deftypefn + ++@deftypefn {Target Hook} void TARGET_OPTION_PRINT_DIFF (FILE *@var{file}, int @var{indent}, struct cl_target_option *@var{ptr1}, struct cl_target_option *@var{ptr2}) ++This hook is called to print diff additional target-specific ++information in the ptr1 and ptr2 @code{struct cl_target_option} structure for ++function-specific options. ++@end deftypefn ++ + @deftypefn {Target Hook} bool TARGET_OPTION_PRAGMA_PARSE (tree @var{args}, tree @var{pop_target}) + This target hook parses the options for @code{#pragma GCC target}, which + sets the target-specific options for functions that occur later in the +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index 058bd5648..da2efddfa 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -7105,6 +7105,8 @@ on this implementation detail. + + @hook TARGET_OPTION_PRINT + ++@hook TARGET_OPTION_PRINT_DIFF ++ + @hook TARGET_OPTION_PRAGMA_PARSE + + @hook TARGET_OPTION_OVERRIDE +diff --git a/gcc/ipa-inline.cc b/gcc/ipa-inline.cc +index 9fc41b769..b0218bceb 100644 +--- a/gcc/ipa-inline.cc ++++ b/gcc/ipa-inline.cc +@@ -90,6 +90,8 @@ along with GCC; see the file COPYING3. If not see + the need for offline copy of the function. */ + + #include "config.h" ++#define INCLUDE_SET ++#define INCLUDE_STRING + #include "system.h" + #include "coretypes.h" + #include "backend.h" +@@ -175,6 +177,7 @@ typedef fibonacci_node edge_heap_node_t; + static int overall_size; + static profile_count max_count; + static profile_count spec_rem; ++static std::set force_inline_targets; + + /* Return false when inlining edge E would lead to violating + limits on function unit growth or stack usage growth. +@@ -358,6 +361,77 @@ sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee) + (opts_for_fn (caller->decl)->x_##flag \ + != opts_for_fn (callee->decl)->x_##flag) + ++/* find related node that has lto_file_data. */ ++ ++static cgraph_node * ++find_related_node_lto_file_data (cgraph_node *node) ++{ ++ cgraph_node *cur = node; ++ ++ while (cur->clone_of) ++ { ++ /* Switch to original node, for example xxx.constprop.x function. */ ++ cur = cur->clone_of; ++ if (cur->lto_file_data) ++ return cur; ++ ++ /* Find the lto_file_data information of referring. */ ++ struct ipa_ref *ref = NULL; ++ for (int i = 0; cur->iterate_referring (i, ref); i++) ++ { ++ struct cgraph_node *cnode = dyn_cast (ref->referring); ++ if (cnode && cnode->lto_file_data) ++ return cnode; ++ } ++ } ++ ++ return NULL; ++} ++ ++/* Determines whether to force inline or force inline only the specified ++ object. Use for 3 inline extensions: ++ 1) CIF_TARGET_OPTION_MISMATCH: cancel the restriction that the target options ++ of different compilation units are different. ++ 2) CIF_OVERWRITABLE: indicates that the function is available, which is ++ similar to the "inline" keyword indication. ++ 3) CIF_OPTIMIZATION_MISMATCH: cancel the check in the case of fp_expressions, ++ which is similar to the "always_inline" attribute. ++ */ ++ ++static bool ++can_force_inline_p (cgraph_node *callee) ++{ ++ if (!in_lto_p) ++ return false; ++ if (flag_inline_force) ++ return true; ++ if (force_inline_targets_string) ++ { ++ cgraph_node * node = callee; ++ std::string name = ""; ++ if (callee->ultimate_alias_target () == NULL ++ || callee->ultimate_alias_target ()->lto_file_data == NULL) ++ { ++ node = find_related_node_lto_file_data (callee); ++ if (node && node->lto_file_data) ++ name = node->lto_file_data->file_name; ++ } ++ else ++ name = node->ultimate_alias_target ()->lto_file_data->file_name; ++ while (!name.empty () && name.back () == '/') ++ name.erase (name.length () - 1); ++ if (name.empty ()) ++ return false; ++ size_t last_slash_pos = name.find_last_of ('/'); ++ if (last_slash_pos != std::string::npos ++ && last_slash_pos != name.length () - 1) ++ name = name.substr (last_slash_pos + 1); ++ if (force_inline_targets.find (name) != force_inline_targets.end ()) ++ return true; ++ } ++ return false; ++} ++ + /* Decide if we can inline the edge and possibly update + inline_failed reason. + We check whether inlining is possible at all and whether +@@ -400,7 +474,7 @@ can_inline_edge_p (struct cgraph_edge *e, bool report, + e->inline_failed = CIF_USES_COMDAT_LOCAL; + inlinable = false; + } +- else if (avail <= AVAIL_INTERPOSABLE) ++ else if (avail <= AVAIL_INTERPOSABLE && !can_force_inline_p (callee)) + { + e->inline_failed = CIF_OVERWRITABLE; + inlinable = false; +@@ -426,8 +500,8 @@ can_inline_edge_p (struct cgraph_edge *e, bool report, + inlinable = false; + } + /* Check compatibility of target optimization options. */ +- else if (!targetm.target_option.can_inline_p (caller->decl, +- callee->decl)) ++ else if (!can_force_inline_p (callee) ++ && !targetm.target_option.can_inline_p (caller->decl, callee->decl)) + { + e->inline_failed = CIF_TARGET_OPTION_MISMATCH; + inlinable = false; +@@ -558,7 +632,8 @@ can_inline_edge_by_limits_p (struct cgraph_edge *e, int flags) + bool always_inline = + (DECL_DISREGARD_INLINE_LIMITS (callee->decl) + && lookup_attribute ("always_inline", +- DECL_ATTRIBUTES (callee->decl))); ++ DECL_ATTRIBUTES (callee->decl))) ++ || can_force_inline_p (callee); + ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller); + ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee); + +@@ -2775,6 +2850,27 @@ flatten_remove_node_hook (struct cgraph_node *node, void *data) + removed->add (node); + } + ++/* Parse string that specify forced inlining, separated by commas. */ ++ ++static void ++parse_force_inline_targets_string (const char* s) ++{ ++ std::string target_string (s); ++ std::string delim = ","; ++ size_t start = 0; ++ size_t end = target_string.find (delim); ++ if (target_string.substr (start, end - start) == "") ++ return; ++ ++ while (end != std::string::npos) ++ { ++ force_inline_targets.insert (target_string.substr (start, end - start)); ++ start = end + delim.size (); ++ end = target_string.find (delim, start); ++ } ++ force_inline_targets.insert (target_string.substr (start, end - start)); ++} ++ + /* Decide on the inlining. We do so in the topological order to avoid + expenses on updating data structures. */ + +@@ -2788,6 +2884,9 @@ ipa_inline (void) + int cold; + bool remove_functions = false; + ++ if (force_inline_targets_string) ++ parse_force_inline_targets_string (force_inline_targets_string); ++ + order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count); + + if (dump_file) +diff --git a/gcc/lto-compress.cc b/gcc/lto-compress.cc +index bebf0277e..29d652bf4 100644 +--- a/gcc/lto-compress.cc ++++ b/gcc/lto-compress.cc +@@ -305,7 +305,11 @@ void + lto_end_compression (struct lto_compression_stream *stream) + { + #ifdef HAVE_ZSTD_H +- lto_compression_zstd (stream); ++ if (lto_compression_algorithm ++ && strcmp (lto_compression_algorithm, "zstd") == 0) ++ lto_compression_zstd (stream); ++ else ++ lto_compression_zlib (stream); + #else + lto_compression_zlib (stream); + #endif +diff --git a/gcc/lto-section-in.cc b/gcc/lto-section-in.cc +index fc2ba1906..86002d530 100644 +--- a/gcc/lto-section-in.cc ++++ b/gcc/lto-section-in.cc +@@ -448,6 +448,11 @@ lto_free_function_in_decl_state_for_node (symtab_node *node) + lto_free_function_in_decl_state (*slot); + node->lto_file_data->function_decl_states->clear_slot (slot); + } ++ ++ /* In force inline case, keep lto file path information. */ ++ if (in_lto_p && (flag_inline_force || force_inline_targets_string)) ++ return; ++ + node->lto_file_data = NULL; + } + +diff --git a/gcc/lto-streamer-out.cc b/gcc/lto-streamer-out.cc +index d4f728094..808d821d2 100644 +--- a/gcc/lto-streamer-out.cc ++++ b/gcc/lto-streamer-out.cc +@@ -2689,7 +2689,12 @@ produce_lto_section () + free (section_name); + + #ifdef HAVE_ZSTD_H +- lto_compression compression = ZSTD; ++ lto_compression compression = ZLIB; ++ if (lto_compression_algorithm ++ && strcmp (lto_compression_algorithm, "zstd") == 0) ++ compression = ZSTD; ++ else ++ compression = ZLIB; + #else + lto_compression compression = ZLIB; + #endif +diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h +index e8dbba471..48fa400eb 100644 +--- a/gcc/lto-streamer.h ++++ b/gcc/lto-streamer.h +@@ -948,13 +948,18 @@ void cl_target_option_stream_in (class data_in *, + struct bitpack_d *, + struct cl_target_option *); + ++void cl_target_option_stream_in_prev (class data_in *, ++ struct bitpack_d *, ++ struct cl_target_option *); ++ + void cl_optimization_stream_out (struct output_block *, + struct bitpack_d *, struct cl_optimization *); + + void cl_optimization_stream_in (class data_in *, + struct bitpack_d *, struct cl_optimization *); + +- ++void cl_optimization_stream_in_prev (class data_in *, ++ struct bitpack_d *, struct cl_optimization *); + + /* In lto-opts.cc. */ + extern void lto_write_options (void); +diff --git a/gcc/lto-wrapper.cc b/gcc/lto-wrapper.cc +index c7bad4b61..cfdec660a 100644 +--- a/gcc/lto-wrapper.cc ++++ b/gcc/lto-wrapper.cc +@@ -521,6 +521,8 @@ merge_and_complain (vec &decoded_options, + || decoded_options[j].opt_index == OPT_fpic) + { + /* -fno-pic in one unit implies -fno-pic everywhere. */ ++ /* The -fno-pic adjustment here should provide some information hints, ++ but may affect the use case test of deja. */ + if (decoded_options[j].value == 0) + j++; + /* If we have no pic option or merge in -fno-pic, we still may turn +@@ -564,6 +566,8 @@ merge_and_complain (vec &decoded_options, + || decoded_options[j].opt_index == OPT_fpie) + { + /* -fno-pie in one unit implies -fno-pie everywhere. */ ++ /* The -fno-pie adjustment here should provide some information hints, ++ but may affect the use case test of deja. */ + if (decoded_options[j].value == 0) + j++; + /* If we have no pie option or merge in -fno-pie, we still preserve +diff --git a/gcc/optc-save-gen.awk b/gcc/optc-save-gen.awk +index 2d36e5867..3c9242c9b 100644 +--- a/gcc/optc-save-gen.awk ++++ b/gcc/optc-save-gen.awk +@@ -1017,6 +1017,10 @@ for (i = 0; i < n_target_string; i++) { + print ""; + } + ++print ""; ++print " if (targetm.target_option.print_diff)"; ++print " targetm.target_option.print_diff (file, indent, ptr1, ptr2);"; ++ + print "}"; + + print ""; +@@ -1126,6 +1130,59 @@ print " return true;"; + + print "}"; + ++print ""; ++print "/* Compare two target major options. */"; ++print "bool"; ++print "cl_target_option_eq_major (struct cl_target_option const *ptr1 ATTRIBUTE_UNUSED,"; ++print " struct cl_target_option const *ptr2 ATTRIBUTE_UNUSED)"; ++print "{"; ++n_target_val_major = 0; ++ ++for (i = 0; i < n_target_save; i++) { ++ var = target_save_decl[i]; ++ sub (" *=.*", "", var);TARGET_OPTION_PRINT_DIFF ++ name = var; ++ type = var; ++ sub("^.*[ *]", "", name) ++ sub(" *" name "$", "", type) ++ if (target_save_decl[i] ~ "^const char \\*+[_" alnum "]+$") ++ continue; ++ if (target_save_decl[i] ~ " .*\\[.+\\]+$") ++ continue; ++ ++ var_target_val_major[n_target_val_major++] = name; ++} ++if (have_save) { ++ for (i = 0; i < n_opts; i++) { ++ if (flag_set_p("Save", flags[i])) { ++ name = var_name(flags[i]) ++ if(name == "") ++ name = "target_flags"; ++ ++ if(name in var_list_seen) ++ continue; ++ ++ var_list_seen[name]++; ++ otype = var_type_struct(flags[i]) ++ if (otype ~ "^const char \\**$") ++ continue; ++ var_target_val_major[n_target_val_major++] = "x_" name; ++ } ++ } ++} else { ++ var_target_val_major[n_target_val_major++] = "x_target_flags"; ++} ++ ++for (i = 0; i < n_target_val_major; i++) { ++ name = var_target_val_major[i] ++ print " if (ptr1->" name" != ptr2->" name ")"; ++ print " return false;"; ++} ++ ++print " return true;"; ++ ++print "}"; ++ + print ""; + print "/* Hash target options */"; + print "hashval_t"; +diff --git a/gcc/opth-gen.awk b/gcc/opth-gen.awk +index 6a6895806..4466d740e 100644 +--- a/gcc/opth-gen.awk ++++ b/gcc/opth-gen.awk +@@ -330,6 +330,9 @@ print ""; + print "/* Compare two target option variables from a structure. */"; + print "extern bool cl_target_option_eq (const struct cl_target_option *, const struct cl_target_option *);"; + print ""; ++print "/* Compare two target major option variables from a structure. */"; ++print "extern bool cl_target_option_eq_major (const struct cl_target_option *, const struct cl_target_option *);"; ++print ""; + print "/* Free heap memory used by target option variables. */"; + print "extern void cl_target_option_free (struct cl_target_option *);"; + print ""; +diff --git a/gcc/opts.cc b/gcc/opts.cc +index 3333600e0..2838aaf6d 100644 +--- a/gcc/opts.cc ++++ b/gcc/opts.cc +@@ -2659,6 +2659,32 @@ print_help (struct gcc_options *opts, unsigned int lang_mask, + lang_mask); + } + ++/* Checks whether the input forced inline string complies with the ++ restriction. */ ++ ++void ++check_force_inline_targets_string (const char *arg, location_t loc) ++{ ++ const int MAX_FORCE_INLINE_TARGET_LEN = 10000; ++ const int MAX_NUM_TARGET = 100; ++ __SIZE_TYPE__ length = strlen (arg); ++ int target_num = 1; ++ if (length > MAX_FORCE_INLINE_TARGET_LEN) ++ error_at (loc, ++ "input string exceeds %d characters to %<-finline_force=%> " ++ "option: %qs", MAX_FORCE_INLINE_TARGET_LEN, arg); ++ for (__SIZE_TYPE__ i = 0; i < length; i++) ++ { ++ if (arg[i] == ',') ++ { ++ target_num++; ++ if (target_num > MAX_NUM_TARGET) ++ error_at (loc, "input target exceeds %d to %<-finline_force=%> " ++ "option: %qs", MAX_NUM_TARGET, arg); ++ } ++ } ++} ++ + /* Handle target- and language-independent options. Return zero to + generate an "unknown option" message. Only options that need + extra handling need to be listed here; if you simply want +@@ -3023,6 +3049,14 @@ common_handle_option (struct gcc_options *opts, + value / 2); + break; + ++ case OPT_finline_force: ++ opts->x_force_inline_targets_string = value ? "" : NULL; ++ break; ++ ++ case OPT_finline_force_: ++ check_force_inline_targets_string (arg, loc); ++ break; ++ + case OPT_finstrument_functions_exclude_function_list_: + add_comma_separated_to_vector + (&opts->x_flag_instrument_functions_exclude_functions, arg); +@@ -3280,6 +3314,18 @@ common_handle_option (struct gcc_options *opts, + "unrecognized argument to %<-flto=%> option: %qs", arg); + break; + ++ case OPT_flto_compression_algorithm_: ++ if (atoi (arg) == 0 ++ && strcmp (arg, "zlib") != 0 ++#ifdef HAVE_ZSTD_H ++ && strcmp (arg, "zstd") != 0 ++#endif ++ ) ++ error_at (loc, ++ "unrecognized argument to %<-flto-compression-algorithm=%> " ++ "option: %qs", arg); ++ break; ++ + case OPT_w: + dc->m_inhibit_warnings = true; + break; +diff --git a/gcc/target.def b/gcc/target.def +index c27df8095..cb0594e38 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -6701,6 +6701,16 @@ information in the @code{struct cl_target_option} structure for\n\ + function-specific options.", + void, (FILE *file, int indent, struct cl_target_option *ptr), NULL) + ++/* Function to print any extra target state from the target options ++ structure. */ ++DEFHOOK ++(print_diff, ++ "This hook is called to print diff additional target-specific\n\ ++information in the ptr1 and ptr2 @code{struct cl_target_option} structure for\n\ ++function-specific options.", ++ void, (FILE *file, int indent, struct cl_target_option *ptr1, ++ struct cl_target_option *ptr2), NULL) ++ + /* Function to parse arguments to be validated for #pragma target, and to + change the state if the options are valid. If the first argument is + NULL, the second argument specifies the default options to use. Return +diff --git a/gcc/testsuite/gcc.dg/lto/binary-inline-1_0.c b/gcc/testsuite/gcc.dg/lto/binary-inline-1_0.c +new file mode 100644 +index 000000000..0b5cd5953 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/lto/binary-inline-1_0.c +@@ -0,0 +1,15 @@ ++/* { dg-lto-do link } */ ++/* { dg-require-effective-target shared } */ ++/* { dg-extra-ld-options {-shared -finline-force=c_lto_binary-inline-1_1.o} } */ ++/* { dg-lto-options {{-O3 -flto -march=armv8.2-a -fdump-ipa-inline-details}} } */ ++ ++extern double multi_op(float x); ++ ++double func_a (float x) ++{ ++ double res = 0; ++ res = multi_op (x); ++ return res; ++} ++ ++/* { dg-final { scan-wpa-ipa-dump "Inlined 1 calls" "inline" } } */ +diff --git a/gcc/testsuite/gcc.dg/lto/binary-inline-1_1.c b/gcc/testsuite/gcc.dg/lto/binary-inline-1_1.c +new file mode 100644 +index 000000000..8181384b7 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/lto/binary-inline-1_1.c +@@ -0,0 +1,6 @@ ++/* { dg-options "-march=armv8.3-a+sve+f64mm+crc+crypto+fp16+i8mm+simd" } */ ++ ++double multi_op (float x) ++{ ++ return x * 2 + 10; ++} +diff --git a/gcc/testsuite/gcc.dg/lto/binary-inline-2_0.c b/gcc/testsuite/gcc.dg/lto/binary-inline-2_0.c +new file mode 100644 +index 000000000..e873937d3 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/lto/binary-inline-2_0.c +@@ -0,0 +1,15 @@ ++/* { dg-lto-do link } */ ++/* { dg-require-effective-target shared } */ ++/* { dg-extra-ld-options {-shared -finline-force=c_lto_binary-inline-2_1.o} } */ ++/* { dg-lto-options {{-O3 -flto -fPIC -fdump-ipa-inline-details}} } */ ++ ++extern double multi_op(float x); ++ ++double func_a (float x) ++{ ++ double res = 0; ++ res = multi_op (x); ++ return res; ++} ++ ++/* { dg-final { scan-wpa-ipa-dump "Inlined 1 calls" "inline" } } */ +diff --git a/gcc/testsuite/gcc.dg/lto/binary-inline-2_1.c b/gcc/testsuite/gcc.dg/lto/binary-inline-2_1.c +new file mode 100644 +index 000000000..dc7c4fd9f +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/lto/binary-inline-2_1.c +@@ -0,0 +1,5 @@ ++ ++double multi_op (float x) ++{ ++ return x * 2 + 10; ++} +diff --git a/gcc/testsuite/gcc.dg/lto/binary-inline-3_0.c b/gcc/testsuite/gcc.dg/lto/binary-inline-3_0.c +new file mode 100644 +index 000000000..c78ba066d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/lto/binary-inline-3_0.c +@@ -0,0 +1,15 @@ ++/* { dg-lto-do link } */ ++/* { dg-require-effective-target shared } */ ++/* { dg-extra-ld-options {-shared -finline-force=c_lto_binary-inline-3_1.o} } */ ++/* { dg-lto-options {{-O3 -flto -fdump-ipa-inline-details}} } */ ++ ++extern double multi_op(double x); ++ ++double func_a (double x) ++{ ++ double res = 0; ++ res = multi_op (x); ++ return res; ++} ++ ++/* { dg-final { scan-wpa-ipa-dump "Inlined 1 calls" "inline" } } */ +diff --git a/gcc/testsuite/gcc.dg/lto/binary-inline-3_1.c b/gcc/testsuite/gcc.dg/lto/binary-inline-3_1.c +new file mode 100644 +index 000000000..8b505fa0c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/lto/binary-inline-3_1.c +@@ -0,0 +1,10 @@ ++/* { dg-options "-O2 -fno-math-errno" } */ ++ ++#include ++ ++double multi_op (double x) ++{ ++ double a = 0; ++ a = sqrt (x); ++ return a * 2 + 10; ++} +diff --git a/gcc/tree-streamer-in.cc b/gcc/tree-streamer-in.cc +index 35341a2b2..3c8cf384f 100644 +--- a/gcc/tree-streamer-in.cc ++++ b/gcc/tree-streamer-in.cc +@@ -20,6 +20,9 @@ along with GCC; see the file COPYING3. If not see + . */ + + #include "config.h" ++#include ++#define INCLUDE_SET ++#define INCLUDE_STRING + #include "system.h" + #include "coretypes.h" + #include "backend.h" +@@ -36,6 +39,47 @@ along with GCC; see the file COPYING3. If not see + #include "asan.h" + #include "opts.h" + ++/* Parse string that specify forced inlining, separated by commas. */ ++static std::set multi_version_libs; ++static void ++parse_multi_version_lib_string (const char* s) ++{ ++ std::string target_string (s); ++ std::string delim = ","; ++ size_t start = 0; ++ size_t end = target_string.find (delim); ++ if (target_string.substr (start, end - start) == "") ++ return; ++ ++ while (end != std::string::npos) ++ { ++ multi_version_libs.insert (target_string.substr (start, end - start)); ++ start = end + delim.size (); ++ end = target_string.find (delim, start); ++ } ++ multi_version_libs.insert (target_string.substr (start, end - start)); ++} ++ ++static bool ++target_lib_p (std::string name) ++{ ++ if (multi_version_libs.empty () && multi_version_lib_string) ++ parse_multi_version_lib_string (multi_version_lib_string); ++ if (multi_version_lib_string) ++ { ++ while (!name.empty () && name.back () == '/') ++ name.erase (name.length () - 1); ++ if (name.empty ()) ++ return false; ++ size_t last_slash_pos = name.find_last_of ('/'); ++ if (last_slash_pos != std::string::npos ++ && last_slash_pos != name.length () - 1) ++ name = name.substr (last_slash_pos + 1); ++ if (multi_version_libs.find (name) != multi_version_libs.end ()) ++ return true; ++ } ++ return false; ++} + + /* Read a STRING_CST from the string table in DATA_IN using input + block IB. */ +-- +2.33.0 + diff --git a/GCC14-1007-Add-hip09-machine-discribtion.patch b/GCC14-1007-Add-hip09-machine-discribtion.patch new file mode 100644 index 0000000000000000000000000000000000000000..4d68ed7f00e1d4bb64a16363709ee2bf6d8ba189 --- /dev/null +++ b/GCC14-1007-Add-hip09-machine-discribtion.patch @@ -0,0 +1,924 @@ +From 2d7ae4a5b13671b213c4ed3a662be521e9054f17 Mon Sep 17 00:00:00 2001 +From: huang-xiaoquan +Date: Tue, 15 Jul 2025 15:35:32 +0800 +Subject: [PATCH] Add hip09 machine discribtion + +--- + gcc/config/aarch64/aarch64-cores.def | 1 + + gcc/config/aarch64/aarch64-cost-tables.h | 108 ++++ + gcc/config/aarch64/aarch64-tune.md | 2 +- + gcc/config/aarch64/aarch64-tuning-flags.def | 3 + + gcc/config/aarch64/aarch64.cc | 9 + + gcc/config/aarch64/aarch64.md | 1 + + gcc/config/aarch64/hip09.md | 558 ++++++++++++++++++++ + gcc/config/aarch64/tuning_models/hip09.h | 137 +++++ + 8 files changed, 818 insertions(+), 1 deletion(-) + create mode 100644 gcc/config/aarch64/hip09.md + create mode 100644 gcc/config/aarch64/tuning_models/hip09.h + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index b1eaf5512..dd75748c4 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -136,6 +136,7 @@ AARCH64_CORE("fujitsu-monaka", fujitsu_monaka, cortexa57, V9_3A, (F16, LS64, RNG + + /* HiSilicon ('H') cores. */ + AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0xd01, -1) ++AARCH64_CORE("hip09", hip09, hip09, V8_5A, (SVE, I8MM, F32MM, F64MM, PROFILE, PREDRES, RNG), hip09, 0x48, 0xd02, -1) + + /* ARMv8.3-A Architecture Processors. */ + +diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h +index 7c7949161..3f1c9310e 100644 +--- a/gcc/config/aarch64/aarch64-cost-tables.h ++++ b/gcc/config/aarch64/aarch64-cost-tables.h +@@ -668,6 +668,114 @@ const struct cpu_cost_table a64fx_extra_costs = + } + }; + ++const struct cpu_cost_table hip09_extra_costs = ++{ ++ /* ALU */ ++ { ++ 0, /* arith. */ ++ 0, /* logical. */ ++ 0, /* shift. */ ++ 0, /* shift_reg. */ ++ COSTS_N_INSNS (1), /* arith_shift. */ ++ COSTS_N_INSNS (1), /* arith_shift_reg. */ ++ COSTS_N_INSNS (1), /* log_shift. */ ++ COSTS_N_INSNS (1), /* log_shift_reg. */ ++ 0, /* extend. */ ++ COSTS_N_INSNS (1), /* extend_arith. */ ++ 0, /* bfi. */ ++ 0, /* bfx. */ ++ 0, /* clz. */ ++ 0, /* rev. */ ++ 0, /* non_exec. */ ++ true /* non_exec_costs_exec. */ ++ }, ++ ++ { ++ /* MULT SImode */ ++ { ++ COSTS_N_INSNS (2), /* simple. */ ++ COSTS_N_INSNS (2), /* flag_setting. */ ++ COSTS_N_INSNS (2), /* extend. */ ++ COSTS_N_INSNS (2), /* add. */ ++ COSTS_N_INSNS (2), /* extend_add. */ ++ COSTS_N_INSNS (11) /* idiv. */ ++ }, ++ /* MULT DImode */ ++ { ++ COSTS_N_INSNS (3), /* simple. */ ++ 0, /* flag_setting (N/A). */ ++ COSTS_N_INSNS (3), /* extend. */ ++ COSTS_N_INSNS (3), /* add. */ ++ COSTS_N_INSNS (3), /* extend_add. */ ++ COSTS_N_INSNS (19) /* idiv. */ ++ } ++ }, ++ /* LD/ST */ ++ { ++ COSTS_N_INSNS (3), /* load. */ ++ COSTS_N_INSNS (4), /* load_sign_extend. */ ++ COSTS_N_INSNS (3), /* ldrd. */ ++ COSTS_N_INSNS (3), /* ldm_1st. */ ++ 1, /* ldm_regs_per_insn_1st. */ ++ 2, /* ldm_regs_per_insn_subsequent. */ ++ COSTS_N_INSNS (4), /* loadf. */ ++ COSTS_N_INSNS (4), /* loadd. */ ++ COSTS_N_INSNS (4), /* load_unaligned. */ ++ 0, /* store. */ ++ 0, /* strd. */ ++ 0, /* stm_1st. */ ++ 1, /* stm_regs_per_insn_1st. */ ++ 2, /* stm_regs_per_insn_subsequent. */ ++ 0, /* storef. */ ++ 0, /* stored. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (4), /* loadv. */ ++ COSTS_N_INSNS (4) /* storev. */ ++ }, ++ { ++ /* FP SFmode */ ++ { ++ COSTS_N_INSNS (10), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (4), /* mult_addsub. */ ++ COSTS_N_INSNS (4), /* fma. */ ++ COSTS_N_INSNS (4), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ }, ++ /* FP DFmode */ ++ { ++ COSTS_N_INSNS (17), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (6), /* mult_addsub. */ ++ COSTS_N_INSNS (6), /* fma. */ ++ COSTS_N_INSNS (3), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ } ++ }, ++ /* Vector */ ++ { ++ COSTS_N_INSNS (1), /* alu. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (1), /* movi. */ ++ COSTS_N_INSNS (2), /* dup. */ ++ COSTS_N_INSNS (2) /* extract. */ ++ } ++}; ++ + const struct cpu_cost_table ampere1_extra_costs = + { + /* ALU */ +diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md +index 35b27ddb8..67bbee9b9 100644 +--- a/gcc/config/aarch64/aarch64-tune.md ++++ b/gcc/config/aarch64/aarch64-tune.md +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from aarch64-cores.def + (define_attr "tune" +- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,generic,generic_armv8_a,generic_armv9_a" ++ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,hip09,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,generic,generic_armv8_a,generic_armv9_a" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) +diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def +index d917da720..aca9bc391 100644 +--- a/gcc/config/aarch64/aarch64-tuning-flags.def ++++ b/gcc/config/aarch64/aarch64-tuning-flags.def +@@ -41,6 +41,9 @@ AARCH64_EXTRA_TUNING_OPTION ("no_ldp_stp_qregs", NO_LDP_STP_QREGS) + + AARCH64_EXTRA_TUNING_OPTION ("rename_load_regs", RENAME_LOAD_REGS) + ++/* Prefer Advanced SIMD over SVE for auto-vectorization. */ ++AARCH64_EXTRA_TUNING_OPTION ("prefer_advsimd_autovec", PREFER_ADVSIMD_AUTOVEC) ++ + AARCH64_EXTRA_TUNING_OPTION ("cse_sve_vl_constants", CSE_SVE_VL_CONSTANTS) + + AARCH64_EXTRA_TUNING_OPTION ("use_new_vector_costs", USE_NEW_VECTOR_COSTS) +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index b28ebc96b..cfbcdba6d 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -395,6 +395,7 @@ static const struct aarch64_flag_desc aarch64_tuning_flags[] = + #include "tuning_models/thunderxt88.h" + #include "tuning_models/thunderx.h" + #include "tuning_models/tsv110.h" ++#include "tuning_models/hip09.h" + #include "tuning_models/xgene1.h" + #include "tuning_models/emag.h" + #include "tuning_models/qdf24xx.h" +@@ -18361,6 +18362,14 @@ aarch64_override_options_internal (struct gcc_options *opts) + SET_OPTION_IF_UNSET (opts, &global_options_set, + param_sched_autopref_queue_depth, queue_depth); + ++ /* If the core wants only AdvancedSIMD autovectorization, do this through ++ aarch64_autovec_preference. If the user set it explicitly, they should ++ know what they want. */ ++ if (aarch64_tune_params.extra_tuning_flags ++ & AARCH64_EXTRA_TUNE_PREFER_ADVSIMD_AUTOVEC) ++ SET_OPTION_IF_UNSET (opts, &global_options_set, ++ aarch64_autovec_preference, 1); ++ + /* Set up parameters to be used in prefetching algorithm. Do not + override the defaults unless we are tuning for a core we have + researched values for. */ +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 6a481059b..a16e35909 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -579,6 +579,7 @@ + (include "thunderx2t99.md") + (include "tsv110.md") + (include "thunderx3t110.md") ++(include "hip09.md") + + ;; ------------------------------------------------------------------- + ;; Jumps and other miscellaneous insns +diff --git a/gcc/config/aarch64/hip09.md b/gcc/config/aarch64/hip09.md +new file mode 100644 +index 000000000..1c7c880e6 +--- /dev/null ++++ b/gcc/config/aarch64/hip09.md +@@ -0,0 +1,558 @@ ++;; hip09 pipeline description ++;; Copyright (C) 2023 Free Software Foundation, Inc. ++;; ++;;Contributed by Yushuai Xing ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but ++;; WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++;; General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . ++ ++(define_automaton "hip09") ++(define_automaton "hip09_ldst") ++(define_automaton "hip09_fsu") ++ ++(define_attr "hip09_type" ++ "hip09_neon_abs, hip09_neon_fp_arith, hip09_neon_mul, hip09_neon_mla, ++ hip09_neon_dot, hip09_neon_fp_div, hip09_neon_fp_sqrt, ++ hip09_neon_ins, hip09_neon_load1, hip09_neon_load1_lanes, ++ hip09_neon_load2and4, hip09_neon_load3_3reg, ++ hip09_neon_load4_4reg, hip09_neon_store1and2, ++ hip09_neon_store1_1reg, hip09_neon_store1_2reg, ++ hip09_neon_store1_3reg, hip09_neon_store1_4reg, ++ hip09_neon_store3and4_lane, hip09_neon_store3_3reg, ++ hip09_neon_store4_4reg, unknown" ++ (cond [ ++ (eq_attr "type" "neon_abs,neon_abs_q,neon_add,neon_add_q,\ ++ neon_neg,neon_neg_q,neon_sub,neon_sub_q,neon_add_widen,\ ++ neon_sub_widen,neon_qadd,neon_qadd_q,\ ++ neon_add_long,neon_sub_long,\ ++ neon_qabs,neon_qabs_q,neon_qneg,\ ++ neon_qneg_q,neon_qsub,neon_qsub_q,neon_compare,\ ++ neon_compare_q,neon_compare_zero,\ ++ neon_compare_zero_q,neon_logic,neon_logic_q,\ ++ neon_minmax,neon_minmax_q,neon_tst,\ ++ neon_tst_q,neon_bsl,neon_bsl_q,\ ++ neon_cls,neon_cls_q,neon_ext,\ ++ neon_ext_q,neon_rev,neon_rev_q,\ ++ neon_tbl1,neon_tbl1_q,neon_fp_abs_s,\ ++ neon_fp_abs_s_q,neon_fp_abs_d,\ ++ neon_fp_neg_s,neon_fp_neg_s_q,\ ++ neon_fp_neg_d,neon_fp_neg_d_q,\ ++ neon_shift_imm_narrow_q,neon_move,neon_move_q") ++ (const_string "hip09_neon_abs") ++ (eq_attr "type" "neon_abd,neon_abd_q,\ ++ neon_arith_acc,neon_arith_acc_q,\ ++ neon_add_halve,neon_add_halve_q,\ ++ neon_sub_halve,neon_sub_halve_q,\ ++ neon_add_halve_narrow_q,\ ++ neon_sub_halve_narrow_q,neon_reduc_add,\ ++ neon_reduc_add_q,\ ++ neon_sat_mul_b,neon_sat_mul_b_q,\ ++ neon_sat_mul_b_long,neon_mul_b,neon_mul_b_q,\ ++ neon_mul_b_long,neon_mla_b,neon_mla_b_q,\ ++ neon_mla_b_long,neon_sat_mla_b_long,\ ++ neon_sat_shift_imm,\ ++ neon_sat_shift_imm_q,neon_shift_imm_long,\ ++ neon_shift_imm,neon_shift_imm_q,neon_cnt,\ ++ neon_cnt_q,neon_fp_recpe_s,neon_fp_recpe_s_q,\ ++ neon_fp_recpe_d,neon_fp_recpe_d_q,\ ++ neon_fp_rsqrte_s,neon_fp_rsqrte_s_q,\ ++ neon_fp_rsqrte_d,neon_fp_rsqrte_d_q,\ ++ neon_fp_recpx_s,neon_fp_recpx_s_q,\ ++ neon_fp_recpx_d,neon_fp_recpx_d_q,\ ++ neon_tbl2,neon_tbl2_q,neon_to_gp,\ ++ neon_to_gp_q,neon_fp_abd_s,neon_fp_abd_s_q,\ ++ neon_fp_abd_d,neon_fp_abd_d_q,\ ++ neon_fp_addsub_s,neon_fp_addsub_s_q,\ ++ neon_fp_addsub_d,neon_fp_addsub_d_q,\ ++ neon_fp_compare_s,neon_fp_compare_s_q,\ ++ neon_fp_compare_d,neon_fp_compare_d_q,\ ++ neon_fp_cvt_widen_s,neon_fp_to_int_s,\ ++ neon_fp_to_int_s_q,neon_fp_to_int_d,\ ++ neon_fp_to_int_d_q,neon_fp_minmax_s,\ ++ neon_fp_minmax_s_q,neon_fp_minmax_d,\ ++ neon_fp_minmax_d_q,neon_fp_round_s,\ ++ neon_fp_round_s_q,neon_fp_cvt_narrow_d_q,\ ++ neon_fp_round_d,neon_fp_round_d_q,\ ++ neon_fp_cvt_narrow_s_q") ++ (const_string "hip09_neon_fp_arith") ++ (eq_attr "type" "neon_sat_mul_h,neon_sat_mul_h_q,\ ++ neon_sat_mul_s,neon_sat_mul_s_q,\ ++ neon_sat_mul_h_scalar,neon_sat_mul_s_scalar,\ ++ neon_sat_mul_h_scalar_q,neon_sat_mul_h_long,\ ++ neon_sat_mul_s_long,neon_sat_mul_h_scalar_long,\ ++ neon_sat_mul_s_scalar_long,neon_mul_h,neon_mul_h_q,\ ++ neon_mul_s,neon_mul_s_q,neon_mul_h_long,\ ++ neon_mul_s_long,neon_mul_h_scalar_long,\ ++ neon_mul_s_scalar_long,neon_mla_h,neon_mla_h_q,\ ++ neon_mla_s,neon_mla_h_scalar,\ ++ neon_mla_h_scalar_q,neon_mla_s_scalar,\ ++ neon_mla_h_long,\ ++ neon_mla_s_long,neon_sat_mla_h_long,\ ++ neon_sat_mla_s_long,neon_sat_mla_h_scalar_long,\ ++ neon_sat_mla_s_scalar_long,neon_mla_s_scalar_long,\ ++ neon_mla_h_scalar_long,neon_mla_s_scalar_q,\ ++ neon_shift_acc,neon_shift_acc_q,neon_shift_reg,\ ++ neon_shift_reg_q,neon_sat_shift_reg,\ ++ neon_sat_shift_reg_q,neon_sat_shift_imm_narrow_q,\ ++ neon_tbl3,neon_tbl3_q,neon_fp_reduc_add_s,\ ++ neon_fp_reduc_add_s_q,neon_fp_reduc_add_d,\ ++ neon_fp_reduc_add_d_q,neon_fp_reduc_minmax_s,\ ++ neon_fp_reduc_minmax_d,neon_fp_reduc_minmax_s_q,\ ++ neon_fp_reduc_minmax_d_q,\ ++ neon_fp_mul_s_q,\ ++ neon_fp_mul_d,neon_fp_mul_d_q,\ ++ neon_fp_mul_d_scalar_q,neon_fp_mul_s_scalar,\ ++ neon_fp_mul_s_scalar_q") ++ (const_string "hip09_neon_mul") ++ (eq_attr "type" "neon_mla_s_q,neon_reduc_minmax,\ ++ neon_reduc_minmax_q,neon_fp_recps_s,\ ++ neon_fp_recps_s_q,neon_fp_recps_d,\ ++ neon_fp_recps_d_q,neon_tbl4,neon_tbl4_q,\ ++ neon_fp_mla_s,\ ++ neon_fp_mla_d,neon_fp_mla_d_q,\ ++ neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\ ++ neon_fp_mla_d_scalar_q") ++ (const_string "hip09_neon_mla") ++ (eq_attr "type" "neon_dot,neon_dot_q") ++ (const_string "hip09_neon_dot") ++ (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q,\ ++ neon_fp_div_d,neon_fp_div_d_q") ++ (const_string "hip09_neon_fp_div") ++ (eq_attr "type" "neon_fp_sqrt_s,neon_fp_sqrt_s_q,\ ++ neon_fp_sqrt_d,neon_fp_sqrt_d_q") ++ (const_string "hip09_neon_fp_sqrt") ++ (eq_attr "type" "neon_dup,neon_dup_q,\ ++ neon_ins,neon_ins_q") ++ (const_string "hip09_neon_ins") ++ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\ ++ neon_load1_2reg,neon_load1_2reg_q,\ ++ neon_load1_3reg,neon_load1_3reg_q,\ ++ neon_load1_4reg,neon_load1_4reg_q") ++ (const_string "hip09_neon_load1") ++ (eq_attr "type" "neon_load1_one_lane,\ ++ neon_load1_one_lane_q,\ ++ neon_load1_all_lanes,neon_load1_all_lanes_q") ++ (const_string "hip09_neon_load1_lanes") ++ (eq_attr "type" "neon_load2_all_lanes,\ ++ neon_load2_all_lanes_q,\ ++ neon_load2_one_lane,neon_load2_2reg,\ ++ neon_load2_2reg_q,neon_load3_one_lane,\ ++ neon_load3_all_lanes,neon_load3_all_lanes_q,\ ++ neon_load4_one_lane,neon_load4_all_lanes,\ ++ neon_load4_all_lanes_q") ++ (const_string "hip09_neon_load2and4") ++ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q") ++ (const_string "hip09_neon_load3_3reg") ++ (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q") ++ (const_string "hip09_neon_load4_4reg") ++ (eq_attr "type" "neon_store1_one_lane,\ ++ neon_store1_one_lane_q,neon_store2_one_lane,\ ++ neon_store2_one_lane_q,neon_store2_2reg,\ ++ neon_store2_2reg_q") ++ (const_string "hip09_neon_store1and2") ++ (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q") ++ (const_string "hip09_neon_store1_1reg") ++ (eq_attr "type" "neon_store1_2reg,neon_store1_2reg_q") ++ (const_string "hip09_neon_store1_2reg") ++ (eq_attr "type" "neon_store1_3reg,neon_store1_3reg_q") ++ (const_string "hip09_neon_store1_3reg") ++ (eq_attr "type" "neon_store1_4reg,neon_store1_4reg_q") ++ (const_string "hip09_neon_store1_4reg") ++ (eq_attr "type" "neon_store3_one_lane,\ ++ neon_store3_one_lane_q,neon_store4_one_lane,\ ++ neon_store4_one_lane_q") ++ (const_string "hip09_neon_store3and4_lane") ++ (eq_attr "type" "neon_store3_3reg,\ ++ neon_store3_3reg_q") ++ (const_string "hip09_neon_store3_3reg") ++ (eq_attr "type" "neon_store4_4reg,\ ++ neon_store4_4reg_q") ++ (const_string "hip09_neon_store4_4reg")] ++ (const_string "unknown"))) ++ ++; The hip09 core is modelled as issues pipeline that has ++; the following functional units. ++; 1. Two pipelines for branch micro operations: BRU1, BRU2 ++ ++(define_cpu_unit "hip09_bru0" "hip09") ++(define_cpu_unit "hip09_bru1" "hip09") ++ ++(define_reservation "hip09_bru01" "hip09_bru0|hip09_bru1") ++ ++; 2. Four pipelines for single cycle integer micro operations: ALUs1, ALUs2, ALUs3, ALUs4 ++ ++(define_cpu_unit "hip09_alus0" "hip09") ++(define_cpu_unit "hip09_alus1" "hip09") ++(define_cpu_unit "hip09_alus2" "hip09") ++(define_cpu_unit "hip09_alus3" "hip09") ++ ++(define_reservation "hip09_alus0123" "hip09_alus0|hip09_alus1|hip09_alus2|hip09_alus3") ++(define_reservation "hip09_alus01" "hip09_alus0|hip09_alus1") ++(define_reservation "hip09_alus23" "hip09_alus2|hip09_alus3") ++ ++; 3. Two pipelines for multi cycles integer micro operations: ALUm1, ALUm2 ++ ++(define_cpu_unit "hip09_alum0" "hip09") ++(define_cpu_unit "hip09_alum1" "hip09") ++ ++(define_reservation "hip09_alum01" "hip09_alum0|hip09_alum1") ++ ++; 4. Two pipelines for load micro opetations: Load1, Load2 ++ ++(define_cpu_unit "hip09_load0" "hip09_ldst") ++(define_cpu_unit "hip09_load1" "hip09_ldst") ++ ++(define_reservation "hip09_ld01" "hip09_load0|hip09_load1") ++ ++; 5. Two pipelines for store micro operations: Store1, Store2 ++ ++(define_cpu_unit "hip09_store0" "hip09_ldst") ++(define_cpu_unit "hip09_store1" "hip09_ldst") ++ ++(define_reservation "hip09_st01" "hip09_store0|hip09_store1") ++ ++; 6. Two pipelines for store data micro operations: STD0,STD1 ++ ++(define_cpu_unit "hip09_store_data0" "hip09_ldst") ++(define_cpu_unit "hip09_store_data1" "hip09_ldst") ++ ++(define_reservation "hip09_std01" "hip09_store_data0|hip09_store_data1") ++ ++; 7. Four asymmetric pipelines for Asimd and FP micro operations: FSU1, FSU2, FSU3, FSU4 ++ ++(define_cpu_unit "hip09_fsu0" "hip09_fsu") ++(define_cpu_unit "hip09_fsu1" "hip09_fsu") ++(define_cpu_unit "hip09_fsu2" "hip09_fsu") ++(define_cpu_unit "hip09_fsu3" "hip09_fsu") ++ ++(define_reservation "hip09_fsu0123" "hip09_fsu0|hip09_fsu1|hip09_fsu2|hip09_fsu3") ++(define_reservation "hip09_fsu02" "hip09_fsu0|hip09_fsu2") ++ ++ ++; 8. Two pipelines for sve operations but same with fsu1 and fsu3: SVE1, SVE2 ++ ++;; Simple Execution Unit: ++; ++;; Simple ALU without shift ++(define_insn_reservation "hip09_alu" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "alu_imm,logic_imm,\ ++ adc_imm,adc_reg,\ ++ alu_sreg,logic_reg,\ ++ mov_imm,mov_reg,\ ++ csel,rotate_imm,bfm,mov_imm,\ ++ clz,rbit,rev")) ++ "hip09_alus0123") ++ ++(define_insn_reservation "hip09_alus" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "alus_sreg,alus_imm,\ ++ adcs_reg,adcs_imm,\ ++ logics_imm,logics_reg,adr")) ++ "hip09_alus23") ++ ++;; ALU ops with shift and extend ++(define_insn_reservation "hip09_alu_ext_shift" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "alu_ext,alus_ext,\ ++ logics_shift_imm,logics_shift_reg,\ ++ logic_shift_reg,logic_shift_imm,\ ++ ")) ++ "hip09_alum01") ++ ++;; Multiplies instructions ++(define_insn_reservation "hip09_mult" 3 ++ (and (eq_attr "tune" "hip09") ++ (ior (eq_attr "mul32" "yes") ++ (eq_attr "widen_mul64" "yes"))) ++ "hip09_alum01") ++ ++;; Integer divide ++(define_insn_reservation "hip09_div" 10 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "udiv,sdiv")) ++ "hip09_alum0") ++ ++;; Branch execution Unit ++; ++; Branches take two issue slot. ++; No latency as there is no result ++(define_insn_reservation "hip09_branch" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "branch,call")) ++ "hip09_bru01 + hip09_alus23") ++ ++;; Load execution Unit ++; ++; Loads of up to two words. ++(define_insn_reservation "hip09_load1" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "load_4,load_8")) ++ "hip09_ld01") ++ ++; Stores of up to two words. ++(define_insn_reservation "hip09_store1" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "store_4,store_8")) ++ "hip09_st01") ++ ++;; FP data processing instructions. ++ ++(define_insn_reservation "hip09_fp_arith" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "ffariths,ffarithd,fmov,fconsts,fconstd,\ ++ f_mrc")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_cmp" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fcmps,fcmpd")) ++ "hip09_fsu0123+hip09_alus23") ++ ++(define_insn_reservation "hip09_fp_ccmp" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fccmps,fccmpd")) ++ "hip09_alus01+hip09_fsu0123+hip09_alus23") ++ ++(define_insn_reservation "hip09_fp_csel" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fcsel,f_mcr")) ++ "hip09_alus01+hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_divs" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fdivs")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_divd" 10 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fdivd")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_sqrts" 9 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fsqrts")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_sqrtd" 15 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fsqrtd")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_mul" 3 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fmuls,fmuld")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_add" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fadds,faddd,f_minmaxs,f_minmaxd,f_cvt,\ ++ f_rints,f_rintd")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_mac" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fmacs,fmacd")) ++ "hip09_fsu0123") ++ ++;; FP miscellaneous instructions. ++ ++(define_insn_reservation "hip09_fp_cvt" 5 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "f_cvtf2i")) ++ "hip09_fsu0123+hip09_alus23") ++ ++(define_insn_reservation "hip09_fp_cvt2" 5 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "f_cvti2f")) ++ "hip09_alus01+hip09_fsu0123") ++ ++;; FP Load Instructions ++ ++(define_insn_reservation "hip09_fp_load" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "f_loads,f_loadd")) ++ "hip09_ld01") ++ ++(define_insn_reservation "hip09_fp_load2" 6 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "neon_ldp_q,neon_ldp")) ++ "hip09_ld01+hip09_alus01") ++ ++;; FP store instructions ++ ++(define_insn_reservation "hip09_fp_store" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "f_stores,f_stored")) ++ "hip09_st01+hip09_std01") ++ ++;; ASIMD integer instructions ++ ++(define_insn_reservation "hip09_asimd_base1" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_abs")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_base2" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_fp_arith")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_base3" 3 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_mul")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_base4" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_mla")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_base5" 5 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "neon_fp_mul_s")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_dot" 6 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_dot")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_bfmmla" 9 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "neon_fp_mla_s_q")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_fdiv" 15 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_fp_div")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_fsqrt" 25 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_fp_sqrt")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_pmull" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_pmull")) ++ "hip09_fsu2") ++ ++(define_insn_reservation "hip09_asimd_dup" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_ins")) ++ "hip09_alus01+hip09_fsu0123") ++ ++;; ASIMD load instructions ++ ++(define_insn_reservation "hip09_asimd_ld1_reg" 6 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load1")) ++ "hip09_ld01") ++ ++(define_insn_reservation "hip09_asimd_ld1_lane" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load1_lanes")) ++ "hip09_ld01+hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_ld23" 8 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load2and4")) ++"hip09_ld01+hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_ld3_mtp" 9 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load3_3reg")) ++ "hip09_ld01+hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_ld4_mtp" 13 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load4_4reg")) ++ "hip09_ld01+hip09_fsu0123") ++ ++;; ASIMD store instructions ++ ++(define_insn_reservation "hip09_asimd_st12" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1and2")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st1_1reg" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1_1reg")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st1_2reg" 3 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1_2reg")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st1_3reg" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1_3reg")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st1_4reg" 5 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1_4reg")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st34_lane" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store3and4_lane")) ++ "hip09_fsu0123+hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st3_mtp" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store3_3reg")) ++ "hip09_fsu0123+hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st4_mtp" 10 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store4_4reg")) ++ "hip09_fsu0123+hip09_st01+hip09_std01") ++ ++;; Cryptography extensions ++ ++(define_insn_reservation "hip09_asimd_aes" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_aese,crypto_aesmc")) ++ "hip09_fsu02") ++ ++(define_insn_reservation "hip09_asimd_sha3" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_sha3")) ++ "hip09_fsu2") ++ ++(define_insn_reservation "hip09_asimd_sha1" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,\ ++ crypto_sha256_fast,crypto_sha512,\ ++ crypto_sm3")) ++ "hip09_fsu2") ++ ++(define_insn_reservation "hip09_asimd_sha1_and256" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow,\ ++ crypto_sm4")) ++ "hip09_fsu2") ++ ++;; CRC extension. ++ ++(define_insn_reservation "hip09_crc" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crc")) ++ "hip09_alum01") +\ No newline at end of file +diff --git a/gcc/config/aarch64/tuning_models/hip09.h b/gcc/config/aarch64/tuning_models/hip09.h +new file mode 100644 +index 000000000..d000f5bba +--- /dev/null ++++ b/gcc/config/aarch64/tuning_models/hip09.h +@@ -0,0 +1,137 @@ ++/* Tuning model description for AArch64 architecture. ++ Copyright (C) 2009-2024 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . */ ++ ++#ifndef GCC_AARCH64_H_HIP09 ++#define GCC_AARCH64_H_HIP09 ++ ++#include "generic.h" ++ ++static const struct cpu_addrcost_table hip09_addrcost_table = ++{ ++ { ++ 1, /* hi */ ++ 0, /* si */ ++ 0, /* di */ ++ 1, /* ti */ ++ }, ++ 0, /* pre_modify */ ++ 0, /* post_modify */ ++ 0, /* post_modify_ld3_st3 */ ++ 0, /* post_modify_ld4_st4 */ ++ 0, /* register_offset */ ++ 1, /* register_sextend */ ++ 1, /* register_zextend */ ++ 0, /* imm_offset */ ++}; ++ ++static const struct cpu_regmove_cost hip09_regmove_cost = ++{ ++ 1, /* GP2GP */ ++ /* Avoid the use of slow int<->fp moves for spilling by setting ++ their cost higher than memmov_cost. */ ++ 5, /* GP2FP */ ++ 5, /* FP2GP */ ++ 2 /* FP2FP */ ++}; ++ ++static const advsimd_vec_cost hip09_advsimd_vector_cost = ++{ ++ 2, /* int_stmt_cost */ ++ 2, /* fp_stmt_cost */ ++ 0, /* ld2_st2_permute_cost */ ++ 0, /* ld3_st3_permute_cost */ ++ 0, /* ld4_st4_permute_cost */ ++ 2, /* permute_cost */ ++ 3, /* reduc_i8_cost */ ++ 3, /* reduc_i16_cost */ ++ 3, /* reduc_i32_cost */ ++ 3, /* reduc_i64_cost */ ++ 3, /* reduc_f16_cost */ ++ 3, /* reduc_f32_cost */ ++ 3, /* reduc_f64_cost */ ++ 3, /* store_elt_extra_cost */ ++ 3, /* vec_to_scalar_cost */ ++ 2, /* scalar_to_vec_cost */ ++ 5, /* align_load_cost */ ++ 5, /* unalign_load_cost */ ++ 1, /* unalign_store_cost */ ++ 1 /* store_cost */ ++}; ++ ++static const struct cpu_vector_cost hip09_vector_cost = ++{ ++ 1, /* scalar_int_stmt_cost */ ++ 1, /* scalar_fp_stmt_cost */ ++ 5, /* scalar_load_cost */ ++ 1, /* scalar_store_cost */ ++ 1, /* cond_taken_branch_cost */ ++ 1, /* cond_not_taken_branch_cost */ ++ &hip09_advsimd_vector_cost, /* advsimd */ ++ nullptr, /* sve */ ++ nullptr /* issue_info */ ++}; ++ ++static const cpu_prefetch_tune hip09_prefetch_tune = ++{ ++ 0, /* num_slots */ ++ 64, /* l1_cache_size */ ++ 64, /* l1_cache_line_size */ ++ 512, /* l2_cache_size */ ++ true, /* prefetch_dynamic_strides */ ++ -1, /* minimum_stride */ ++ -1 /* default_opt_level */ ++}; ++ ++static const struct tune_params hip09_tunings = ++{ ++ &hip09_extra_costs, ++ &hip09_addrcost_table, ++ &hip09_regmove_cost, ++ &generic_vector_cost, ++ &generic_branch_cost, ++ &generic_approx_modes, ++ SVE_256, /* sve_width */ ++ { 4, /* load_int. */ ++ 4, /* store_int. */ ++ 4, /* load_fp. */ ++ 4, /* store_fp. */ ++ 4, /* load_pred. */ ++ 4 /* store_pred. */ ++ }, /* memmov_cost. */ ++ 2, /* issue_rate */ ++ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH ++ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */ ++ "16:12", /* function_align. */ ++ "4", /* jump_align. */ ++ "8", /* loop_align. */ ++ 2, /* int_reassoc_width. */ ++ 4, /* fp_reassoc_width. */ ++ 1, /* fma_reassoc_width. */ ++ 1, /* vec_reassoc_width. */ ++ 2, /* min_div_recip_mul_sf. */ ++ 2, /* min_div_recip_mul_df. */ ++ 0, /* max_case_values. */ ++ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ ++ (AARCH64_EXTRA_TUNE_PREFER_ADVSIMD_AUTOVEC), /* tune_flags. */ ++ &hip09_prefetch_tune, ++ AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */ ++ AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */ ++}; ++ ++#endif /* GCC_AARCH64_H_HIP09. */ +-- +2.33.0 + diff --git a/gcc-14.spec b/gcc-14.spec index bb3e99ec708ed67d392196436d353dd08b972ce6..9f213c9e4f760f00f1bda0e11a7ea76edcf3e1ef 100644 --- a/gcc-14.spec +++ b/gcc-14.spec @@ -90,7 +90,7 @@ Summary: Various compilers (C, C++, Objective-C, ...) Name: %{?_scl_prefix}gcc%{gcc_ver} Version: 14.3.1 -Release: 1 +Release: 3 # libgcc, libgfortran, libgomp, libstdc++ and crtstuff have # GCC Runtime Exception. License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD @@ -173,6 +173,8 @@ Patch1001: GCC14-1001-libstdc++-compat.patch Patch1002: GCC14-1002-change-gcc-version.patch Patch1004: GCC14-1004-riscv-lib64.patch Patch1005: GCC14-1005-libstdc-compat-Update-symbol-list-for-RISC-V-64.patch +Patch1006: GCC14-1006-Add-multi-version-lto-symbol-parse-cross-lto-units-i.patch +Patch1007: GCC14-1007-Add-hip09-machine-discribtion.patch # On ARM EABI systems, we do want -gnueabi to be part of the # target triple. @@ -2203,6 +2205,12 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Tue Jul 22 2025 huang-xiaoquan - 14.3.1-3 +- [Sync] Sync hip09 + +* Mon Jul 21 2025 huang-xiaoquan - 14.3.1-2 +- [Sync] Sync inline-force + * Tue Jun 17 2025 jchzhou - 14.3.1-1 - Update to 14.3.1