diff --git a/GCC14-1027-Move-ix86_align_loops-into-a-separate-pass-and-inser.patch b/GCC14-1027-Move-ix86_align_loops-into-a-separate-pass-and-inser.patch new file mode 100644 index 0000000000000000000000000000000000000000..0d5f6c47c8e55137ad1e14a58e4d649ddbf87a0b --- /dev/null +++ b/GCC14-1027-Move-ix86_align_loops-into-a-separate-pass-and-inser.patch @@ -0,0 +1,444 @@ +From 4e7735a8d87559bbddfe3a985786996e22241f8d Mon Sep 17 00:00:00 2001 +From: liuhongt +Date: Mon, 12 Aug 2024 14:35:31 +0800 +Subject: [PATCH] Move ix86_align_loops into a separate pass and insert the + pass after pass_endbr_and_patchable_area. + +gcc/ChangeLog: + + PR target/116174 + * config/i386/i386.cc (ix86_align_loops): Move this to .. + * config/i386/i386-features.cc (ix86_align_loops): .. here. + (class pass_align_tight_loops): New class. + (make_pass_align_tight_loops): New function. + * config/i386/i386-passes.def: Insert pass_align_tight_loops + after pass_insert_endbr_and_patchable_area. + * config/i386/i386-protos.h (make_pass_align_tight_loops): New + declare. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr116174.c: New test. + +(cherry picked from commit c3c83d22d212a35cb1bfb8727477819463f0dcd8) +--- + gcc/config/i386/i386-features.cc | 191 +++++++++++++++++++++++ + gcc/config/i386/i386-passes.def | 3 + + gcc/config/i386/i386-protos.h | 1 + + gcc/config/i386/i386.cc | 146 ----------------- + gcc/testsuite/gcc.target/i386/pr116174.c | 12 ++ + 5 files changed, 207 insertions(+), 146 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr116174.c + +diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc +index e3e004d5526..7de19d42363 100644 +--- a/gcc/config/i386/i386-features.cc ++++ b/gcc/config/i386/i386-features.cc +@@ -3253,6 +3253,197 @@ make_pass_remove_partial_avx_dependency (gcc::context *ctxt) + return new pass_remove_partial_avx_dependency (ctxt); + } + ++/* When a hot loop can be fit into one cacheline, ++ force align the loop without considering the max skip. */ ++static void ++ix86_align_loops () ++{ ++ basic_block bb; ++ ++ /* Don't do this when we don't know cache line size. */ ++ if (ix86_cost->prefetch_block == 0) ++ return; ++ ++ loop_optimizer_init (AVOID_CFG_MODIFICATIONS); ++ profile_count count_threshold = cfun->cfg->count_max / param_align_threshold; ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ rtx_insn *label = BB_HEAD (bb); ++ bool has_fallthru = 0; ++ edge e; ++ edge_iterator ei; ++ ++ if (!LABEL_P (label)) ++ continue; ++ ++ profile_count fallthru_count = profile_count::zero (); ++ profile_count branch_count = profile_count::zero (); ++ ++ FOR_EACH_EDGE (e, ei, bb->preds) ++ { ++ if (e->flags & EDGE_FALLTHRU) ++ has_fallthru = 1, fallthru_count += e->count (); ++ else ++ branch_count += e->count (); ++ } ++ ++ if (!fallthru_count.initialized_p () || !branch_count.initialized_p ()) ++ continue; ++ ++ if (bb->loop_father ++ && bb->loop_father->latch != EXIT_BLOCK_PTR_FOR_FN (cfun) ++ && (has_fallthru ++ ? (!(single_succ_p (bb) ++ && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun)) ++ && optimize_bb_for_speed_p (bb) ++ && branch_count + fallthru_count > count_threshold ++ && (branch_count > fallthru_count * param_align_loop_iterations)) ++ /* In case there'no fallthru for the loop. ++ Nops inserted won't be executed. */ ++ : (branch_count > count_threshold ++ || (bb->count > bb->prev_bb->count * 10 ++ && (bb->prev_bb->count ++ <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count / 2))))) ++ { ++ rtx_insn* insn, *end_insn; ++ HOST_WIDE_INT size = 0; ++ bool padding_p = true; ++ basic_block tbb = bb; ++ unsigned cond_branch_num = 0; ++ bool detect_tight_loop_p = false; ++ ++ for (unsigned int i = 0; i != bb->loop_father->num_nodes; ++ i++, tbb = tbb->next_bb) ++ { ++ /* Only handle continuous cfg layout. */ ++ if (bb->loop_father != tbb->loop_father) ++ { ++ padding_p = false; ++ break; ++ } ++ ++ FOR_BB_INSNS (tbb, insn) ++ { ++ if (!NONDEBUG_INSN_P (insn)) ++ continue; ++ size += ix86_min_insn_size (insn); ++ ++ /* We don't know size of inline asm. ++ Don't align loop for call. */ ++ if (asm_noperands (PATTERN (insn)) >= 0 ++ || CALL_P (insn)) ++ { ++ size = -1; ++ break; ++ } ++ } ++ ++ if (size == -1 || size > ix86_cost->prefetch_block) ++ { ++ padding_p = false; ++ break; ++ } ++ ++ FOR_EACH_EDGE (e, ei, tbb->succs) ++ { ++ /* It could be part of the loop. */ ++ if (e->dest == bb) ++ { ++ detect_tight_loop_p = true; ++ break; ++ } ++ } ++ ++ if (detect_tight_loop_p) ++ break; ++ ++ end_insn = BB_END (tbb); ++ if (JUMP_P (end_insn)) ++ { ++ /* For decoded icache: ++ 1. Up to two branches are allowed per Way. ++ 2. A non-conditional branch is the last micro-op in a Way. ++ */ ++ if (onlyjump_p (end_insn) ++ && (any_uncondjump_p (end_insn) ++ || single_succ_p (tbb))) ++ { ++ padding_p = false; ++ break; ++ } ++ else if (++cond_branch_num >= 2) ++ { ++ padding_p = false; ++ break; ++ } ++ } ++ ++ } ++ ++ if (padding_p && detect_tight_loop_p) ++ { ++ emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 (size)), ++ GEN_INT (0)), label); ++ /* End of function. */ ++ if (!tbb || tbb == EXIT_BLOCK_PTR_FOR_FN (cfun)) ++ break; ++ /* Skip bb which already fits into one cacheline. */ ++ bb = tbb; ++ } ++ } ++ } ++ ++ loop_optimizer_finalize (); ++ free_dominance_info (CDI_DOMINATORS); ++} ++ ++namespace { ++ ++const pass_data pass_data_align_tight_loops = ++{ ++ RTL_PASS, /* type */ ++ "align_tight_loops", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_MACH_DEP, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ 0, /* todo_flags_finish */ ++}; ++ ++class pass_align_tight_loops : public rtl_opt_pass ++{ ++public: ++ pass_align_tight_loops (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_align_tight_loops, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ bool gate (function *) final override ++ { ++ return optimize && optimize_function_for_speed_p (cfun); ++ } ++ ++ unsigned int execute (function *) final override ++ { ++ timevar_push (TV_MACH_DEP); ++#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN ++ ix86_align_loops (); ++#endif ++ timevar_pop (TV_MACH_DEP); ++ return 0; ++ } ++}; // class pass_align_tight_loops ++ ++} // anon namespace ++ ++rtl_opt_pass * ++make_pass_align_tight_loops (gcc::context *ctxt) ++{ ++ return new pass_align_tight_loops (ctxt); ++} ++ + /* This compares the priority of target features in function DECL1 + and DECL2. It returns positive value if DECL1 is higher priority, + negative value if DECL2 is higher priority and 0 if they are the +diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def +index 7d96766f7b9..e500f15c997 100644 +--- a/gcc/config/i386/i386-passes.def ++++ b/gcc/config/i386/i386-passes.def +@@ -31,5 +31,8 @@ along with GCC; see the file COPYING3. If not see + INSERT_PASS_BEFORE (pass_cse2, 1, pass_stv, true /* timode_p */); + + INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_endbr_and_patchable_area); ++ /* pass_align_tight_loops must be after pass_insert_endbr_and_patchable_area. ++ PR116174. */ ++ INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_tight_loops); + + INSERT_PASS_AFTER (pass_combine, 1, pass_remove_partial_avx_dependency); +diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h +index 46214a63974..36c7b1aed42 100644 +--- a/gcc/config/i386/i386-protos.h ++++ b/gcc/config/i386/i386-protos.h +@@ -419,6 +419,7 @@ extern rtl_opt_pass *make_pass_insert_endbr_and_patchable_area + (gcc::context *); + extern rtl_opt_pass *make_pass_remove_partial_avx_dependency + (gcc::context *); ++extern rtl_opt_pass *make_pass_align_tight_loops (gcc::context *); + + extern bool ix86_has_no_direct_extern_access; + +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 6f89891d3cb..288c69467d6 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -23444,150 +23444,6 @@ ix86_split_stlf_stall_load () + } + } + +-/* When a hot loop can be fit into one cacheline, +- force align the loop without considering the max skip. */ +-static void +-ix86_align_loops () +-{ +- basic_block bb; +- +- /* Don't do this when we don't know cache line size. */ +- if (ix86_cost->prefetch_block == 0) +- return; +- +- loop_optimizer_init (AVOID_CFG_MODIFICATIONS); +- profile_count count_threshold = cfun->cfg->count_max / param_align_threshold; +- FOR_EACH_BB_FN (bb, cfun) +- { +- rtx_insn *label = BB_HEAD (bb); +- bool has_fallthru = 0; +- edge e; +- edge_iterator ei; +- +- if (!LABEL_P (label)) +- continue; +- +- profile_count fallthru_count = profile_count::zero (); +- profile_count branch_count = profile_count::zero (); +- +- FOR_EACH_EDGE (e, ei, bb->preds) +- { +- if (e->flags & EDGE_FALLTHRU) +- has_fallthru = 1, fallthru_count += e->count (); +- else +- branch_count += e->count (); +- } +- +- if (!fallthru_count.initialized_p () || !branch_count.initialized_p ()) +- continue; +- +- if (bb->loop_father +- && bb->loop_father->latch != EXIT_BLOCK_PTR_FOR_FN (cfun) +- && (has_fallthru +- ? (!(single_succ_p (bb) +- && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun)) +- && optimize_bb_for_speed_p (bb) +- && branch_count + fallthru_count > count_threshold +- && (branch_count > fallthru_count * param_align_loop_iterations)) +- /* In case there'no fallthru for the loop. +- Nops inserted won't be executed. */ +- : (branch_count > count_threshold +- || (bb->count > bb->prev_bb->count * 10 +- && (bb->prev_bb->count +- <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count / 2))))) +- { +- rtx_insn* insn, *end_insn; +- HOST_WIDE_INT size = 0; +- bool padding_p = true; +- basic_block tbb = bb; +- unsigned cond_branch_num = 0; +- bool detect_tight_loop_p = false; +- +- for (unsigned int i = 0; i != bb->loop_father->num_nodes; +- i++, tbb = tbb->next_bb) +- { +- /* Only handle continuous cfg layout. */ +- if (bb->loop_father != tbb->loop_father) +- { +- padding_p = false; +- break; +- } +- +- FOR_BB_INSNS (tbb, insn) +- { +- if (!NONDEBUG_INSN_P (insn)) +- continue; +- size += ix86_min_insn_size (insn); +- +- /* We don't know size of inline asm. +- Don't align loop for call. */ +- if (asm_noperands (PATTERN (insn)) >= 0 +- || CALL_P (insn)) +- { +- size = -1; +- break; +- } +- } +- +- if (size == -1 || size > ix86_cost->prefetch_block) +- { +- padding_p = false; +- break; +- } +- +- FOR_EACH_EDGE (e, ei, tbb->succs) +- { +- /* It could be part of the loop. */ +- if (e->dest == bb) +- { +- detect_tight_loop_p = true; +- break; +- } +- } +- +- if (detect_tight_loop_p) +- break; +- +- end_insn = BB_END (tbb); +- if (JUMP_P (end_insn)) +- { +- /* For decoded icache: +- 1. Up to two branches are allowed per Way. +- 2. A non-conditional branch is the last micro-op in a Way. +- */ +- if (onlyjump_p (end_insn) +- && (any_uncondjump_p (end_insn) +- || single_succ_p (tbb))) +- { +- padding_p = false; +- break; +- } +- else if (++cond_branch_num >= 2) +- { +- padding_p = false; +- break; +- } +- } +- +- } +- +- if (padding_p && detect_tight_loop_p) +- { +- emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 (size)), +- GEN_INT (0)), label); +- /* End of function. */ +- if (!tbb || tbb == EXIT_BLOCK_PTR_FOR_FN (cfun)) +- break; +- /* Skip bb which already fits into one cacheline. */ +- bb = tbb; +- } +- } +- } +- +- loop_optimizer_finalize (); +- free_dominance_info (CDI_DOMINATORS); +-} +- + /* Implement machine specific optimizations. We implement padding of returns + for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ + static void +@@ -23611,8 +23467,6 @@ ix86_reorg (void) + #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN + if (TARGET_FOUR_JUMP_LIMIT) + ix86_avoid_jump_mispredicts (); +- +- ix86_align_loops (); + #endif + } + } +diff --git a/gcc/testsuite/gcc.target/i386/pr116174.c b/gcc/testsuite/gcc.target/i386/pr116174.c +new file mode 100644 +index 00000000000..8877d0b51af +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr116174.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile { target *-*-linux* } } */ ++/* { dg-options "-O2 -fcf-protection=branch" } */ ++ ++char * ++foo (char *dest, const char *src) ++{ ++ while ((*dest++ = *src++) != '\0') ++ /* nothing */; ++ return --dest; ++} ++ ++/* { dg-final { scan-assembler "\t\.cfi_startproc\n\tendbr(32|64)\n" } } */ +-- +2.31.1 + diff --git a/GCC14-1028-x86-64-Don-t-use-temp-for-argument-in-a-TImode-regis.patch b/GCC14-1028-x86-64-Don-t-use-temp-for-argument-in-a-TImode-regis.patch new file mode 100644 index 0000000000000000000000000000000000000000..36d17a6d397f68ab2c1a3b495ffe5d2a2ca16deb --- /dev/null +++ b/GCC14-1028-x86-64-Don-t-use-temp-for-argument-in-a-TImode-regis.patch @@ -0,0 +1,119 @@ +From 3f3f546bf830d019224aaf6cd349a1b9b738de1a Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Fri, 6 Sep 2024 05:24:07 -0700 +Subject: [PATCH] x86-64: Don't use temp for argument in a TImode register + +Don't use temp for a PARALLEL BLKmode argument of an EXPR_LIST expression +in a TImode register. Otherwise, the TImode variable will be put in +the GPR save area which guarantees only 8-byte alignment. + +gcc/ + + PR target/116621 + * config/i386/i386.cc (ix86_gimplify_va_arg): Don't use temp for + a PARALLEL BLKmode container of an EXPR_LIST expression in a + TImode register. + +gcc/testsuite/ + + PR target/116621 + * gcc.target/i386/pr116621.c: New test. + +Signed-off-by: H.J. Lu +(cherry picked from commit fa7bbb065c63aa802e0bbb04d605407dad58cf94) +--- + gcc/config/i386/i386.cc | 22 ++++++++++-- + gcc/testsuite/gcc.target/i386/pr116621.c | 43 ++++++++++++++++++++++++ + 2 files changed, 63 insertions(+), 2 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr116621.c + +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index feefbe322de..8f1c1f9ccd0 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -4893,13 +4893,31 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, + + examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs); + +- need_temp = (!REG_P (container) ++ bool container_in_reg = false; ++ if (REG_P (container)) ++ container_in_reg = true; ++ else if (GET_CODE (container) == PARALLEL ++ && GET_MODE (container) == BLKmode ++ && XVECLEN (container, 0) == 1) ++ { ++ /* Check if it is a PARALLEL BLKmode container of an EXPR_LIST ++ expression in a TImode register. In this case, temp isn't ++ needed. Otherwise, the TImode variable will be put in the ++ GPR save area which guarantees only 8-byte alignment. */ ++ rtx x = XVECEXP (container, 0, 0); ++ if (GET_CODE (x) == EXPR_LIST ++ && REG_P (XEXP (x, 0)) ++ && XEXP (x, 1) == const0_rtx) ++ container_in_reg = true; ++ } ++ ++ need_temp = (!container_in_reg + && ((needed_intregs && TYPE_ALIGN (type) > 64) + || TYPE_ALIGN (type) > 128)); + + /* In case we are passing structure, verify that it is consecutive block + on the register save area. If not we need to do moves. */ +- if (!need_temp && !REG_P (container)) ++ if (!need_temp && !container_in_reg) + { + /* Verify that all registers are strictly consecutive */ + if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0)))) +diff --git a/gcc/testsuite/gcc.target/i386/pr116621.c b/gcc/testsuite/gcc.target/i386/pr116621.c +new file mode 100644 +index 00000000000..704266458a8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr116621.c +@@ -0,0 +1,43 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++ ++#include ++#include ++ ++union S8302 ++{ ++ union ++ { ++ double b; ++ int c; ++ } a; ++ long double d; ++ unsigned short int f[5]; ++}; ++ ++union S8302 s8302; ++extern void check8302va (int i, ...); ++ ++int ++main (void) ++{ ++ memset (&s8302, '\0', sizeof (s8302)); ++ s8302.a.b = -221438.250000; ++ check8302va (1, s8302); ++ return 0; ++} ++ ++__attribute__((noinline, noclone)) ++void ++check8302va (int z, ...) ++{ ++ union S8302 arg, *p; ++ va_list ap; ++ ++ __builtin_va_start (ap, z); ++ p = &s8302; ++ arg = __builtin_va_arg (ap, union S8302); ++ if (p->a.b != arg.a.b) ++ __builtin_abort (); ++ __builtin_va_end (ap); ++} +-- +2.31.1 + diff --git a/GCC14-1029-x86-Don-t-use-address-override-with-segment-regsiter.patch b/GCC14-1029-x86-Don-t-use-address-override-with-segment-regsiter.patch new file mode 100644 index 0000000000000000000000000000000000000000..a11b84a358b1662af3b8f826e981d5c7e8ca85b9 --- /dev/null +++ b/GCC14-1029-x86-Don-t-use-address-override-with-segment-regsiter.patch @@ -0,0 +1,126 @@ +From 25cb153f93bb9ff3543ba8e31bbe7be4f6168aa4 Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Wed, 25 Sep 2024 16:39:04 +0800 +Subject: [PATCH] x86: Don't use address override with segment regsiter + +Address override only applies to the (reg32) part in the thread address +fs:(reg32). Don't rewrite thread address like + +(set (reg:CCZ 17 flags) + (compare:CCZ (reg:SI 98 [ __gmpfr_emax.0_1 ]) + (mem/c:SI (plus:SI (plus:SI (unspec:SI [ + (const_int 0 [0]) + ] UNSPEC_TP) + (reg:SI 107)) + (const:SI (unspec:SI [ + (symbol_ref:SI ("previous_emax") [flags 0x1a] ) + ] UNSPEC_DTPOFF))) [1 previous_emax+0 S4 A32]))) + +if address override is used to avoid the invalid memory operand like + + cmpl %fs:previous_emax@dtpoff(%eax), %r12d + +gcc/ + + PR target/116839 + * config/i386/i386.cc (ix86_rewrite_tls_address_1): Make it + static. Return if TLS address is thread register plus an integer + register. + +gcc/testsuite/ + + PR target/116839 + * gcc.target/i386/pr116839.c: New file. + +Signed-off-by: H.J. Lu +(cherry picked from commit c79cc30862d7255ca15884aa956d1ccfa279d86a) +--- + gcc/config/i386/i386.cc | 9 ++++- + gcc/testsuite/gcc.target/i386/pr116839.c | 48 ++++++++++++++++++++++++ + 2 files changed, 56 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr116839.c + +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 8f1c1f9ccd0..93d05a301c9 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -12458,7 +12458,7 @@ ix86_tls_address_pattern_p (rtx op) + } + + /* Rewrite *LOC so that it refers to a default TLS address space. */ +-void ++static void + ix86_rewrite_tls_address_1 (rtx *loc) + { + subrtx_ptr_iterator::array_type array; +@@ -12480,6 +12480,13 @@ ix86_rewrite_tls_address_1 (rtx *loc) + if (GET_CODE (u) == UNSPEC + && XINT (u, 1) == UNSPEC_TP) + { ++ /* NB: Since address override only applies to the ++ (reg32) part in fs:(reg32), return if address ++ override is used. */ ++ if (Pmode != word_mode ++ && REG_P (XEXP (*x, 1 - i))) ++ return; ++ + addr_space_t as = DEFAULT_TLS_SEG_REG; + + *x = XEXP (*x, 1 - i); +diff --git a/gcc/testsuite/gcc.target/i386/pr116839.c b/gcc/testsuite/gcc.target/i386/pr116839.c +new file mode 100644 +index 00000000000..e5df8256251 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr116839.c +@@ -0,0 +1,48 @@ ++/* { dg-do compile { target { ! ia32 } } } */ ++/* { dg-require-effective-target maybe_x32 } */ ++/* { dg-options "-mx32 -O2 -fPIC -mtls-dialect=gnu2" } */ ++/* { dg-final { scan-assembler-not "cmpl\[ \t\]+%fs:previous_emax@dtpoff\\(%eax\\)" } } */ ++ ++typedef long mpfr_prec_t; ++typedef long mpfr_exp_t; ++typedef struct { ++ mpfr_prec_t _mpfr_prec; ++} __mpfr_struct; ++typedef __mpfr_struct mpfr_t[1]; ++extern _Thread_local mpfr_exp_t __gmpfr_emax; ++static _Thread_local mpfr_exp_t previous_emax; ++static _Thread_local mpfr_t bound_emax; ++extern const mpfr_t __gmpfr_const_log2_RNDD; ++extern const mpfr_t __gmpfr_const_log2_RNDU; ++ ++typedef enum { ++ MPFR_RNDN=0, ++ MPFR_RNDZ, ++ MPFR_RNDU, ++ MPFR_RNDD, ++ MPFR_RNDA, ++ MPFR_RNDF, ++ MPFR_RNDNA=-1 ++} mpfr_rnd_t; ++typedef __mpfr_struct *mpfr_ptr; ++typedef const __mpfr_struct *mpfr_srcptr; ++void mpfr_mul (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); ++ ++void ++foo (void) ++{ ++ mpfr_exp_t saved_emax; ++ ++ if (__gmpfr_emax != previous_emax) ++ { ++ saved_emax = __gmpfr_emax; ++ ++ bound_emax->_mpfr_prec = 32; ++ ++ mpfr_mul (bound_emax, saved_emax < 0 ? ++ __gmpfr_const_log2_RNDD : __gmpfr_const_log2_RNDU, ++ MPFR_RNDU); ++ previous_emax = saved_emax; ++ __gmpfr_emax = saved_emax; ++ } ++} +-- +2.31.1 + diff --git a/GCC14-1030-x86-Disable-stack-protector-for-naked-functions.patch b/GCC14-1030-x86-Disable-stack-protector-for-naked-functions.patch new file mode 100644 index 0000000000000000000000000000000000000000..969c80c8e1aa9eb60bb4a4c3f79cd785dddc41ad --- /dev/null +++ b/GCC14-1030-x86-Disable-stack-protector-for-naked-functions.patch @@ -0,0 +1,77 @@ +From 1032b72548c47a199e0407c69d6740d6c3341f43 Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Fri, 4 Oct 2024 16:21:15 +0800 +Subject: [PATCH] x86: Disable stack protector for naked functions + +Since naked functions should not enable stack protector, define +TARGET_STACK_PROTECT_RUNTIME_ENABLED_P to disable stack protector +for naked functions. + +gcc/ + + PR target/116962 + * config/i386/i386.cc (ix86_stack_protect_runtime_enabled_p): New + function. + (TARGET_STACK_PROTECT_RUNTIME_ENABLED_P): New. + +gcc/testsuite/ + + PR target/116962 + * gcc.target/i386/pr116962.c: New file. + +Signed-off-by: H.J. Lu +(cherry picked from commit 7d2845da112214f064e7b24531cc67e256b5177e) +--- + gcc/config/i386/i386.cc | 11 +++++++++++ + gcc/testsuite/gcc.target/i386/pr116962.c | 10 ++++++++++ + 2 files changed, 21 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/i386/pr116962.c + +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 2a0a79888be..f8ab1893985 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -24265,6 +24265,13 @@ ix86_stack_protect_guard (void) + return default_stack_protect_guard (); + } + ++static bool ++ix86_stack_protect_runtime_enabled_p (void) ++{ ++ /* Naked functions should not enable stack protector. */ ++ return !ix86_function_naked (current_function_decl); ++} ++ + /* For 32-bit code we can save PIC register setup by using + __stack_chk_fail_local hidden function instead of calling + __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC +@@ -26582,6 +26589,10 @@ ix86_libgcc_floating_mode_supported_p + #undef TARGET_STACK_PROTECT_GUARD + #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard + ++#undef TARGET_STACK_PROTECT_RUNTIME_ENABLED_P ++#define TARGET_STACK_PROTECT_RUNTIME_ENABLED_P \ ++ ix86_stack_protect_runtime_enabled_p ++ + #if !TARGET_MACHO + #undef TARGET_STACK_PROTECT_FAIL + #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail +diff --git a/gcc/testsuite/gcc.target/i386/pr116962.c b/gcc/testsuite/gcc.target/i386/pr116962.c +new file mode 100644 +index 00000000000..ced16eee746 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr116962.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile { target fstack_protector } } */ ++/* { dg-options "-O2 -fstack-protector-all" } */ ++/* { dg-final { scan-assembler-not "__stack_chk_fail" } } */ ++ ++__attribute__ ((naked)) ++void ++foo (void) ++{ ++ asm ("ret"); ++} +-- +2.31.1 + diff --git a/GCC14-1031-x86-Correct-ASM_OUTPUT_SYMBOL_REF.patch b/GCC14-1031-x86-Correct-ASM_OUTPUT_SYMBOL_REF.patch new file mode 100644 index 0000000000000000000000000000000000000000..23fed87d5122c2831b8d94498c010975f80f2295 --- /dev/null +++ b/GCC14-1031-x86-Correct-ASM_OUTPUT_SYMBOL_REF.patch @@ -0,0 +1,36 @@ +From 5f47dc6e9aa82e1c00ed030cb9469cd84df8691d Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Tue, 11 Feb 2025 13:47:54 +0800 +Subject: [PATCH] x86: Correct ASM_OUTPUT_SYMBOL_REF + +x is not a macro argument. It just happens to work as final.cc passes +x for 2nd argument: + +final.cc: ASM_OUTPUT_SYMBOL_REF (file, x); + + PR target/118825 + * config/i386/i386.h (ASM_OUTPUT_SYMBOL_REF): Replace x with + SYM. + +Signed-off-by: H.J. Lu +(cherry picked from commit 7317fc0b03380a83ad03a5fc4fabef5f38c44c9d) +--- + gcc/config/i386/i386.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index 1c456c3422f..2fc82b175e6 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -2229,7 +2229,7 @@ extern int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER]; + #define ASM_OUTPUT_SYMBOL_REF(FILE, SYM) \ + do { \ + const char *name \ +- = assemble_name_resolve (XSTR (x, 0)); \ ++ = assemble_name_resolve (XSTR (SYM, 0)); \ + /* In -masm=att wrap identifiers that start with $ \ + into parens. */ \ + if (ASSEMBLER_DIALECT == ASM_ATT \ +-- +2.31.1 + diff --git a/GCC14-1032-i386-Treat-Granite-Rapids-Granite-Rapids-D-similar-a.patch b/GCC14-1032-i386-Treat-Granite-Rapids-Granite-Rapids-D-similar-a.patch new file mode 100644 index 0000000000000000000000000000000000000000..fb6767759511d4933550d95664b742edebc833f6 --- /dev/null +++ b/GCC14-1032-i386-Treat-Granite-Rapids-Granite-Rapids-D-similar-a.patch @@ -0,0 +1,65 @@ +From 0e4986a933e0f69b0d34cfefde117b510e4b09e7 Mon Sep 17 00:00:00 2001 +From: Haochen Jiang +Date: Wed, 26 Feb 2025 11:28:45 +0800 +Subject: [PATCH] i386: Treat Granite Rapids/Granite Rapids-D similar as + Sapphire Rapids in x86-tune.def + +Since GNR, GNR-D are both P-core based, we should treat them +just like SPR for now. + +gcc/ChangeLog: + + * config/i386/x86-tune.def + (X86_TUNE_DEST_FALSE_DEP_FOR_GLC): Add GNR, GNR-D. + (X86_TUNE_AVOID_256FMA_CHAINS): Ditto. + (X86_TUNE_AVX512_MOVE_BY_PIECES): Ditto. + (X86_TUNE_AVX512_STORE_BY_PIECES): Ditto. +--- + gcc/config/i386/x86-tune.def | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def +index 46e847589..0523a75a2 100644 +--- a/gcc/config/i386/x86-tune.def ++++ b/gcc/config/i386/x86-tune.def +@@ -87,8 +87,8 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY, + several insns to break false dependency on the dest register for GLC + micro-architecture. */ + DEF_TUNE (X86_TUNE_DEST_FALSE_DEP_FOR_GLC, +- "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_CORE_HYBRID +- | m_CORE_ATOM) ++ "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_GRANITERAPIDS ++ | m_GRANITERAPIDS_D | m_CORE_HYBRID | m_CORE_ATOM) + + /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies + are resolved on SSE register parts instead of whole registers, so we may +@@ -521,7 +521,8 @@ DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 + /* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or + smaller FMA chain. */ + DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3 | m_ZNVER4 +- | m_CORE_HYBRID | m_SAPPHIRERAPIDS | m_CORE_ATOM | m_GENERIC) ++ | m_CORE_HYBRID | m_SAPPHIRERAPIDS | m_GRANITERAPIDS | m_GRANITERAPIDS_D ++ | m_CORE_ATOM | m_GENERIC) + + /* X86_TUNE_AVOID_512FMA_CHAINS: Avoid creating loops with tight 512bit or + smaller FMA chain. */ +@@ -583,12 +584,14 @@ DEF_TUNE (X86_TUNE_AVX256_STORE_BY_PIECES, "avx256_store_by_pieces", + /* X86_TUNE_AVX512_MOVE_BY_PIECES: Optimize move_by_pieces with 512-bit + AVX instructions. */ + DEF_TUNE (X86_TUNE_AVX512_MOVE_BY_PIECES, "avx512_move_by_pieces", +- m_SAPPHIRERAPIDS | m_ZNVER4 | m_ZNVER5) ++ m_SAPPHIRERAPIDS | m_GRANITERAPIDS | m_GRANITERAPIDS_D ++ | m_ZNVER4 | m_ZNVER5) + + /* X86_TUNE_AVX512_STORE_BY_PIECES: Optimize store_by_pieces with 512-bit + AVX instructions. */ + DEF_TUNE (X86_TUNE_AVX512_STORE_BY_PIECES, "avx512_store_by_pieces", +- m_SAPPHIRERAPIDS | m_ZNVER4 | m_ZNVER5) ++ m_SAPPHIRERAPIDS | m_GRANITERAPIDS | m_GRANITERAPIDS_D ++ | m_ZNVER4 | m_ZNVER5) + + /*****************************************************************************/ + /*****************************************************************************/ +-- +2.31.1 + diff --git a/GCC14-1033-i386-Add-mavx10.1-back-with-512-bit-alias.patch b/GCC14-1033-i386-Add-mavx10.1-back-with-512-bit-alias.patch new file mode 100644 index 0000000000000000000000000000000000000000..444a6db885976247eddbbce2463cd496c6f6e1ce --- /dev/null +++ b/GCC14-1033-i386-Add-mavx10.1-back-with-512-bit-alias.patch @@ -0,0 +1,138 @@ +From 5ba6fdc5476d33c57f4751cae93054fdbc7211c0 Mon Sep 17 00:00:00 2001 +From: Haochen Jiang +Date: Mon, 24 Mar 2025 15:51:16 +0800 +Subject: [PATCH] i386: Add -mavx10.1 back with 512 bit alias + +When AVX10.1 options are added into GCC 14, E-core is supposed to +support up to 256 bit vector width, while P-core up to 512 bit vector +width. Therefore, we added avx10.1-256 and avx10.1-512 options into +compiler and alias avx10.1 to 256 bit for compatibility since there +will be real platforms with 256 bit only support. + +However, all the future platforms will now support 512 bit vector width, +including P-core and E-core. Therefore, we could alias avx10.1 directly +to 512 bit. However, avx10.1 alias to 256 bit has been there in GCC 14.1 +and GCC 14.2, so we have to raise a warning since GCC 14.3 for this +behavior change. + +While backporting the patch from GCC 15, we choose to only warn when +users use -mavx10.1 option in order not to interrupt the usage of other +options since -mavx10.1-256/512 and -mevex512 will be dropped in GCC 16. +There is no need to warn them this early in GCC 14 to overwhelm users. + +gcc/ChangeLog: + + * common/config/i386/i386-isas.h: Add avx10.1. + * config/i386/i386-c.cc (ix86_target_macros_internal): Ditto. + * config/i386/i386-options.cc + (ix86_valid_target_attribute_inner_p): Ditto. + * config/i386/i386.opt: Ditto. + * config/i386/i386.opt.urls: Ditto. + * doc/extend.texi: Ditto. + * doc/sourcebuild.texi: Ditto. +--- + gcc/common/config/i386/i386-isas.h | 1 + + gcc/config/i386/i386-c.cc | 5 +---- + gcc/config/i386/i386-options.cc | 1 + + gcc/config/i386/i386.opt | 5 +++++ + gcc/config/i386/i386.opt.urls | 3 +++ + gcc/doc/extend.texi | 6 ++++++ + gcc/doc/sourcebuild.texi | 3 +++ + 7 files changed, 20 insertions(+), 4 deletions(-) + +diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h +index 017c795e211..cdbc188620a 100644 +--- a/gcc/common/config/i386/i386-isas.h ++++ b/gcc/common/config/i386/i386-isas.h +@@ -195,4 +195,5 @@ ISA_NAMES_TABLE_START + ISA_NAMES_TABLE_ENTRY("usermsr", FEATURE_USER_MSR, P_NONE, "-musermsr") + ISA_NAMES_TABLE_ENTRY("avx10.1-256", FEATURE_AVX10_1_256, P_AVX10_1_256, "-mavx10.1-256") + ISA_NAMES_TABLE_ENTRY("avx10.1-512", FEATURE_AVX10_1_512, P_AVX10_1_512, "-mavx10.1-512") ++ ISA_NAMES_TABLE_ENTRY("avx10.1", FEATURE_AVX10_1_512, P_AVX10_1_512, "-mavx10.1") + ISA_NAMES_TABLE_END +diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc +index 07f4936ba91..0103b8543a1 100644 +--- a/gcc/config/i386/i386-c.cc ++++ b/gcc/config/i386/i386-c.cc +@@ -743,10 +743,7 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + if (isa_flag2 & OPTION_MASK_ISA2_USER_MSR) + def_or_undef (parse_in, "__USER_MSR__"); + if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1_256) +- { +- def_or_undef (parse_in, "__AVX10_1_256__"); +- def_or_undef (parse_in, "__AVX10_1__"); +- } ++ def_or_undef (parse_in, "__AVX10_1_256__"); + if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1_512) + def_or_undef (parse_in, "__AVX10_1_512__"); + if (isa_flag2 & OPTION_MASK_ISA2_APX_F) +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index 11c6ddf0f44..57c83a9aa1c 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -1137,6 +1137,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], + IX86_ATTR_ISA ("usermsr", OPT_musermsr), + IX86_ATTR_ISA ("avx10.1-256", OPT_mavx10_1_256), + IX86_ATTR_ISA ("avx10.1-512", OPT_mavx10_1_512), ++ IX86_ATTR_ISA ("avx10.1", OPT_mavx10_1_512), + + /* enum options */ + IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), +diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt +index f99c4e3ae5d..df95963dd9f 100644 +--- a/gcc/config/i386/i386.opt ++++ b/gcc/config/i386/i386.opt +@@ -1380,3 +1380,8 @@ mavx10.1-512 + Target Mask(ISA2_AVX10_1_512) Var(ix86_isa_flags2) Save + Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, + and AVX10.1-512 built-in functions and code generation. ++ ++mavx10.1 ++Target Alias(mavx10.1-512) Warn(%<-mavx10.1%> is aliased to 512 bit since GCC14.3) ++Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ++and AVX10.1-512 built-in functions and code generation. +diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls +index 3ed76635002..81c5bb9a927 100644 +--- a/gcc/config/i386/i386.opt.urls ++++ b/gcc/config/i386/i386.opt.urls +@@ -615,3 +615,6 @@ UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1-256) + mavx10.1-512 + UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1-512) + ++mavx10.1 ++UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1) ++ +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index 8bd30bb2a46..27fefb30041 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -7395,6 +7395,12 @@ Disable the generation of the AVX10.1 instructions. + Enable the generation of the AVX10.1 instructions with 512 bit support. + Disable the generation of the AVX10.1 instructions. + ++@cindex @code{target("avx10.1")} function attribute, x86 ++@item avx10.1 ++@itemx no-avx10.1 ++Enable the generation of the AVX10.1 instructions with 512 bit support. ++Disable the generation of the AVX10.1 instructions. ++ + @cindex @code{target("cld")} function attribute, x86 + @item cld + @itemx no-cld +diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi +index 5b026cfe073..23dedef4161 100644 +--- a/gcc/doc/sourcebuild.texi ++++ b/gcc/doc/sourcebuild.texi +@@ -2549,6 +2549,9 @@ Target supports the execution of @code{avx10.1-256} instructions. + @item avx10.1-512 + Target supports the execution of @code{avx10.1-512} instructions. + ++@item avx10.1 ++Target supports the execution of @code{avx10.1} instructions. ++ + @item avx2 + Target supports compiling @code{avx2} instructions. + +-- +2.31.1 + diff --git a/GCC14-1034-Extend-check-function-bodies-to-allow-label-and-dire.patch b/GCC14-1034-Extend-check-function-bodies-to-allow-label-and-dire.patch new file mode 100644 index 0000000000000000000000000000000000000000..1aceb888f4f212fcbb9f78ac4526563b026e98f3 --- /dev/null +++ b/GCC14-1034-Extend-check-function-bodies-to-allow-label-and-dire.patch @@ -0,0 +1,154 @@ +From d275b3748a23aa4b6b821ae3bdf1751010923773 Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Tue, 27 Aug 2024 07:03:22 -0700 +Subject: [PATCH] Extend check-function-bodies to allow label and directives + +As PR target/116174 shown, we may need to verify labels and the directive +order. Extend check-function-bodies to support matched output lines to +allow label and directives. + +gcc/ + + * doc/sourcebuild.texi (check-function-bodies): Add an optional + argument for matched output lines. + +gcc/testsuite/ + + * gcc.target/i386/pr116174.c: Use check-function-bodies. + * lib/scanasm.exp (parse_function_bodies): Append the line if + $up_config(matched) matches the line. + (check-function-bodies): Add an argument for matched. Set + up_config(matched) to $matched. Append the expected line without + $config(line_prefix) to function_regexp if it starts with ".L". + +Signed-off-by: H.J. Lu +(cherry picked from commit d6bb1e257fc414d21bc31faa7ddecbc93a197e3c) +--- + gcc/doc/sourcebuild.texi | 9 ++++++--- + gcc/testsuite/gcc.target/i386/pr116174.c | 18 +++++++++++++++--- + gcc/testsuite/lib/scanasm.exp | 15 +++++++++++++-- + 3 files changed, 34 insertions(+), 8 deletions(-) + +diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi +index 23dedef4161..c8130dc1ba9 100644 +--- a/gcc/doc/sourcebuild.texi ++++ b/gcc/doc/sourcebuild.texi +@@ -3440,7 +3440,7 @@ assembly output. + Passes if @var{symbol} is not defined as a hidden symbol in the test's + assembly output. + +-@item check-function-bodies @var{prefix} @var{terminator} [@var{options} [@{ target/xfail @var{selector} @}]] ++@item check-function-bodies @var{prefix} @var{terminator} [@var{options} [@{ target/xfail @var{selector} @} [@var{matched}]]] + Looks through the source file for comments that give the expected assembly + output for selected functions. Each line of expected output starts with the + prefix string @var{prefix} and the expected output for a function as a whole +@@ -3467,8 +3467,11 @@ Depending on the configuration (see + @code{configure_check-function-bodies} in + @file{gcc/testsuite/lib/scanasm.exp}), the test may discard from the + compiler's assembly output directives such as @code{.cfi_startproc}, +-local label definitions such as @code{.LFB0}, and more. +-It then matches the result against the expected ++local label definitions such as @code{.LFB0}, and more. This behavior ++can be overridden using the optional @var{matched} argument, which ++specifies a regexp for lines that should not be discarded in this way. ++ ++The test then matches the result against the expected + output for a function as a single regular expression. This means that + later lines can use backslashes to refer back to @samp{(@dots{})} + captures on earlier lines. For example: +diff --git a/gcc/testsuite/gcc.target/i386/pr116174.c b/gcc/testsuite/gcc.target/i386/pr116174.c +index 8877d0b51af..686aeb9ff31 100644 +--- a/gcc/testsuite/gcc.target/i386/pr116174.c ++++ b/gcc/testsuite/gcc.target/i386/pr116174.c +@@ -1,6 +1,20 @@ + /* { dg-do compile { target *-*-linux* } } */ +-/* { dg-options "-O2 -fcf-protection=branch" } */ ++/* { dg-options "-O2 -g0 -fcf-protection=branch" } */ ++/* Keep labels and directives ('.p2align', '.cfi_startproc'). ++/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {^\t?\.} } } */ + ++/* ++**foo: ++**.LFB0: ++** .cfi_startproc ++** ( ++** endbr64 ++** .p2align 5 ++** | ++** endbr32 ++** ) ++**... ++*/ + char * + foo (char *dest, const char *src) + { +@@ -8,5 +22,3 @@ foo (char *dest, const char *src) + /* nothing */; + return --dest; + } +- +-/* { dg-final { scan-assembler "\t\.cfi_startproc\n\tendbr(32|64)\n" } } */ +diff --git a/gcc/testsuite/lib/scanasm.exp b/gcc/testsuite/lib/scanasm.exp +index 6cf9997240d..d1c8e3b5079 100644 +--- a/gcc/testsuite/lib/scanasm.exp ++++ b/gcc/testsuite/lib/scanasm.exp +@@ -952,6 +952,9 @@ proc parse_function_bodies { config filename result } { + verbose "parse_function_bodies: $function_name:\n$function_body" + set up_result($function_name) $function_body + set in_function 0 ++ } elseif { $up_config(matched) ne "" \ ++ && [regexp $up_config(matched) $line] } { ++ append function_body $line "\n" + } elseif { [regexp $up_config(fluff) $line] } { + verbose "parse_function_bodies: $function_name: ignoring fluff line: $line" + } else { +@@ -982,7 +985,7 @@ proc check_function_body { functions name body_regexp } { + + # Check the implementations of functions against expected output. Used as: + # +-# { dg-do { check-function-bodies PREFIX TERMINATOR[ OPTION[ SELECTOR]] } } ++# { dg-do { check-function-bodies PREFIX TERMINATOR[ OPTION[ SELECTOR [MATCHED]]] } } + # + # See sourcebuild.texi for details. + +@@ -990,7 +993,7 @@ proc check-function-bodies { args } { + if { [llength $args] < 2 } { + error "too few arguments to check-function-bodies" + } +- if { [llength $args] > 4 } { ++ if { [llength $args] > 5 } { + error "too many arguments to check-function-bodies" + } + +@@ -1029,6 +1032,11 @@ proc check-function-bodies { args } { + } + } + ++ set matched "" ++ if { [llength $args] >= 5 } { ++ set matched [lindex $args 4] ++ } ++ + set testcase [testname-for-summary] + # The name might include a list of options; extract the file name. + set filename [lindex $testcase 0] +@@ -1048,6 +1056,7 @@ proc check-function-bodies { args } { + # (name in \1). This may be different from '$config(start)'. + set start_expected {^(\S+):$} + ++ set config(matched) $matched + configure_check-function-bodies config + set have_bodies 0 + if { [is_remote host] } { +@@ -1090,6 +1099,8 @@ proc check-function-bodies { args } { + append function_regexp ")" + } elseif { [string equal $line "..."] } { + append function_regexp ".*" ++ } elseif { [regexp {^\.L} $line] } { ++ append function_regexp $line "\n" + } else { + append function_regexp $config(line_prefix) $line "\n" + } +-- +2.31.1 + diff --git a/GCC14-1035-APX-Don-t-use-red-zone-with-32-GPRs-and-no-caller-sa.patch b/GCC14-1035-APX-Don-t-use-red-zone-with-32-GPRs-and-no-caller-sa.patch new file mode 100644 index 0000000000000000000000000000000000000000..121bae33fda57254a7893f455373cdc3bd1c4619 --- /dev/null +++ b/GCC14-1035-APX-Don-t-use-red-zone-with-32-GPRs-and-no-caller-sa.patch @@ -0,0 +1,253 @@ +From a33e2808c8987dcd422c5156c47bcf672ddc7f9f Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Sun, 13 Apr 2025 12:20:42 -0700 +Subject: [PATCH] APX: Don't use red-zone with 32 GPRs and no caller-saved + registers + +Don't use red-zone when there are no caller-saved registers with 32 GPRs +since 128-byte red-zone is too small for 31 GPRs. + +gcc/ + + PR target/119784 + * config/i386/i386.cc (ix86_using_red_zone): Don't use red-zone + with 32 GPRs and no caller-saved registers. + +gcc/testsuite/ + + PR target/119784 + * gcc.target/i386/pr119784a.c: New test. + * gcc.target/i386/pr119784b.c: Likewise. + +Signed-off-by: H.J. Lu +(cherry picked from commit 0a074b8c7e79f9d9359d044f1499b0a9ce9d2801) +--- + gcc/config/i386/i386.cc | 6 ++ + gcc/testsuite/gcc.target/i386/pr119784a.c | 96 +++++++++++++++++++++++ + gcc/testsuite/gcc.target/i386/pr119784b.c | 87 ++++++++++++++++++++ + 3 files changed, 189 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/i386/pr119784a.c + create mode 100644 gcc/testsuite/gcc.target/i386/pr119784b.c + +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index dde4ba5ca19..ce726f40f09 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -444,6 +444,9 @@ int ix86_arch_specified; + indirect thunk pushes the return address onto stack, destroying + red-zone. + ++ NB: Don't use red-zone for functions with no_caller_saved_registers ++ and 32 GPRs since 128-byte red-zone is too small for 31 GPRs. ++ + TODO: If we can reserve the first 2 WORDs, for PUSH and, another + for CALL, in red-zone, we can allow local indirect jumps with + indirect thunk. */ +@@ -453,6 +456,9 @@ ix86_using_red_zone (void) + { + return (TARGET_RED_ZONE + && !TARGET_64BIT_MS_ABI ++ && (!TARGET_APX_EGPR ++ || (cfun->machine->call_saved_registers ++ != TYPE_NO_CALLER_SAVED_REGISTERS)) + && (!cfun->machine->has_local_indirect_jump + || cfun->machine->indirect_branch_type == indirect_branch_keep)); + } +diff --git a/gcc/testsuite/gcc.target/i386/pr119784a.c b/gcc/testsuite/gcc.target/i386/pr119784a.c +new file mode 100644 +index 00000000000..8a119d4cc1f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr119784a.c +@@ -0,0 +1,96 @@ ++/* { dg-do compile { target { *-*-linux* && lp64 } } } */ ++/* { dg-options "-O2 -fno-pic -mtune=generic -mgeneral-regs-only -mapxf -mtune-ctrl=prologue_using_move,epilogue_using_move" } */ ++/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ ++/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {^\t?\.} } } */ ++ ++/* start must save and restore all caller saved registers. */ ++ ++/* ++**start: ++**.LFB[0-9]+: ++** .cfi_startproc ++** subq \$248, %rsp ++**... ++** movq %rax, \(%rsp\) ++** movq %rdx, 8\(%rsp\) ++** movq %rcx, 16\(%rsp\) ++** movq %rbx, 24\(%rsp\) ++** movq %rsi, 32\(%rsp\) ++** movq %rdi, 40\(%rsp\) ++**... ++** movq %rbp, 48\(%rsp\) ++** movq %r8, 56\(%rsp\) ++** movq %r9, 64\(%rsp\) ++** movq %r10, 72\(%rsp\) ++** movq %r11, 80\(%rsp\) ++** movq %r12, 88\(%rsp\) ++** movq %r13, 96\(%rsp\) ++** movq %r14, 104\(%rsp\) ++** movq %r15, 112\(%rsp\) ++** movq %r16, 120\(%rsp\) ++** movq %r17, 128\(%rsp\) ++** movq %r18, 136\(%rsp\) ++** movq %r19, 144\(%rsp\) ++** movq %r20, 152\(%rsp\) ++** movq %r21, 160\(%rsp\) ++** movq %r22, 168\(%rsp\) ++** movq %r23, 176\(%rsp\) ++** movq %r24, 184\(%rsp\) ++** movq %r25, 192\(%rsp\) ++** movq %r26, 200\(%rsp\) ++** movq %r27, 208\(%rsp\) ++** movq %r28, 216\(%rsp\) ++** movq %r29, 224\(%rsp\) ++** movq %r30, 232\(%rsp\) ++** movq %r31, 240\(%rsp\) ++**... ++** call \*code\(%rip\) ++** movq \(%rsp\), %rax ++** movq 8\(%rsp\), %rdx ++** movq 16\(%rsp\), %rcx ++** movq 24\(%rsp\), %rbx ++** movq 32\(%rsp\), %rsi ++** movq 40\(%rsp\), %rdi ++** movq 48\(%rsp\), %rbp ++** movq 56\(%rsp\), %r8 ++** movq 64\(%rsp\), %r9 ++** movq 72\(%rsp\), %r10 ++** movq 80\(%rsp\), %r11 ++** movq 88\(%rsp\), %r12 ++** movq 96\(%rsp\), %r13 ++** movq 104\(%rsp\), %r14 ++** movq 112\(%rsp\), %r15 ++** movq 120\(%rsp\), %r16 ++** movq 128\(%rsp\), %r17 ++** movq 136\(%rsp\), %r18 ++** movq 144\(%rsp\), %r19 ++** movq 152\(%rsp\), %r20 ++** movq 160\(%rsp\), %r21 ++** movq 168\(%rsp\), %r22 ++** movq 176\(%rsp\), %r23 ++** movq 184\(%rsp\), %r24 ++** movq 192\(%rsp\), %r25 ++** movq 200\(%rsp\), %r26 ++** movq 208\(%rsp\), %r27 ++** movq 216\(%rsp\), %r28 ++** movq 224\(%rsp\), %r29 ++** movq 232\(%rsp\), %r30 ++** movq 240\(%rsp\), %r31 ++** addq \$248, %rsp ++**... ++** ret ++** .cfi_endproc ++**... ++*/ ++ ++#define DONT_SAVE_REGS __attribute__((no_callee_saved_registers)) ++#define SAVE_REGS __attribute__((no_caller_saved_registers)) ++ ++typedef DONT_SAVE_REGS void (*op_t)(void); ++ ++extern op_t code[]; ++ ++SAVE_REGS void start() ++{ ++ code[0](); ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr119784b.c b/gcc/testsuite/gcc.target/i386/pr119784b.c +new file mode 100644 +index 00000000000..c6761976ed6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr119784b.c +@@ -0,0 +1,87 @@ ++/* { dg-do compile { target { *-*-linux* && x32 } } } */ ++/* { dg-options "-O2 -fno-pic -mtune=generic -mgeneral-regs-only -mapxf -mtune-ctrl=prologue_using_move,epilogue_using_move" } */ ++/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ ++/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {^\t?\.} } } */ ++ ++/* start must save and restore all caller saved registers. */ ++ ++/* ++**start: ++**.LFB[0-9]+: ++** .cfi_startproc ++** subl \$248, %esp ++**... ++** movq %rax, \(%rsp\) ++** movq %rdx, 8\(%rsp\) ++** movq %rcx, 16\(%rsp\) ++** movq %rbx, 24\(%rsp\) ++** movq %rsi, 32\(%rsp\) ++** movq %rdi, 40\(%rsp\) ++**... ++** movq %rbp, 48\(%rsp\) ++** movq %r8, 56\(%rsp\) ++** movq %r9, 64\(%rsp\) ++** movq %r10, 72\(%rsp\) ++** movq %r11, 80\(%rsp\) ++** movq %r12, 88\(%rsp\) ++** movq %r13, 96\(%rsp\) ++** movq %r14, 104\(%rsp\) ++** movq %r15, 112\(%rsp\) ++** movq %r16, 120\(%rsp\) ++** movq %r17, 128\(%rsp\) ++** movq %r18, 136\(%rsp\) ++** movq %r19, 144\(%rsp\) ++** movq %r20, 152\(%rsp\) ++** movq %r21, 160\(%rsp\) ++** movq %r22, 168\(%rsp\) ++** movq %r23, 176\(%rsp\) ++** movq %r24, 184\(%rsp\) ++** movq %r25, 192\(%rsp\) ++** movq %r26, 200\(%rsp\) ++** movq %r27, 208\(%rsp\) ++** movq %r28, 216\(%rsp\) ++** movq %r29, 224\(%rsp\) ++** movq %r30, 232\(%rsp\) ++** movq %r31, 240\(%rsp\) ++**... ++** movl code\(%rip\), %ebp ++** call \*%rbp ++** movq \(%rsp\), %rax ++** movq 8\(%rsp\), %rdx ++** movq 16\(%rsp\), %rcx ++** movq 24\(%rsp\), %rbx ++** movq 32\(%rsp\), %rsi ++** movq 40\(%rsp\), %rdi ++** movq 48\(%rsp\), %rbp ++** movq 56\(%rsp\), %r8 ++** movq 64\(%rsp\), %r9 ++** movq 72\(%rsp\), %r10 ++** movq 80\(%rsp\), %r11 ++** movq 88\(%rsp\), %r12 ++** movq 96\(%rsp\), %r13 ++** movq 104\(%rsp\), %r14 ++** movq 112\(%rsp\), %r15 ++** movq 120\(%rsp\), %r16 ++** movq 128\(%rsp\), %r17 ++** movq 136\(%rsp\), %r18 ++** movq 144\(%rsp\), %r19 ++** movq 152\(%rsp\), %r20 ++** movq 160\(%rsp\), %r21 ++** movq 168\(%rsp\), %r22 ++** movq 176\(%rsp\), %r23 ++** movq 184\(%rsp\), %r24 ++** movq 192\(%rsp\), %r25 ++** movq 200\(%rsp\), %r26 ++** movq 208\(%rsp\), %r27 ++** movq 216\(%rsp\), %r28 ++** movq 224\(%rsp\), %r29 ++** movq 232\(%rsp\), %r30 ++** movq 240\(%rsp\), %r31 ++** addl \$248, %esp ++**... ++** ret ++** .cfi_endproc ++**... ++*/ ++ ++#include "pr119784a.c" +-- +2.31.1 + diff --git a/GCC14-1036-x86-Update-gcc.target-i386-apx-interrupt-1.c.patch b/GCC14-1036-x86-Update-gcc.target-i386-apx-interrupt-1.c.patch new file mode 100644 index 0000000000000000000000000000000000000000..db8c931eb92be49afbbd3f9ce95d745dc5a38b78 --- /dev/null +++ b/GCC14-1036-x86-Update-gcc.target-i386-apx-interrupt-1.c.patch @@ -0,0 +1,43 @@ +From 8cc672d3d3a2f090d840fb2a8c344cf927715d6c Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Mon, 14 Apr 2025 15:49:26 -0700 +Subject: [PATCH] x86: Update gcc.target/i386/apx-interrupt-1.c + +ix86_add_cfa_restore_note omits the REG_CFA_RESTORE REG note for registers +pushed in red-zone. Since + +commit 0a074b8c7e79f9d9359d044f1499b0a9ce9d2801 +Author: H.J. Lu +Date: Sun Apr 13 12:20:42 2025 -0700 + + APX: Don't use red-zone with 32 GPRs and no caller-saved registers + +disabled red-zone, update gcc.target/i386/apx-interrupt-1.c to expect +31 .cfi_restore directives. + + PR target/119784 + * gcc.target/i386/apx-interrupt-1.c: Expect 31 .cfi_restore + directives. + +Signed-off-by: H.J. Lu +(cherry picked from commit 5ed2fa4768f3d318b8ace5bd4a095596e06fad7b) +--- + gcc/testsuite/gcc.target/i386/apx-interrupt-1.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c +index fefe2e6d6fc..fa1acc7a142 100644 +--- a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c ++++ b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c +@@ -66,7 +66,7 @@ void foo (void *frame) + /* { dg-final { scan-assembler-times {\t\.cfi_offset 132, -120} 1 } } */ + /* { dg-final { scan-assembler-times {\t\.cfi_offset 131, -128} 1 } } */ + /* { dg-final { scan-assembler-times {\t\.cfi_offset 130, -136} 1 } } */ +-/* { dg-final { scan-assembler-times ".cfi_restore" 15} } */ ++/* { dg-final { scan-assembler-times ".cfi_restore" 31 } } */ + /* { dg-final { scan-assembler-times "pop(?:l|q)\[\\t \]*%(?:e|r)ax" 1 } } */ + /* { dg-final { scan-assembler-times "pop(?:l|q)\[\\t \]*%(?:e|r)bx" 1 } } */ + /* { dg-final { scan-assembler-times "pop(?:l|q)\[\\t \]*%(?:e|r)cx" 1 } } */ +-- +2.31.1 + diff --git a/GCC14-1037-Remove-other-processors-from-X86_TUNE_DEST_FALSE_DEP.patch b/GCC14-1037-Remove-other-processors-from-X86_TUNE_DEST_FALSE_DEP.patch new file mode 100644 index 0000000000000000000000000000000000000000..d73a4a0d7dc7f65593065c1e47bc90fec6fb3f5e --- /dev/null +++ b/GCC14-1037-Remove-other-processors-from-X86_TUNE_DEST_FALSE_DEP.patch @@ -0,0 +1,36 @@ +From 058f489e8223db7b2b5ebf386580e3f407638382 Mon Sep 17 00:00:00 2001 +From: liuhongt +Date: Mon, 28 Apr 2025 07:45:50 -0700 +Subject: [PATCH] Remove other processors from X86_TUNE_DEST_FALSE_DEP_FOR_GLC + except GLC + +Since the tune if only for GLC(sapphirerapids and alderlake-P). + +gcc/ChangeLog: + + * config/i386/x86-tune.def (X86_TUNE_DEST_FALSE_DEP_FOR_GLC): + Remove other processor except for GLC since this one is only + for GLC. + +(cherry picked from commit 1ad6e171b126a82f38b1e8cbfd207f1d91c58a59) +--- + gcc/config/i386/x86-tune.def | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def +index 0523a75a2..f90d270fd 100644 +--- a/gcc/config/i386/x86-tune.def ++++ b/gcc/config/i386/x86-tune.def +@@ -87,8 +87,7 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY, + several insns to break false dependency on the dest register for GLC + micro-architecture. */ + DEF_TUNE (X86_TUNE_DEST_FALSE_DEP_FOR_GLC, +- "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_GRANITERAPIDS +- | m_GRANITERAPIDS_D | m_CORE_HYBRID | m_CORE_ATOM) ++ "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_ALDERLAKE) + + /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies + are resolved on SSE register parts instead of whole registers, so we may +-- +2.31.1 + diff --git a/gcc-14.spec b/gcc-14.spec index db1e941e9a88d0798938e59911e13eebda55a9c1..2e6f9ccc360d069362ba4cda082a62fefb63cc74 100644 --- a/gcc-14.spec +++ b/gcc-14.spec @@ -90,7 +90,7 @@ Summary: Various compilers (C, C++, Objective-C, ...) Name: %{?_scl_prefix}gcc%{gcc_ver} Version: 14.2.1 -Release: 7 +Release: 8 # libgcc, libgfortran, libgomp, libstdc++ and crtstuff have # GCC Runtime Exception. License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD @@ -195,6 +195,17 @@ Patch1023: GCC14-1023-Fix-uninitialized-operands-2-in-vec_unpacks_hi_v4sf.patch Patch1024: GCC14-1024-i386-Fix-AVX512BW-intrin-header-with-__OPTIMIZE__-PR.patch Patch1025: GCC14-1025-i386-Do-not-check-vector-size-conflict-when-AVX512-i.patch Patch1026: GCC14-1026-i386-Deprecate-m-no-avx10.1-and-make-mno-avx10.1-512.patch +Patch1027: GCC14-1027-Move-ix86_align_loops-into-a-separate-pass-and-inser.patch +Patch1028: GCC14-1028-x86-64-Don-t-use-temp-for-argument-in-a-TImode-regis.patch +Patch1029: GCC14-1029-x86-Don-t-use-address-override-with-segment-regsiter.patch +Patch1030: GCC14-1030-x86-Disable-stack-protector-for-naked-functions.patch +Patch1031: GCC14-1031-x86-Correct-ASM_OUTPUT_SYMBOL_REF.patch +Patch1032: GCC14-1032-i386-Treat-Granite-Rapids-Granite-Rapids-D-similar-a.patch +Patch1033: GCC14-1033-i386-Add-mavx10.1-back-with-512-bit-alias.patch +Patch1034: GCC14-1034-Extend-check-function-bodies-to-allow-label-and-dire.patch +Patch1035: GCC14-1035-APX-Don-t-use-red-zone-with-32-GPRs-and-no-caller-sa.patch +Patch1036: GCC14-1036-x86-Update-gcc.target-i386-apx-interrupt-1.c.patch +Patch1037: GCC14-1037-Remove-other-processors-from-X86_TUNE_DEST_FALSE_DEP.patch # On ARM EABI systems, we do want -gnueabi to be part of the # target triple. @@ -2225,6 +2236,9 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Fri May 30 2025 Hu, Lin - 14.2.1-8 +- [Sync] Sync patches from gcc.gnu.org's releases/gcc-14 + * Mon Feb 17 2025 Hu, Lin - 14.2.1-7 - [Sync] Sync patches from gcc.gnu.org's releases/gcc-14