diff --git a/GCC14-1006-Refine-constraint-Bk-to-define_special_memory_constr.patch b/GCC14-1006-Refine-constraint-Bk-to-define_special_memory_constr.patch new file mode 100644 index 0000000000000000000000000000000000000000..c68c3122359eecc3fae752f2d6f5428b46d900c4 --- /dev/null +++ b/GCC14-1006-Refine-constraint-Bk-to-define_special_memory_constr.patch @@ -0,0 +1,107 @@ +From d77237154f3f79ac83af459a0517a4472a35fb24 Mon Sep 17 00:00:00 2001 +From: liuhongt +Date: Wed, 24 Jul 2024 11:29:23 +0800 +Subject: [PATCH 01/21] Refine constraint "Bk" to + define_special_memory_constraint. + +For below pattern, RA may still allocate r162 as v/k register, try to +reload for address with leaq __libc_tsd_CTYPE_B@gottpoff(%rip), %rsi +which result a linker error. + +(set (reg:DI 162) + (mem/u/c:DI + (const:DI (unspec:DI + [(symbol_ref:DI ("a") [flags 0x60] )] + UNSPEC_GOTNTPOFF)) + +Quote from H.J for why linker issue an error. +>What do these do: +> +> leaq __libc_tsd_CTYPE_B@gottpoff(%rip), %rax +> vmovq (%rax), %xmm0 +> +>From x86-64 TLS psABI: +> +>The assembler generates for the x@gottpoff(%rip) expressions a R X86 +>64 GOTTPOFF relocation for the symbol x which requests the linker to +>generate a GOT entry with a R X86 64 TPOFF64 relocation. The offset of +>the GOT entry relative to the end of the instruction is then used in +>the instruction. The R X86 64 TPOFF64 relocation is pro- cessed at +>program startup time by the dynamic linker by looking up the symbol x +>in the modules loaded at that point. The offset is written in the GOT +>entry and later loaded by the addq instruction. +> +>The above code sequence looks wrong to me. + +gcc/ChangeLog: + + PR target/116043 + * config/i386/constraints.md (Bk): Refine to + define_special_memory_constraint. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr116043.c: New test. 
+ +(cherry picked from commit a295076bee293aa3112c615f9af7a27231816a36) +--- + gcc/config/i386/constraints.md | 2 +- + gcc/testsuite/gcc.target/i386/pr116043.c | 33 ++++++++++++++++++++++++ + 2 files changed, 34 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr116043.c + +diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md +index 7508d7a58bd..b760e7c221a 100644 +--- a/gcc/config/i386/constraints.md ++++ b/gcc/config/i386/constraints.md +@@ -187,7 +187,7 @@ + "@internal Vector memory operand." + (match_operand 0 "vector_memory_operand")) + +-(define_memory_constraint "Bk" ++(define_special_memory_constraint "Bk" + "@internal TLS address that allows insn using non-integer registers." + (and (match_operand 0 "memory_operand") + (not (match_test "ix86_gpr_tls_address_pattern_p (op)")))) +diff --git a/gcc/testsuite/gcc.target/i386/pr116043.c b/gcc/testsuite/gcc.target/i386/pr116043.c +new file mode 100644 +index 00000000000..76553496c10 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr116043.c +@@ -0,0 +1,33 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mavx512bf16 -O3" } */ ++/* { dg-final { scan-assembler-not {(?n)lea.*@gottpoff} } } */ ++ ++extern __thread int a, c, i, j, k, l; ++int *b; ++struct d { ++ int e; ++} f, g; ++char *h; ++ ++void m(struct d *n) { ++ b = &k; ++ for (; n->e; b++, n--) { ++ i = b && a; ++ if (i) ++ j = c; ++ } ++} ++ ++char *o(struct d *n) { ++ for (; n->e;) ++ return h; ++} ++ ++int q() { ++ if (l) ++ return 1; ++ int p = *o(&g); ++ m(&f); ++ m(&g); ++ l = p; ++} +-- +2.31.1 + diff --git a/GCC14-1007-i386-Fix-some-vex-insns-that-prohibit-egpr.patch b/GCC14-1007-i386-Fix-some-vex-insns-that-prohibit-egpr.patch new file mode 100644 index 0000000000000000000000000000000000000000..be5843eba13b8bd553a047f375f18349d39a5e0d --- /dev/null +++ b/GCC14-1007-i386-Fix-some-vex-insns-that-prohibit-egpr.patch @@ -0,0 +1,263 @@ +From b1d999fd3609fb4649540952396131370769da65 Mon Sep 17 
00:00:00 2001 +From: Lingling Kong +Date: Fri, 16 Aug 2024 15:52:27 +0800 +Subject: [PATCH 02/21] i386: Fix some vex insns that prohibit egpr + +Although these vex insn have evex counterpart, but when it +uses the displayed vex prefix should not support APX EGPR. +Like TARGET_AVXVNNI, TARGET_IFMA and TARGET_AVXNECONVERT. +TARGET_AVXVNNIINT8 and TARGET_AVXVNNITINT16 also are vex +insn should not support egpr. + +gcc/ChangeLog: + + * config/i386/sse.md (vpmadd52): + Prohibit egpr for vex version. + (vpdpbusd_): Ditto. + (vpdpbusds_): Ditto. + (vpdpwssd_): Ditto. + (vpdpwssds_): Ditto. + (*vcvtneps2bf16_v4sf): Ditto. + (*vcvtneps2bf16_v8sf): Ditto. + (vpdp_): Ditto. + (vbcstnebf162ps_): Ditto. + (vbcstnesh2ps_): Ditto. + (vcvtnee2ps_): Ditto. + (vcvtneo2ps_): Ditto. + (vpdp_): Ditto. + +(cherry picked from commit 45a771d22e3090c42a4934a49da9924165e080af) +--- + gcc/config/i386/sse.md | 49 +++++++++++++++++++++++++++--------------- + 1 file changed, 32 insertions(+), 17 deletions(-) + +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index 1bf50726e83..218aa412c33 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -29364,7 +29364,7 @@ + (unspec:VI8_AVX2 + [(match_operand:VI8_AVX2 1 "register_operand" "0,0") + (match_operand:VI8_AVX2 2 "register_operand" "x,v") +- (match_operand:VI8_AVX2 3 "nonimmediate_operand" "xm,vm")] ++ (match_operand:VI8_AVX2 3 "nonimmediate_operand" "xjm,vm")] + VPMADD52))] + "TARGET_AVXIFMA || (TARGET_AVX512IFMA && TARGET_AVX512VL)" + "@ +@@ -29372,6 +29372,7 @@ + vpmadd52\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "isa" "avxifma,avx512ifmavl") + (set_attr "type" "ssemuladd") ++ (set_attr "addr" "gpr16,*") + (set_attr "prefix" "vex,evex") + (set_attr "mode" "")]) + +@@ -29989,13 +29990,14 @@ + (unspec:VI4_AVX2 + [(match_operand:VI4_AVX2 1 "register_operand" "0,0") + (match_operand:VI4_AVX2 2 "register_operand" "x,v") +- (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] ++ (match_operand:VI4_AVX2 3 
"nonimmediate_operand" "xjm,vm")] + UNSPEC_VPDPBUSD))] + "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" + "@ + %{vex%} vpdpbusd\t{%3, %2, %0|%0, %2, %3} + vpdpbusd\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("vex,evex")) ++ (set_attr "addr" "gpr16,*") + (set_attr ("isa") ("avxvnni,avx512vnnivl"))]) + + (define_insn "vpdpbusd__mask" +@@ -30057,13 +30059,14 @@ + (unspec:VI4_AVX2 + [(match_operand:VI4_AVX2 1 "register_operand" "0,0") + (match_operand:VI4_AVX2 2 "register_operand" "x,v") +- (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] ++ (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xjm,vm")] + UNSPEC_VPDPBUSDS))] + "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" + "@ + %{vex%} vpdpbusds\t{%3, %2, %0|%0, %2, %3} + vpdpbusds\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("vex,evex")) ++ (set_attr "addr" "gpr16,*") + (set_attr ("isa") ("avxvnni,avx512vnnivl"))]) + + (define_insn "vpdpbusds__mask" +@@ -30125,13 +30128,14 @@ + (unspec:VI4_AVX2 + [(match_operand:VI4_AVX2 1 "register_operand" "0,0") + (match_operand:VI4_AVX2 2 "register_operand" "x,v") +- (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] ++ (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xjm,vm")] + UNSPEC_VPDPWSSD))] + "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" + "@ + %{vex%} vpdpwssd\t{%3, %2, %0|%0, %2, %3} + vpdpwssd\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("vex,evex")) ++ (set_attr "addr" "gpr16,*") + (set_attr ("isa") ("avxvnni,avx512vnnivl"))]) + + (define_insn "vpdpwssd__mask" +@@ -30193,13 +30197,14 @@ + (unspec:VI4_AVX2 + [(match_operand:VI4_AVX2 1 "register_operand" "0,0") + (match_operand:VI4_AVX2 2 "register_operand" "x,v") +- (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] ++ (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xjm,vm")] + UNSPEC_VPDPWSSDS))] + "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" + "@ + %{vex%} vpdpwssds\t{%3, %2, %0|%0, %2, %3} + vpdpwssds\t{%3, %2, %0|%0, 
%2, %3}" + [(set_attr ("prefix") ("vex,evex")) ++ (set_attr "addr" "gpr16,*") + (set_attr ("isa") ("avxvnni,avx512vnnivl"))]) + + (define_insn "vpdpwssds__mask" +@@ -30417,13 +30422,14 @@ + [(set (match_operand:V8BF 0 "register_operand" "=x,v") + (vec_concat:V8BF + (float_truncate:V4BF +- (match_operand:V4SF 1 "nonimmediate_operand" "xm,vm")) ++ (match_operand:V4SF 1 "nonimmediate_operand" "xjm,vm")) + (match_operand:V4BF 2 "const0_operand")))] + "TARGET_AVXNECONVERT || (TARGET_AVX512BF16 && TARGET_AVX512VL)" + "@ + %{vex%} vcvtneps2bf16{x}\t{%1, %0|%0, %1} + vcvtneps2bf16{x}\t{%1, %0|%0, %1}" + [(set_attr "isa" "avxneconvert,avx512bf16vl") ++ (set_attr "addr" "gpr16,*") + (set_attr "prefix" "vex,evex")]) + + (define_expand "avx512f_cvtneps2bf16_v4sf_maskz" +@@ -30481,12 +30487,13 @@ + (define_insn "vcvtneps2bf16_v8sf" + [(set (match_operand:V8BF 0 "register_operand" "=x,v") + (float_truncate:V8BF +- (match_operand:V8SF 1 "nonimmediate_operand" "xm,vm")))] ++ (match_operand:V8SF 1 "nonimmediate_operand" "xjm,vm")))] + "TARGET_AVXNECONVERT || (TARGET_AVX512BF16 && TARGET_AVX512VL)" + "@ + %{vex%} vcvtneps2bf16{y}\t{%1, %0|%0, %1} + vcvtneps2bf16{y}\t{%1, %0|%0, %1}" + [(set_attr "isa" "avxneconvert,avx512bf16vl") ++ (set_attr "addr" "gpr16,*") + (set_attr "prefix" "vex,evex")]) + + +@@ -30942,30 +30949,33 @@ + (unspec:VI4_AVX + [(match_operand:VI4_AVX 1 "register_operand" "0") + (match_operand:VI4_AVX 2 "register_operand" "x") +- (match_operand:VI4_AVX 3 "nonimmediate_operand" "xm")] ++ (match_operand:VI4_AVX 3 "nonimmediate_operand" "xjm")] + VPDOTPROD))] + "TARGET_AVXVNNIINT8" + "vpdp\t{%3, %2, %0|%0, %2, %3}" +- [(set_attr "prefix" "vex")]) ++ [(set_attr "prefix" "vex") ++ (set_attr "addr" "gpr16")]) + + (define_insn "vbcstnebf162ps_" + [(set (match_operand:VF1_128_256 0 "register_operand" "=x") + (vec_duplicate:VF1_128_256 + (float_extend:SF +- (match_operand:BF 1 "memory_operand" "m"))))] ++ (match_operand:BF 1 "memory_operand" "jm"))))] + "TARGET_AVXNECONVERT" 
+ "vbcstnebf162ps\t{%1, %0|%0, %1}" + [(set_attr "prefix" "vex") ++ (set_attr "addr" "gpr16") + (set_attr "mode" "")]) + + (define_insn "vbcstnesh2ps_" + [(set (match_operand:VF1_128_256 0 "register_operand" "=x") + (vec_duplicate:VF1_128_256 + (float_extend:SF +- (match_operand:HF 1 "memory_operand" "m"))))] ++ (match_operand:HF 1 "memory_operand" "jm"))))] + "TARGET_AVXNECONVERT" + "vbcstnesh2ps\t{%1, %0|%0, %1}" + [(set_attr "prefix" "vex") ++ (set_attr "addr" "gpr16") + (set_attr "mode" "")]) + + (define_mode_attr bf16_ph +@@ -30976,19 +30986,20 @@ + [(set (match_operand:V4SF 0 "register_operand" "=x") + (float_extend:V4SF + (vec_select: +- (match_operand:VHFBF_128 1 "memory_operand" "m") ++ (match_operand:VHFBF_128 1 "memory_operand" "jm") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)]))))] + "TARGET_AVXNECONVERT" + "vcvtnee2ps\t{%1, %0|%0, %1}" + [(set_attr "prefix" "vex") ++ (set_attr "addr" "gpr16") + (set_attr "mode" "")]) + + (define_insn "vcvtnee2ps_" + [(set (match_operand:V8SF 0 "register_operand" "=x") + (float_extend:V8SF + (vec_select: +- (match_operand:VHFBF_256 1 "memory_operand" "m") ++ (match_operand:VHFBF_256 1 "memory_operand" "jm") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) +@@ -30996,25 +31007,27 @@ + "TARGET_AVXNECONVERT" + "vcvtnee2ps\t{%1, %0|%0, %1}" + [(set_attr "prefix" "vex") ++ (set_attr "addr" "gpr16") + (set_attr "mode" "")]) + + (define_insn "vcvtneo2ps_" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (float_extend:V4SF + (vec_select: +- (match_operand:VHFBF_128 1 "memory_operand" "m") ++ (match_operand:VHFBF_128 1 "memory_operand" "jm") + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)]))))] + "TARGET_AVXNECONVERT" + "vcvtneo2ps\t{%1, %0|%0, %1}" + [(set_attr "prefix" "vex") ++ (set_attr "addr" "gpr16") + (set_attr "mode" "")]) + + (define_insn "vcvtneo2ps_" + [(set (match_operand:V8SF 0 "register_operand" "=x") + 
(float_extend:V8SF + (vec_select: +- (match_operand:VHFBF_256 1 "memory_operand" "m") ++ (match_operand:VHFBF_256 1 "memory_operand" "jm") + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) +@@ -31022,6 +31035,7 @@ + "TARGET_AVXNECONVERT" + "vcvtneo2ps\t{%1, %0|%0, %1}" + [(set_attr "prefix" "vex") ++ (set_attr "addr" "gpr16") + (set_attr "mode" "")]) + + (define_int_iterator VPDPWPROD +@@ -31078,8 +31092,9 @@ + (unspec:VI4_AVX + [(match_operand:VI4_AVX 1 "register_operand" "0") + (match_operand:VI4_AVX 2 "register_operand" "x") +- (match_operand:VI4_AVX 3 "nonimmediate_operand" "xm")] ++ (match_operand:VI4_AVX 3 "nonimmediate_operand" "xjm")] + VPDPWPROD))] + "TARGET_AVXVNNIINT16" + "vpdp\t{%3, %2, %0|%0, %2, %3}" +- [(set_attr "prefix" "vex")]) ++ [(set_attr "prefix" "vex") ++ (set_attr "addr" "gpr16")]) +-- +2.31.1 + diff --git a/GCC14-1008-Align-ix86_-move_max-store_max-with-vectorizer.patch b/GCC14-1008-Align-ix86_-move_max-store_max-with-vectorizer.patch new file mode 100644 index 0000000000000000000000000000000000000000..ee2a5b02d0ede3c325f0d4412eaaf480643c56d6 --- /dev/null +++ b/GCC14-1008-Align-ix86_-move_max-store_max-with-vectorizer.patch @@ -0,0 +1,232 @@ +From dbd597e8083884f9f1d15d8b641b1da244a1ee95 Mon Sep 17 00:00:00 2001 +From: liuhongt +Date: Thu, 15 Aug 2024 12:54:07 +0800 +Subject: [PATCH 03/21] Align ix86_{move_max,store_max} with vectorizer. + +When none of mprefer-vector-width, avx256_optimal/avx128_optimal, +avx256_store_by_pieces/avx512_store_by_pieces is specified, GCC will +set ix86_{move_max,store_max} as max available vector length except +for AVX part. 
+ + if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) + && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) + opts->x_ix86_move_max = PVW_AVX512; + else + opts->x_ix86_move_max = PVW_AVX128; + +So for -mavx2, vectorizer will choose 256-bit for vectorization, but +128-bit is used for struct copy, there could be a potential STLF issue +due to this "misalign". + +The patch fixes that. + +gcc/ChangeLog: + + * config/i386/i386-options.cc (ix86_option_override_internal): + set ix86_{move_max,store_max} to PVW_AVX256 when TARGET_AVX + instead of PVW_AVX128. + +gcc/testsuite/ChangeLog: + * gcc.target/i386/pieces-memcpy-10.c: Add -mprefer-vector-width=128. + * gcc.target/i386/pieces-memcpy-6.c: Ditto. + * gcc.target/i386/pieces-memset-38.c: Ditto. + * gcc.target/i386/pieces-memset-40.c: Ditto. + * gcc.target/i386/pieces-memset-41.c: Ditto. + * gcc.target/i386/pieces-memset-42.c: Ditto. + * gcc.target/i386/pieces-memset-43.c: Ditto. + * gcc.target/i386/pieces-strcpy-2.c: Ditto. + * gcc.target/i386/pieces-memcpy-22.c: New test. + * gcc.target/i386/pieces-memset-51.c: New test. + * gcc.target/i386/pieces-strcpy-3.c: New test. 
+ +(cherry picked from commit 27dc1533b6dfc49f3912c524db51d6c372a5ac3d) +--- + gcc/config/i386/i386-options.cc | 6 ++++++ + gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c | 2 +- + gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c | 12 ++++++++++++ + gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c | 2 +- + gcc/testsuite/gcc.target/i386/pieces-memset-38.c | 2 +- + gcc/testsuite/gcc.target/i386/pieces-memset-40.c | 2 +- + gcc/testsuite/gcc.target/i386/pieces-memset-41.c | 2 +- + gcc/testsuite/gcc.target/i386/pieces-memset-42.c | 2 +- + gcc/testsuite/gcc.target/i386/pieces-memset-43.c | 2 +- + gcc/testsuite/gcc.target/i386/pieces-memset-51.c | 12 ++++++++++++ + gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c | 2 +- + gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c | 15 +++++++++++++++ + 12 files changed, 53 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c + create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-51.c + create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c + +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index 6c212a8edeb..f6c450cc871 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -3062,6 +3062,9 @@ ix86_option_override_internal (bool main_args_p, + if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) + && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) + opts->x_ix86_move_max = PVW_AVX512; ++ /* Align with vectorizer to avoid potential STLF issue. */ ++ else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) ++ opts->x_ix86_move_max = PVW_AVX256; + else + opts->x_ix86_move_max = PVW_AVX128; + } +@@ -3086,6 +3089,9 @@ ix86_option_override_internal (bool main_args_p, + if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) + && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) + opts->x_ix86_store_max = PVW_AVX512; ++ /* Align with vectorizer to avoid potential STLF issue. 
*/ ++ else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) ++ opts->x_ix86_store_max = PVW_AVX256; + else + opts->x_ix86_store_max = PVW_AVX128; + } +diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c +index 5faee21f9b9..53ad0b3be44 100644 +--- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c ++++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ ++/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ + + extern char *dst, *src; + +diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c +new file mode 100644 +index 00000000000..605b3623ffc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile { target { ! ia32 } } } */ ++/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ ++ ++extern char *dst, *src; ++ ++void ++foo (void) ++{ ++ __builtin_memcpy (dst, src, 33); ++} ++ ++/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ +diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c +index 5f99cc98c47..cfd2a86cf33 100644 +--- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c ++++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c +@@ -1,5 +1,5 @@ + /* { dg-do compile { target { ! 
ia32 } } } */ +-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ ++/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ + + extern char *dst, *src; + +diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c +index ed4a24a54fd..ddd194debd5 100644 +--- a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c ++++ b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */ ++/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */ + + extern char *dst; + +diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c +index 86358c99a83..5878876550c 100644 +--- a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c ++++ b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */ ++/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */ + /* Cope with --enable-frame-pointer, Solaris/x86 -mstackrealign default. */ + /* { dg-additional-options "-fomit-frame-pointer -mno-stackrealign" } */ + +diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c +index d7a27f52983..27a6c8ad139 100644 +--- a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c ++++ b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge -mno-stackrealign" } */ ++/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge -mno-stackrealign" } */ + /* Cope with --enable-frame-pointer. 
*/ + /* { dg-additional-options "-fomit-frame-pointer" } */ + +diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c +index df0c122aae7..103da699ae5 100644 +--- a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c ++++ b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ ++/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ + + extern char *dst; + +diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c +index 2f2179c2df9..f1494e17610 100644 +--- a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c ++++ b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ ++/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ + + extern char *dst; + +diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-51.c b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c +new file mode 100644 +index 00000000000..192ec0d1647 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ ++ ++extern char *dst; ++ ++void ++foo (int x) ++{ ++ __builtin_memset (dst, x, 64); ++} ++ ++/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ +diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c +index 90446edb4f3..9bb94b7419b 100644 +--- a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c ++++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c +@@ -1,5 +1,5 @@ + /* { dg-do compile { target { ! 
ia32 } } } */ +-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ ++/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ + + extern char *strcpy (char *, const char *); + +diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c +new file mode 100644 +index 00000000000..df7571b547f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile { target { ! ia32 } } } */ ++/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ ++ ++extern char *strcpy (char *, const char *); ++ ++void ++foo (char *s) ++{ ++ strcpy (s, ++ "1234567890abcdef123456abcdef5678123456abcdef567abcdef678" ++ "1234567"); ++} ++ ++/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%ymm" 2 } } */ ++/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ +-- +2.31.1 + diff --git a/GCC14-1009-Check-avx-upper-register-for-parallel.patch b/GCC14-1009-Check-avx-upper-register-for-parallel.patch new file mode 100644 index 0000000000000000000000000000000000000000..7fca35e810b53e785c8d6766a5a3d7efddcd445c --- /dev/null +++ b/GCC14-1009-Check-avx-upper-register-for-parallel.patch @@ -0,0 +1,148 @@ +From 61495359a1dc4ec724261559e765170ea7c1ba31 Mon Sep 17 00:00:00 2001 +From: liuhongt +Date: Thu, 29 Aug 2024 11:39:20 +0800 +Subject: [PATCH 04/21] Check avx upper register for parallel. + +For function arguments/return, when it's BLK mode, it's put in a +parallel with an expr_list, and the expr_list contains the real mode +and registers. +Current ix86_check_avx_upper_register only checked for SSE_REG_P, and +failed to handle that. The patch extend the handle to each subrtx. + +gcc/ChangeLog: + + PR target/116512 + * config/i386/i386.cc (ix86_check_avx_upper_register): Iterate + subrtx to scan for avx upper register. + (ix86_check_avx_upper_stores): Inline old + ix86_check_avx_upper_register. 
+ (ix86_avx_u128_mode_needed): Ditto, and replace + FOR_EACH_SUBRTX with call to new + ix86_check_avx_upper_register. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr116512.c: New test. + +(cherry picked from commit ba9a3f105ea552a22d08f2d54dfdbef16af7c99e) +--- + gcc/config/i386/i386.cc | 36 +++++++++++++++--------- + gcc/testsuite/gcc.target/i386/pr116512.c | 26 +++++++++++++++++ + 2 files changed, 49 insertions(+), 13 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr116512.c + +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 35a28243389..a6b7e8b395e 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -15027,9 +15027,19 @@ ix86_dirflag_mode_needed (rtx_insn *insn) + static bool + ix86_check_avx_upper_register (const_rtx exp) + { +- return (SSE_REG_P (exp) +- && !EXT_REX_SSE_REG_P (exp) +- && GET_MODE_BITSIZE (GET_MODE (exp)) > 128); ++ /* construct_container may return a parallel with expr_list ++ which contains the real reg and mode */ ++ subrtx_iterator::array_type array; ++ FOR_EACH_SUBRTX (iter, array, exp, NONCONST) ++ { ++ const_rtx x = *iter; ++ if (SSE_REG_P (x) ++ && !EXT_REX_SSE_REG_P (x) ++ && GET_MODE_BITSIZE (GET_MODE (x)) > 128) ++ return true; ++ } ++ ++ return false; + } + + /* Check if a 256bit or 512bit AVX register is referenced in stores. 
*/ +@@ -15037,7 +15047,9 @@ ix86_check_avx_upper_register (const_rtx exp) + static void + ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) + { +- if (ix86_check_avx_upper_register (dest)) ++ if (SSE_REG_P (dest) ++ && !EXT_REX_SSE_REG_P (dest) ++ && GET_MODE_BITSIZE (GET_MODE (dest)) > 128) + { + bool *used = (bool *) data; + *used = true; +@@ -15096,14 +15108,14 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) + return AVX_U128_CLEAN; + } + +- subrtx_iterator::array_type array; +- + rtx set = single_set (insn); + if (set) + { + rtx dest = SET_DEST (set); + rtx src = SET_SRC (set); +- if (ix86_check_avx_upper_register (dest)) ++ if (SSE_REG_P (dest) ++ && !EXT_REX_SSE_REG_P (dest) ++ && GET_MODE_BITSIZE (GET_MODE (dest)) > 128) + { + /* This is an YMM/ZMM load. Return AVX_U128_DIRTY if the + source isn't zero. */ +@@ -15114,9 +15126,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) + } + else + { +- FOR_EACH_SUBRTX (iter, array, src, NONCONST) +- if (ix86_check_avx_upper_register (*iter)) +- return AVX_U128_DIRTY; ++ if (ix86_check_avx_upper_register (src)) ++ return AVX_U128_DIRTY; + } + + /* This isn't YMM/ZMM load/store. */ +@@ -15127,9 +15138,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) + Hardware changes state only when a 256bit register is written to, + but we need to prevent the compiler from moving optimal insertion + point above eventual read from 256bit or 512 bit register. 
*/ +- FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) +- if (ix86_check_avx_upper_register (*iter)) +- return AVX_U128_DIRTY; ++ if (ix86_check_avx_upper_register (PATTERN (insn))) ++ return AVX_U128_DIRTY; + + return AVX_U128_ANY; + } +diff --git a/gcc/testsuite/gcc.target/i386/pr116512.c b/gcc/testsuite/gcc.target/i386/pr116512.c +new file mode 100644 +index 00000000000..c2bc6c91b64 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr116512.c +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++/* { dg-options "-march=x86-64-v4 -O2" } */ ++/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */ ++ ++#include ++ ++struct B { ++ union { ++ __m512 f; ++ __m512i s; ++ }; ++}; ++ ++struct B foo(int n) { ++ struct B res; ++ res.s = _mm512_set1_epi32(n); ++ ++ return res; ++} ++ ++__m512i bar(int n) { ++ struct B res; ++ res.s = _mm512_set1_epi32(n); ++ ++ return res.s; ++} +-- +2.31.1 + diff --git a/GCC14-1010-i386-Fix-vfpclassph-non-optimizied-intrin.patch b/GCC14-1010-i386-Fix-vfpclassph-non-optimizied-intrin.patch new file mode 100644 index 0000000000000000000000000000000000000000..25dd95e41df033749450809d9d6c7b6ce896185d --- /dev/null +++ b/GCC14-1010-i386-Fix-vfpclassph-non-optimizied-intrin.patch @@ -0,0 +1,134 @@ +From 7f3fcf3d6a95ab7f5abb6c9fd591783c930be081 Mon Sep 17 00:00:00 2001 +From: Haochen Jiang +Date: Mon, 2 Sep 2024 15:00:22 +0800 +Subject: [PATCH 05/21] i386: Fix vfpclassph non-optimizied intrin + +The intrin for non-optimized got a typo in mask type, which will cause +the high bits of __mmask32 to be unexpectedly zeroed. + +The test does not fail under O0 with current 1b since the testcase is +wrong. We need to include avx512f-mask-type.h after SIZE is defined, or +it will always be __mmask8. I will write a separate patch to fix that +on trunk ONLY. + +gcc/ChangeLog: + + * config/i386/avx512fp16intrin.h + (_mm512_mask_fpclass_ph_mask): Correct mask type to __mmask32. + (_mm512_fpclass_ph_mask): Ditto. 
+ +gcc/testsuite/ChangeLog: + + * gcc.target/i386/avx512fp16-vfpclassph-1c.c: New test. + +(cherry picked from commit 59157c038d683e91c419a1fadd5f91f15218f57b) +--- + gcc/config/i386/avx512fp16intrin.h | 4 +- + .../i386/avx512fp16-vfpclassph-1c.c | 77 +++++++++++++++++++ + 2 files changed, 79 insertions(+), 2 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c + +diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h +index f86050b2087..e8baebd41d3 100644 +--- a/gcc/config/i386/avx512fp16intrin.h ++++ b/gcc/config/i386/avx512fp16intrin.h +@@ -3961,11 +3961,11 @@ _mm512_fpclass_ph_mask (__m512h __A, const int __imm) + #else + #define _mm512_mask_fpclass_ph_mask(u, x, c) \ + ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \ +- (int) (c),(__mmask8)(u))) ++ (int) (c),(__mmask32)(u))) + + #define _mm512_fpclass_ph_mask(x, c) \ + ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \ +- (int) (c),(__mmask8)-1)) ++ (int) (c),(__mmask32)-1)) + #endif /* __OPIMTIZE__ */ + + /* Intrinsics vgetexpph. 
*/ +diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c +new file mode 100644 +index 00000000000..4739f1228e3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c +@@ -0,0 +1,77 @@ ++/* { dg-do run } */ ++/* { dg-options "-O0 -mavx512fp16" } */ ++/* { dg-require-effective-target avx512fp16 } */ ++ ++#define AVX512FP16 ++#include "avx512f-helper.h" ++ ++#include ++#include ++#include ++#define SIZE (AVX512F_LEN / 16) ++#include "avx512f-mask-type.h" ++ ++#ifndef __FPCLASSPH__ ++#define __FPCLASSPH__ ++int check_fp_class_hp (_Float16 src, int imm) ++{ ++ int qNaN_res = isnan (src); ++ int sNaN_res = isnan (src); ++ int Pzero_res = (src == 0.0); ++ int Nzero_res = (src == -0.0); ++ int PInf_res = (isinf (src) == 1); ++ int NInf_res = (isinf (src) == -1); ++ int Denorm_res = (fpclassify (src) == FP_SUBNORMAL); ++ int FinNeg_res = __builtin_finite (src) && (src < 0); ++ ++ int result = (((imm & 1) && qNaN_res) ++ || (((imm >> 1) & 1) && Pzero_res) ++ || (((imm >> 2) & 1) && Nzero_res) ++ || (((imm >> 3) & 1) && PInf_res) ++ || (((imm >> 4) & 1) && NInf_res) ++ || (((imm >> 5) & 1) && Denorm_res) ++ || (((imm >> 6) & 1) && FinNeg_res) ++ || (((imm >> 7) & 1) && sNaN_res)); ++ return result; ++} ++#endif ++ ++MASK_TYPE ++CALC (_Float16 *s1, int imm) ++{ ++ int i; ++ MASK_TYPE res = 0; ++ ++ for (i = 0; i < SIZE; i++) ++ if (check_fp_class_hp(s1[i], imm)) ++ res = res | (1 << i); ++ ++ return res; ++} ++ ++void ++TEST (void) ++{ ++ int i; ++ UNION_TYPE (AVX512F_LEN, h) src; ++ MASK_TYPE res1, res2, res_ref = 0; ++ MASK_TYPE mask = MASK_VALUE; ++ ++ src.a[SIZE - 1] = NAN; ++ src.a[SIZE - 2] = 1.0 / 0.0; ++ for (i = 0; i < SIZE - 2; i++) ++ { ++ src.a[i] = -24.43 + 0.6 * i; ++ } ++ ++ res1 = INTRINSIC (_fpclass_ph_mask) (src.x, 0xFF); ++ res2 = INTRINSIC (_mask_fpclass_ph_mask) (mask, src.x, 0xFF); ++ ++ res_ref = CALC (src.a, 0xFF); ++ ++ if (res_ref != res1) ++ abort (); 
++ ++ if ((mask & res_ref) != res2) ++ abort (); ++} +-- +2.31.1 + diff --git a/GCC14-1011-doc-Enhance-Intel-CPU-documentation.patch b/GCC14-1011-doc-Enhance-Intel-CPU-documentation.patch new file mode 100644 index 0000000000000000000000000000000000000000..e8c0947984f121898bffca7dd9ab7917aae3ad69 --- /dev/null +++ b/GCC14-1011-doc-Enhance-Intel-CPU-documentation.patch @@ -0,0 +1,64 @@ +From 6b0ea4cec9cc15d00bf72efdc86295dc5b5e75d5 Mon Sep 17 00:00:00 2001 +From: Haochen Jiang +Date: Fri, 6 Sep 2024 11:19:26 +0800 +Subject: [PATCH 06/21] doc: Enhance Intel CPU documentation + +This patch will add those recent aliased CPU names into documentation +for clearness. + +gcc/ChangeLog: + + PR target/116617 + * doc/invoke.texi: Add meteorlake, raptorlake and lunarlake. + +(cherry picked from commit 3951efed1cce970a5c61eacbad7e5f5314a9fc17) +--- + gcc/doc/invoke.texi | 25 ++++++++++++++----------- + 1 file changed, 14 insertions(+), 11 deletions(-) + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index f82f7d2817b..cf7ae3d39e6 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -34484,12 +34484,14 @@ UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512-FP16 and AVX512BF16 + instruction set support. + + @item alderlake +-Intel Alderlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, +-SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, +-XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, +-CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, +-VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and AVX-VNNI instruction set +-support. 
++@itemx raptorlake ++@itemx meteorlake ++Intel Alderlake/Raptorlake/Meteorlake CPU with 64-bit extensions, MOVBE, MMX, ++SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, ++XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, ++MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, ++LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and ++AVX-VNNI instruction set support. + + @item rocketlake + Intel Rocketlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3 +@@ -34531,11 +34533,12 @@ UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set + support. + + @item arrowlake-s +-Intel Arrow Lake S CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, +-SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, +-XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, +-MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, +-PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, ++@itemx lunarlake ++Intel Arrow Lake S/Lunarlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, ++SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, ++XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, ++MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, ++LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, + UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, + SM3 and SM4 instruction set support. 
+ +-- +2.31.1 + diff --git a/GCC14-1012-doc-Add-more-alias-option-and-reorder-Intel-CPU-marc.patch b/GCC14-1012-doc-Add-more-alias-option-and-reorder-Intel-CPU-marc.patch new file mode 100644 index 0000000000000000000000000000000000000000..b1fd47ea799894a3662c68a2d8f2599feaaaf12e --- /dev/null +++ b/GCC14-1012-doc-Add-more-alias-option-and-reorder-Intel-CPU-marc.patch @@ -0,0 +1,404 @@ +From dfa4557ce2dda34c0d8bd5dd5a384eca2cdf5d51 Mon Sep 17 00:00:00 2001 +From: Haochen Jiang +Date: Wed, 18 Sep 2024 11:20:15 +0800 +Subject: [PATCH 07/21] doc: Add more alias option and reorder Intel CPU -march + documentation + +This patch is backported from GCC15 with some tweaks. + +Since r15-3539, there are requests coming in to add other alias option +documentation. This patch will add all of them, including corei7, corei7-avx, +core-avx-i, core-avx2, atom, slm, gracemont and emeraldrapids. + +Also in the patch, I reordered that part of documentation, currently all +the CPUs/products are just all over the place. I regrouped them by +date-to-now products (since the very first CPU to latest Panther Lake), P-core +(since the clients become hybrid cores, starting from Sapphire Rapids) and +E-core (since Bonnell to latest Clearwater Forest). In GCC14 and +earlier GCC, Xeon Phi CPUs are still there, I put them after E-core +CPUs. + +And in the patch, I refined the product names in documentation. + +gcc/ChangeLog: + + * doc/invoke.texi: Add corei7, corei7-avx, core-avx-i, + core-avx2, atom, slm, gracemont and emeraldrapids. Reorder + the -march documentation by splitting them into date-to-now + products, P-core, E-core and Xeon Phi. Refine the product names in + documentation.
+ +(cherry picked from commit a3efd2ff9db0545d0f504153a6a0195e1c92e5cf) +--- + gcc/doc/invoke.texi | 260 +++++++++++++++++++++++--------------------- + 1 file changed, 134 insertions(+), 126 deletions(-) + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index cf7ae3d39e6..b175bac6715 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -34315,6 +34315,7 @@ Intel Core 2 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, CX16, + SAHF and FXSR instruction set support. + + @item nehalem ++@itemx corei7 + Intel Nehalem CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, + SSE4.1, SSE4.2, POPCNT, CX16, SAHF and FXSR instruction set support. + +@@ -34323,17 +34324,20 @@ Intel Westmere CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, + SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR and PCLMUL instruction set support. + + @item sandybridge ++@itemx corei7-avx + Intel Sandy Bridge CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, + SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE and PCLMUL instruction set + support. + + @item ivybridge ++@itemx core-avx-i + Intel Ivy Bridge CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, + SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND + and F16C instruction set support. + + @item haswell +-Intel Haswell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, ++@itemx core-avx2 ++Intel Haswell CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, + SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, + F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE and HLE instruction set support. + +@@ -34349,74 +34353,6 @@ SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, + F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, + CLFLUSHOPT, XSAVEC, XSAVES and SGX instruction set support. 
+ +-@item bonnell +-Intel Bonnell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3 and SSSE3 +-instruction set support. +- +-@item silvermont +-Intel Silvermont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, +-SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW and RDRND +-instruction set support. +- +-@item goldmont +-Intel Goldmont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, +-SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA, +-RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT and FSGSBASE instruction +-set support. +- +-@item goldmont-plus +-Intel Goldmont Plus CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, +-SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, +-SHA, RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, +-RDPID and SGX instruction set support. +- +-@item tremont +-Intel Tremont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, +-SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA, +-RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, RDPID, +-SGX, CLWB, GFNI-SSE, MOVDIRI, MOVDIR64B, CLDEMOTE and WAITPKG instruction set +-support. +- +-@item sierraforest +-Intel Sierra Forest CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, +-SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, +-XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, +-MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, +-PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, +-AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, ENQCMD and UINTR instruction set +-support. 
+- +-@item grandridge +-Intel Grand Ridge CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, +-SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, +-XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, +-MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, +-PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, +-AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, ENQCMD and UINTR instruction set +-support. +- +-@item clearwaterforest +-Intel Clearwater Forest CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, +-SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, +-XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, +-MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, +-LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, +-ENQCMD, UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, +-SHA512, SM3, SM4, USER_MSR and PREFETCHI instruction set support. +- +-@item knl +-Intel Knight's Landing CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, +-SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, +-RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, +-AVX512PF, AVX512ER, AVX512F, AVX512CD and PREFETCHWT1 instruction set support. +- +-@item knm +-Intel Knights Mill CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, +-SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, +-RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, +-AVX512PF, AVX512ER, AVX512F, AVX512CD and PREFETCHWT1, AVX5124VNNIW, +-AVX5124FMAPS and AVX512VPOPCNTDQ instruction set support. 
+- + @item skylake-avx512 + Intel Skylake Server CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, + SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, +@@ -34424,16 +34360,30 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, + AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, CLWB, AVX512VL, AVX512BW, + AVX512DQ and AVX512CD instruction set support. + ++@item cascadelake ++Intel Cascade Lake CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, ++SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, ++F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, ++CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, CLWB, AVX512VL, AVX512BW, AVX512DQ, ++AVX512CD and AVX512VNNI instruction set support. ++ + @item cannonlake +-Intel Cannonlake Server CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, ++Intel Cannon Lake Server CPU with 64-bit extensions, MMX, SSE, SSE2, + SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, + FSGSBASE, RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, + PREFETCHW, AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, + AVX512DQ, AVX512CD, PKU, AVX512VBMI, AVX512IFMA and SHA instruction set + support. + ++@item cooperlake ++Intel Cooper Lake CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, ++SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, ++F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, ++CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, CLWB, AVX512VL, AVX512BW, AVX512DQ, ++AVX512CD, AVX512VNNI and AVX512BF16 instruction set support. 
++ + @item icelake-client +-Intel Icelake Client CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, ++Intel Ice Lake Client CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, + SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, + RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, + AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, +@@ -34441,7 +34391,7 @@ AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2 + , VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ instruction set support. + + @item icelake-server +-Intel Icelake Server CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, ++Intel Ice Lake Server CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, + SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, + RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, + AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, +@@ -34449,22 +34399,8 @@ AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2 + , VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD and CLWB + instruction set support. + +-@item cascadelake +-Intel Cascadelake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, +-SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, +-F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, +-CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, CLWB, AVX512VL, AVX512BW, AVX512DQ, +-AVX512CD and AVX512VNNI instruction set support. 
+- +-@item cooperlake +-Intel cooperlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, +-SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, +-F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, +-CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, CLWB, AVX512VL, AVX512BW, AVX512DQ, +-AVX512CD, AVX512VNNI and AVX512BF16 instruction set support. +- + @item tigerlake +-Intel Tigerlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, ++Intel Tiger Lake CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, + SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, + F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, + CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, AVX512CD +@@ -34472,37 +34408,67 @@ PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, + VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, MOVDIRI, MOVDIR64B, CLWB, + AVX512VP2INTERSECT and KEYLOCKER instruction set support. + +-@item sapphirerapids +-Intel sapphirerapids CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, +-SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, +-RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, +-AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, +-AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, +-VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB, +-MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK, +-UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512-FP16 and AVX512BF16 +-instruction set support. 
++@item rocketlake ++Intel Rocket Lake CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, ++SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, ++F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, ++CLFLUSHOPT, XSAVEC, XSAVES, AVX512F, AVX512VL, AVX512BW, AVX512DQ, AVX512CD ++PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, ++VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ instruction set support. + + @item alderlake + @itemx raptorlake + @itemx meteorlake +-Intel Alderlake/Raptorlake/Meteorlake CPU with 64-bit extensions, MOVBE, MMX, +-SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, ++@itemx gracemont ++Intel Alder Lake/Raptor Lake/Meteor Lake/Gracemont CPU with 64-bit extensions, ++MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, ++PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, ++GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, ++BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, ++WIDEKL and AVX-VNNI instruction set support. ++ ++@item arrowlake ++Intel Arrow Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, ++SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, ++XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, ++MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, ++PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, ++UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set ++support. 
++ ++@item arrowlake-s ++@itemx lunarlake ++Intel Arrow Lake S/Lunar Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, ++SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, + XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, + MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, +-LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and +-AVX-VNNI instruction set support. ++LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, ++UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, ++SM3 and SM4 instruction set support. + +-@item rocketlake +-Intel Rocketlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3 +-, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, +-F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, +-CLFLUSHOPT, XSAVEC, XSAVES, AVX512F, AVX512VL, AVX512BW, AVX512DQ, AVX512CD +-PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, +-VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ instruction set support. ++@item pantherlake ++Intel Panther Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, ++SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, ++XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, ++MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, ++PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, ++UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, ++SM3, SM4 and PREFETCHI instruction set support. 
++ ++@item sapphirerapids ++@itemx emeraldrapids ++Intel Sapphire Rapids/Emerald Rapids CPU with 64-bit extensions, MMX, SSE, ++SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, ++FSGSBASE, RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, ++PREFETCHW, AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, ++AVX512DQ, AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, ++AVX512VBMI2, VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, ++WBNOINVD, CLWB, MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, ++SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512-FP16 ++and AVX512BF16 instruction set support. + + @item graniterapids +-Intel graniterapids CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, ++Intel Granite Rapids CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, + SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, + RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, + AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, +@@ -34513,7 +34479,7 @@ UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512-FP16, AVX512BF16, AMX-FP16 + and PREFETCHI instruction set support. + + @item graniterapids-d +-Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, ++Intel Granite Rapids D CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, + SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, + RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, + AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, +@@ -34523,33 +34489,75 @@ MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK, + UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, AVX512BF16, AMX-FP16, + PREFETCHI and AMX-COMPLEX instruction set support. 
+ +-@item arrowlake +-Intel Arrow Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, ++@item bonnell ++@itemx atom ++Intel Bonnell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3 and SSSE3 ++instruction set support. ++ ++@item silvermont ++@itemx slm ++Intel Silvermont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, ++SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW and RDRND ++instruction set support. ++ ++@item goldmont ++Intel Goldmont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, ++SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA, ++RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT and FSGSBASE instruction ++set support. ++ ++@item goldmont-plus ++Intel Goldmont Plus CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, ++SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, ++SHA, RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, ++RDPID and SGX instruction set support. ++ ++@item tremont ++Intel Tremont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, ++SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA, ++RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, RDPID, ++SGX, CLWB, GFNI-SSE, MOVDIRI, MOVDIR64B, CLDEMOTE and WAITPKG instruction set ++support. ++ ++@item sierraforest ++Intel Sierra Forest CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, + SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, + XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, + MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, + PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, +-UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set ++AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, ENQCMD and UINTR instruction set + support. 
+ +-@item arrowlake-s +-@itemx lunarlake +-Intel Arrow Lake S/Lunarlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, ++@item grandridge ++Intel Grand Ridge CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, ++SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, ++XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, ++MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, ++PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, ++AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, ENQCMD and UINTR instruction set ++support. ++ ++@item clearwaterforest ++Intel Clearwater Forest CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, + SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, + XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, + MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, + LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, +-UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, +-SM3 and SM4 instruction set support. ++ENQCMD, UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, ++SHA512, SM3, SM4, USER_MSR and PREFETCHI instruction set support. + +-@item pantherlake +-Intel Panther Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, +-SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, +-XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, +-MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, +-PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, +-UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, +-SM3, SM4 and PREFETCHI instruction set support. 
++@item knl ++Intel Knights Landing CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, ++SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, ++RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, ++AVX512PF, AVX512ER, AVX512F, AVX512CD and PREFETCHWT1 instruction set support. ++ ++@item knm ++Intel Knights Mill CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, ++SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, ++RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, ++AVX512PF, AVX512ER, AVX512F, AVX512CD and PREFETCHWT1, AVX5124VNNIW, ++AVX5124FMAPS and AVX512VPOPCNTDQ instruction set support. + + @item k6 + AMD K6 CPU with MMX instruction set support. +-- +2.31.1 + diff --git a/GCC14-1013-Add-new-microarchitecture-tune-for-SRF-GRR-CWF.patch b/GCC14-1013-Add-new-microarchitecture-tune-for-SRF-GRR-CWF.patch new file mode 100644 index 0000000000000000000000000000000000000000..ac4f937fda8d53c4d22dc1c8cb4963a28d606c1c --- /dev/null +++ b/GCC14-1013-Add-new-microarchitecture-tune-for-SRF-GRR-CWF.patch @@ -0,0 +1,173 @@ +From 5ecb6cc6761adb80e104a7a8e4b946cb74adca25 Mon Sep 17 00:00:00 2001 +From: liuhongt +Date: Tue, 24 Sep 2024 15:53:14 +0800 +Subject: [PATCH 08/21] Add new microarchitecture tune for SRF/GRR/CWF. + +For Crestmont, 4-operand vex blendv instructions come from MSROM and +is slower than 3-instructions sequence (op1 & mask) | (op2 & ~mask). +legacy blendv instruction can still be handled by the decoder. + +The patch add a new tune which is enabled for all processors except +for SRF/CWF. It will use vpand + vpandn + vpor instead of +vpblendvb(similar for vblendvps/vblendvpd) for SRF/CWF. + +gcc/ChangeLog: + + * config/i386/i386-expand.cc (ix86_expand_sse_movcc): Guard + instruction blendv generation under new tune. + * config/i386/i386.h (TARGET_SSE_MOVCC_USE_BLENDV): New Macro. 
+ * config/i386/x86-tune.def (X86_TUNE_SSE_MOVCC_USE_BLENDV): + New tune. + +(cherry picked from commit fe0692f689a18c432d6f59f404d4cd020cbebef2) +--- + gcc/config/i386/i386-expand.cc | 24 +++++++++---------- + gcc/config/i386/i386.h | 2 ++ + gcc/config/i386/x86-tune.def | 8 +++++++ + .../gcc.target/i386/sse_movcc_use_blendv.c | 12 ++++++++++ + 4 files changed, 34 insertions(+), 12 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/sse_movcc_use_blendv.c + +diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc +index 51efe6fdd7d..0a24a46fbf8 100644 +--- a/gcc/config/i386/i386-expand.cc ++++ b/gcc/config/i386/i386-expand.cc +@@ -4220,23 +4220,23 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) + switch (mode) + { + case E_V2SFmode: +- if (TARGET_SSE4_1) ++ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1) + gen = gen_mmx_blendvps; + break; + case E_V4SFmode: +- if (TARGET_SSE4_1) ++ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1) + gen = gen_sse4_1_blendvps; + break; + case E_V2DFmode: +- if (TARGET_SSE4_1) ++ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1) + gen = gen_sse4_1_blendvpd; + break; + case E_SFmode: +- if (TARGET_SSE4_1) ++ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1) + gen = gen_sse4_1_blendvss; + break; + case E_DFmode: +- if (TARGET_SSE4_1) ++ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1) + gen = gen_sse4_1_blendvsd; + break; + case E_V8QImode: +@@ -4244,7 +4244,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) + case E_V4HFmode: + case E_V4BFmode: + case E_V2SImode: +- if (TARGET_SSE4_1) ++ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1) + { + gen = gen_mmx_pblendvb_v8qi; + blend_mode = V8QImode; +@@ -4254,14 +4254,14 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) + case E_V2HImode: + case E_V2HFmode: + case E_V2BFmode: +- if (TARGET_SSE4_1) ++ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1) + { + gen = gen_mmx_pblendvb_v4qi; + 
blend_mode = V4QImode; + } + break; + case E_V2QImode: +- if (TARGET_SSE4_1) ++ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1) + gen = gen_mmx_pblendvb_v2qi; + break; + case E_V16QImode: +@@ -4271,18 +4271,18 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) + case E_V4SImode: + case E_V2DImode: + case E_V1TImode: +- if (TARGET_SSE4_1) ++ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1) + { + gen = gen_sse4_1_pblendvb; + blend_mode = V16QImode; + } + break; + case E_V8SFmode: +- if (TARGET_AVX) ++ if (TARGET_AVX && TARGET_SSE_MOVCC_USE_BLENDV) + gen = gen_avx_blendvps256; + break; + case E_V4DFmode: +- if (TARGET_AVX) ++ if (TARGET_AVX && TARGET_SSE_MOVCC_USE_BLENDV) + gen = gen_avx_blendvpd256; + break; + case E_V32QImode: +@@ -4291,7 +4291,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) + case E_V16BFmode: + case E_V8SImode: + case E_V4DImode: +- if (TARGET_AVX2) ++ if (TARGET_AVX2 && TARGET_SSE_MOVCC_USE_BLENDV) + { + gen = gen_avx2_pblendvb; + blend_mode = V32QImode; +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index 26e15d2677f..d78e554ec4b 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -459,6 +459,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; + ix86_tune_features[X86_TUNE_DEST_FALSE_DEP_FOR_GLC] + #define TARGET_SLOW_STC ix86_tune_features[X86_TUNE_SLOW_STC] + #define TARGET_USE_RCR ix86_tune_features[X86_TUNE_USE_RCR] ++#define TARGET_SSE_MOVCC_USE_BLENDV \ ++ ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV] + + /* Feature tests against the various architecture variations. 
*/ + enum ix86_arch_indices { +diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def +index 1ab2f444b56..46e847589f9 100644 +--- a/gcc/config/i386/x86-tune.def ++++ b/gcc/config/i386/x86-tune.def +@@ -532,6 +532,14 @@ DEF_TUNE (X86_TUNE_AVOID_512FMA_CHAINS, "avoid_fma512_chains", m_NONE) + DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD, + "v2df_reduction_prefer_haddpd", m_NONE) + ++/* X86_TUNE_SSE_MOVCC_USE_BLENDV: Prefer blendv instructions to ++ 3-instruction sequence (op1 & mask) | (op2 & ~mask) ++ for vector condition move. ++ For Crestmont, 4-operand vex blendv instructions come from MSROM ++ which is slow. */ ++DEF_TUNE (X86_TUNE_SSE_MOVCC_USE_BLENDV, ++ "sse_movcc_use_blendv", ~m_CORE_ATOM) ++ + /*****************************************************************************/ + /* AVX instruction selection tuning (some of SSE flags affects AVX, too) */ + /*****************************************************************************/ +diff --git a/gcc/testsuite/gcc.target/i386/sse_movcc_use_blendv.c b/gcc/testsuite/gcc.target/i386/sse_movcc_use_blendv.c +new file mode 100644 +index 00000000000..ac9f1524949 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/sse_movcc_use_blendv.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-march=sierraforest -O2" } */ ++/* { dg-final { scan-assembler-not {(?n)vp?blendv(b|ps|pd)} } } */ ++ ++void ++foo (int* a, int* b, int* __restrict c) ++{ ++ for (int i = 0; i != 200; i++) ++ { ++ c[i] += a[i] > b[i] ? 
1 : -1; ++ } ++} +-- +2.31.1 + diff --git a/GCC14-1014-Refine-splitters-related-to-combine-vpcmpuw-zero_ext.patch b/GCC14-1014-Refine-splitters-related-to-combine-vpcmpuw-zero_ext.patch new file mode 100644 index 0000000000000000000000000000000000000000..54b0ebd100ed5f3215f5de0840852f45e44a8e0f --- /dev/null +++ b/GCC14-1014-Refine-splitters-related-to-combine-vpcmpuw-zero_ext.patch @@ -0,0 +1,417 @@ +From dd5d6d20ffeaf3ab272521e53f6afa6e3a8ceb03 Mon Sep 17 00:00:00 2001 +From: liuhongt +Date: Wed, 16 Oct 2024 13:43:48 +0800 +Subject: [PATCH 09/21] Refine splitters related to "combine vpcmpuw + + zero_extend to vpcmpuw" + +r12-6103-g1a7ce8570997eb combines vpcmpuw + zero_extend to vpcmpuw +with the pre_reload splitter, but the splitter transforms the +zero_extend into a subreg which make reload think the upper part is +garbage, it's not correct. + +The patch adjusts the zero_extend define_insn_and_split to +define_insn to keep zero_extend. + +gcc/ChangeLog: + + PR target/117159 + * config/i386/sse.md + (*_cmp3_zero_extend): + Change from define_insn_and_split to define_insn. + (*_cmp3_zero_extend): + Ditto. + (*_ucmp3_zero_extend): + Ditto. + (*_ucmp3_zero_extend): + Ditto. + (*_cmp3_zero_extend_2): + Split to the zero_extend pattern. + (*_cmp3_zero_extend_2): + Ditto. + (*_ucmp3_zero_extend_2): + Ditto. + (*_ucmp3_zero_extend_2): + Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr117159.c: New test. + * gcc.target/i386/avx512bw-pr103750-1.c: Remove xfail. + * gcc.target/i386/avx512bw-pr103750-2.c: Remove xfail. 
+ +(cherry picked from commit 79e7e02b7cc578d03eab2b50c029f44409ef8e26) +--- + gcc/config/i386/sse.md | 198 +++++++----------- + .../gcc.target/i386/avx512bw-pr103750-1.c | 3 +- + .../gcc.target/i386/avx512bw-pr103750-2.c | 3 +- + gcc/testsuite/gcc.target/i386/pr117159.c | 42 ++++ + 4 files changed, 125 insertions(+), 121 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr117159.c + +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index 218aa412c33..6591547159f 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -3967,32 +3967,19 @@ + + ;; Since vpcmpd implicitly clear the upper bits of dest, transform + ;; vpcmpd + zero_extend to vpcmpd since the instruction +-(define_insn_and_split "*_cmp3_zero_extend" +- [(set (match_operand:SWI248x 0 "register_operand") ++(define_insn "*_cmp3_zero_extend" ++ [(set (match_operand:SWI248x 0 "register_operand" "=k") + (zero_extend:SWI248x + (unspec: +- [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand") +- (match_operand:V48H_AVX512VL 2 "nonimmediate_operand") +- (match_operand:SI 3 "const_0_to_7_operand")] ++ [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand" "v") ++ (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm") ++ (match_operand:SI 3 "const_0_to_7_operand" "n")] + UNSPEC_PCMP)))] + "TARGET_AVX512F + && (!VALID_MASK_AVX512BW_MODE (mode) || TARGET_AVX512BW) +- && ix86_pre_reload_split () + && (GET_MODE_NUNITS (mode) + < GET_MODE_PRECISION (mode))" +- "#" +- "&& 1" +- [(set (match_dup 0) +- (unspec: +- [(match_dup 1) +- (match_dup 2) +- (match_dup 3)] +- UNSPEC_PCMP))] +-{ +- operands[1] = force_reg (mode, operands[1]); +- operands[0] = lowpart_subreg (mode, +- operands[0], mode); +-} ++ "vcmp\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") +@@ -4020,21 +4007,22 @@ + "#" + "&& 1" + [(set (match_dup 0) +- (unspec: +- [(match_dup 1) +- (match_dup 2) +- (match_dup 3)] +- UNSPEC_PCMP)) 
+- (set (match_dup 4) (match_dup 0))] ++ (zero_extend:SWI248x ++ (unspec: ++ [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3)] ++ UNSPEC_PCMP))) ++ (set (match_dup 4) (match_dup 5))] + { +- operands[1] = force_reg (mode, operands[1]); +- operands[0] = lowpart_subreg (mode, ++ operands[5] = lowpart_subreg (mode, + operands[0], mode); +-} +- [(set_attr "type" "ssecmp") +- (set_attr "length_immediate" "1") +- (set_attr "prefix" "evex") +- (set_attr "mode" "")]) ++ if (SUBREG_P (operands[5])) ++ { ++ SUBREG_PROMOTED_VAR_P (operands[5]) = 1; ++ SUBREG_PROMOTED_SET (operands[5], 1); ++ } ++}) + + (define_insn_and_split "*_cmp3" + [(set (match_operand: 0 "register_operand") +@@ -4069,31 +4057,18 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +-(define_insn_and_split "*_cmp3_zero_extend" +- [(set (match_operand:SWI248x 0 "register_operand") ++(define_insn "*_cmp3_zero_extend" ++ [(set (match_operand:SWI248x 0 "register_operand" "=k") + (zero_extend:SWI248x + (unspec: +- [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand") +- (match_operand:VI12_AVX512VL 2 "nonimmediate_operand") +- (match_operand:SI 3 "const_0_to_7_operand")] ++ [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v") ++ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm") ++ (match_operand:SI 3 "const_0_to_7_operand" "n")] + UNSPEC_PCMP)))] + "TARGET_AVX512BW +- && ix86_pre_reload_split () +- && (GET_MODE_NUNITS (mode) +- < GET_MODE_PRECISION (mode))" +- "#" +- "&& 1" +- [(set (match_dup 0) +- (unspec: +- [(match_dup 1) +- (match_dup 2) +- (match_dup 3)] +- UNSPEC_PCMP))] +-{ +- operands[1] = force_reg (mode, operands[1]); +- operands[0] = lowpart_subreg (mode, +- operands[0], mode); +-} ++ && (GET_MODE_NUNITS (mode) ++ < GET_MODE_PRECISION (mode))" ++ "vpcmp\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") +@@ -4120,16 +4095,21 @@ + "#" + "&& 1" + [(set (match_dup 0) +- (unspec: +- [(match_dup 1) +- 
(match_dup 2) +- (match_dup 3)] +- UNSPEC_PCMP)) +- (set (match_dup 4) (match_dup 0))] ++ (zero_extend:SWI248x ++ (unspec: ++ [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3)] ++ UNSPEC_PCMP))) ++ (set (match_dup 4) (match_dup 5))] + { +- operands[1] = force_reg (mode, operands[1]); +- operands[0] = lowpart_subreg (mode, ++ operands[5] = lowpart_subreg (mode, + operands[0], mode); ++ if (SUBREG_P (operands[5])) ++ { ++ SUBREG_PROMOTED_VAR_P (operands[5]) = 1; ++ SUBREG_PROMOTED_SET (operands[5], 1); ++ } + } + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") +@@ -4187,31 +4167,18 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +-(define_insn_and_split "*_ucmp3_zero_extend" +- [(set (match_operand:SWI248x 0 "register_operand") ++(define_insn "*_ucmp3_zero_extend" ++ [(set (match_operand:SWI248x 0 "register_operand" "=k") + (zero_extend:SWI248x + (unspec: +- [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand") +- (match_operand:VI12_AVX512VL 2 "nonimmediate_operand") +- (match_operand:SI 3 "const_0_to_7_operand")] ++ [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v") ++ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm") ++ (match_operand:SI 3 "const_0_to_7_operand" "n")] + UNSPEC_UNSIGNED_PCMP)))] + "TARGET_AVX512BW +- && ix86_pre_reload_split () + && (GET_MODE_NUNITS (mode) + < GET_MODE_PRECISION (mode))" +- "#" +- "&& 1" +- [(set (match_dup 0) +- (unspec: +- [(match_dup 1) +- (match_dup 2) +- (match_dup 3)] +- UNSPEC_UNSIGNED_PCMP))] +-{ +- operands[1] = force_reg (mode, operands[1]); +- operands[0] = lowpart_subreg (mode, +- operands[0], mode); +-} ++ "vpcmpu\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") +@@ -4239,16 +4206,21 @@ + "#" + "&& 1" + [(set (match_dup 0) +- (unspec: +- [(match_dup 1) +- (match_dup 2) +- (match_dup 3)] +- UNSPEC_UNSIGNED_PCMP)) +- (set (match_dup 4) (match_dup 0))] +-{ +- operands[1] = force_reg (mode, 
operands[1]); +- operands[0] = lowpart_subreg (mode, ++ (zero_extend:SWI248x ++ (unspec: ++ [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3)] ++ UNSPEC_UNSIGNED_PCMP))) ++ (set (match_dup 4) (match_dup 5))] ++{ ++ operands[5] = lowpart_subreg (mode, + operands[0], mode); ++ if (SUBREG_P (operands[5])) ++ { ++ SUBREG_PROMOTED_VAR_P (operands[5]) = 1; ++ SUBREG_PROMOTED_SET (operands[5], 1); ++ } + } + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") +@@ -4284,32 +4256,19 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +-(define_insn_and_split "*_ucmp3_zero_extend" +- [(set (match_operand:SWI248x 0 "register_operand") ++(define_insn "*_ucmp3_zero_extend" ++ [(set (match_operand:SWI248x 0 "register_operand" "=k") + (zero_extend:SWI248x + (unspec: +- [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand") +- (match_operand:VI48_AVX512VL 2 "nonimmediate_operand") +- (match_operand:SI 3 "const_0_to_7_operand")] ++ [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "v") ++ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm") ++ (match_operand:SI 3 "const_0_to_7_operand" "n")] + UNSPEC_UNSIGNED_PCMP)))] + "TARGET_AVX512F + && (!VALID_MASK_AVX512BW_MODE (mode) || TARGET_AVX512BW) +- && ix86_pre_reload_split () + && (GET_MODE_NUNITS (mode) + < GET_MODE_PRECISION (mode))" +- "#" +- "&& 1" +- [(set (match_dup 0) +- (unspec: +- [(match_dup 1) +- (match_dup 2) +- (match_dup 3)] +- UNSPEC_UNSIGNED_PCMP))] +-{ +- operands[1] = force_reg (mode, operands[1]); +- operands[0] = lowpart_subreg (mode, +- operands[0], mode); +-} ++ "vpcmpu\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") +@@ -4337,16 +4296,21 @@ + "#" + "&& 1" + [(set (match_dup 0) +- (unspec: +- [(match_dup 1) +- (match_dup 2) +- (match_dup 3)] +- UNSPEC_UNSIGNED_PCMP)) +- (set (match_dup 4) (match_dup 0))] +-{ +- operands[1] = force_reg (mode, operands[1]); +- operands[0] = lowpart_subreg (mode, ++ 
(zero_extend:SWI248x ++ (unspec: ++ [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3)] ++ UNSPEC_UNSIGNED_PCMP))) ++ (set (match_dup 4) (match_dup 5))] ++{ ++ operands[5] = lowpart_subreg (mode, + operands[0], mode); ++ if (SUBREG_P (operands[5])) ++ { ++ SUBREG_PROMOTED_VAR_P (operands[5]) = 1; ++ SUBREG_PROMOTED_SET (operands[5], 1); ++ } + } + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") +diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c +index b1165f069bb..e7d6183232b 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c ++++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c +@@ -1,8 +1,7 @@ + /* PR target/103750 */ + /* { dg-do compile } */ + /* { dg-options "-O2 -mavx512bw -mavx512vl" } */ +-/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ +-/* xfail need to be fixed. */ ++/* { dg-final { scan-assembler-not "kmov" } } */ + + #include + extern __m128i* pi128; +diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c +index 7303f5403ba..3392e193222 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c ++++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c +@@ -1,8 +1,7 @@ + /* PR target/103750 */ + /* { dg-do compile } */ + /* { dg-options "-O2 -mavx512dq -mavx512bw -mavx512vl" } */ +-/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ +-/* xfail need to be fixed. 
*/ ++/* { dg-final { scan-assembler-not "kmov" } } */ + + #include + extern __m128i* pi128; +diff --git a/gcc/testsuite/gcc.target/i386/pr117159.c b/gcc/testsuite/gcc.target/i386/pr117159.c +new file mode 100644 +index 00000000000..b67d682ecef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr117159.c +@@ -0,0 +1,42 @@ ++/* { dg-do run } */ ++/* { dg-options "-Os -mavx512bw" } */ ++/* { dg-require-effective-target avx512bw } */ ++ ++typedef __attribute__((__vector_size__ (4))) unsigned char W; ++typedef __attribute__((__vector_size__ (64))) int V; ++typedef __attribute__((__vector_size__ (64))) long long Vq; ++ ++W w; ++V v; ++Vq vq; ++ ++static inline W ++foo (short m) ++{ ++ unsigned k = __builtin_ia32_pcmpgtq512_mask ((Vq) { }, vq, m); ++ W r = (W) k + w; ++ return r; ++} ++ ++static inline W ++foo1 (short m) ++{ ++ unsigned k = __builtin_ia32_pcmpgtd512_mask ((V) {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, v, m); ++ W r = (W) k + w; ++ return r; ++} ++ ++int ++main () ++{ ++ if (!__builtin_cpu_supports ("avx512bw")) ++ return 0; ++ W y = foo1 (65535); ++ if (!y[0] || !y[1] || y[2] || y[3]) ++ __builtin_abort(); ++ W x = foo (65535); ++ if (x[0] || x[1] || x[2] || x[3]) ++ __builtin_abort(); ++ ++ return 0; ++} +-- +2.31.1 + diff --git a/GCC14-1015-Fix-ICE-due-to-isa-mismatch-for-the-builtins.patch b/GCC14-1015-Fix-ICE-due-to-isa-mismatch-for-the-builtins.patch new file mode 100644 index 0000000000000000000000000000000000000000..7873948dfcc141215611494b7bcfd3160b601e8c --- /dev/null +++ b/GCC14-1015-Fix-ICE-due-to-isa-mismatch-for-the-builtins.patch @@ -0,0 +1,90 @@ +From 234baf5dbb10c6af25fc2f74b8c725e61cdb0238 Mon Sep 17 00:00:00 2001 +From: liuhongt +Date: Tue, 22 Oct 2024 01:54:40 -0700 +Subject: [PATCH 10/21] Fix ICE due to isa mismatch for the builtins. + +gcc/ChangeLog: + + PR target/117240 + * config/i386/i386-builtin.def: Add avx/avx512f to vaes + ymm/zmm builtins. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr117240_avx.c: New test. 
+ * gcc.target/i386/pr117240_avx512f.c: New test. + +(cherry picked from commit b718f6ec1674c0db30f26c65b7a9215e9388dd6c) +--- + gcc/config/i386/i386-builtin.def | 16 ++++++++-------- + gcc/testsuite/gcc.target/i386/pr117240_avx.c | 10 ++++++++++ + gcc/testsuite/gcc.target/i386/pr117240_avx512f.c | 10 ++++++++++ + 3 files changed, 28 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr117240_avx.c + create mode 100644 gcc/testsuite/gcc.target/i386/pr117240_avx512f.c + +diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def +index ab73e20121a..fdd9dba6e54 100644 +--- a/gcc/config/i386/i386-builtin.def ++++ b/gcc/config/i386/i386-builtin.def +@@ -2832,17 +2832,17 @@ BDESC (0, OPTION_MASK_ISA2_RDPID, CODE_FOR_rdpid, "__builtin_ia32_rdpid", IX86_B + + /* VAES. */ + BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v16qi, "__builtin_ia32_vaesdec_v16qi", IX86_BUILTIN_VAESDEC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) +-BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v32qi, "__builtin_ia32_vaesdec_v32qi", IX86_BUILTIN_VAESDEC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) +-BDESC (0, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdec_v64qi, "__builtin_ia32_vaesdec_v64qi", IX86_BUILTIN_VAESDEC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) ++BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v32qi, "__builtin_ia32_vaesdec_v32qi", IX86_BUILTIN_VAESDEC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) ++BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdec_v64qi, "__builtin_ia32_vaesdec_v64qi", IX86_BUILTIN_VAESDEC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) + BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v16qi, "__builtin_ia32_vaesdeclast_v16qi", IX86_BUILTIN_VAESDECLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) +-BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v32qi, 
"__builtin_ia32_vaesdeclast_v32qi", IX86_BUILTIN_VAESDECLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) +-BDESC (0, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdeclast_v64qi, "__builtin_ia32_vaesdeclast_v64qi", IX86_BUILTIN_VAESDECLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) ++BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v32qi, "__builtin_ia32_vaesdeclast_v32qi", IX86_BUILTIN_VAESDECLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) ++BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdeclast_v64qi, "__builtin_ia32_vaesdeclast_v64qi", IX86_BUILTIN_VAESDECLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) + BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v16qi, "__builtin_ia32_vaesenc_v16qi", IX86_BUILTIN_VAESENC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) +-BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v32qi, "__builtin_ia32_vaesenc_v32qi", IX86_BUILTIN_VAESENC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) +-BDESC (0, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenc_v64qi, "__builtin_ia32_vaesenc_v64qi", IX86_BUILTIN_VAESENC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) ++BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v32qi, "__builtin_ia32_vaesenc_v32qi", IX86_BUILTIN_VAESENC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) ++BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenc_v64qi, "__builtin_ia32_vaesenc_v64qi", IX86_BUILTIN_VAESENC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) + BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v16qi, "__builtin_ia32_vaesenclast_v16qi", IX86_BUILTIN_VAESENCLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) +-BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) +-BDESC (0, OPTION_MASK_ISA2_VAES | 
OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) ++BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) ++BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) + + /* BF16 */ + BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf, "__builtin_ia32_cvtne2ps2bf16_v32bf", IX86_BUILTIN_CVTNE2PS2BF16_V32BF, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF) +diff --git a/gcc/testsuite/gcc.target/i386/pr117240_avx.c b/gcc/testsuite/gcc.target/i386/pr117240_avx.c +new file mode 100644 +index 00000000000..88e83085315 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr117240_avx.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mvaes -mno-xsave -Wno-psabi" } */ ++ ++typedef __attribute__((__vector_size__(32))) char V; ++ ++V ++foo(V v) ++{ ++ return __builtin_ia32_vaesenc_v32qi(v, v);/* { dg-error "needs isa option" } */ ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr117240_avx512f.c b/gcc/testsuite/gcc.target/i386/pr117240_avx512f.c +new file mode 100644 +index 00000000000..c2d616a05e3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr117240_avx512f.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mvaes -mevex512 -mno-xsave -Wno-psabi" } */ ++ ++typedef __attribute__((__vector_size__(64))) char V; ++ ++V ++foo(V v) ++{ ++ return __builtin_ia32_vaesenc_v64qi(v, v);/* { dg-error "needs isa option" } */ ++} +-- +2.31.1 + diff --git a/GCC14-1016-Fix-ICE-due-to-subreg-us_truncate.patch b/GCC14-1016-Fix-ICE-due-to-subreg-us_truncate.patch new file mode 100644 index 
0000000000000000000000000000000000000000..0edd3c363c0af7e3051059c3e71f99c190fa1049 --- /dev/null +++ b/GCC14-1016-Fix-ICE-due-to-subreg-us_truncate.patch @@ -0,0 +1,444 @@ +From a586970df40f57d0208b514dcca8b9aa2de911c7 Mon Sep 17 00:00:00 2001 +From: liuhongt +Date: Tue, 29 Oct 2024 02:09:39 -0700 +Subject: [PATCH 11/21] Fix ICE due to subreg:us_truncate. + +force_operand issues an ICE when the input +is (subreg:DI (us_truncate:V8QI)), probably because it's an +invalid rtx, so refine the backend patterns to avoid it. + +gcc/ChangeLog: + + PR target/117318 + * config/i386/sse.md (*avx512vl_v2div2qi2_mask_store_1): + Rename to .. + (avx512vl_v2div2qi2_mask_store_1): .. this. + (avx512vl_v2div2qi2_mask_store_2): Change to + define_expand. + (*avx512vl_v4qi2_mask_store_1): Rename to .. + (avx512vl_v4qi2_mask_store_1): .. this. + (avx512vl_v4qi2_mask_store_2): Change to + define_expand. + (*avx512vl_v8qi2_mask_store_1): Rename to .. + (avx512vl_v8qi2_mask_store_1): .. this. + (avx512vl_v8qi2_mask_store_2): Change to + define_expand. + (*avx512vl_v4hi2_mask_store_1): Rename to .. + (avx512vl_v4hi2_mask_store_1): .. this. + (avx512vl_v4hi2_mask_store_2): Change to + define_expand. + (*avx512vl_v2div2hi2_mask_store_1): Rename to .. + (avx512vl_v2div2hi2_mask_store_1): .. this. + (avx512vl_v2div2hi2_mask_store_2): Change to + define_expand. + (*avx512vl_v2div2si2_mask_store_1): Rename to .. + (avx512vl_v2div2si2_mask_store_1): .. this. + (avx512vl_v2div2si2_mask_store_2): Change to + define_expand. + (*avx512f_v8div16qi2_mask_store_1): Rename to .. + (avx512f_v8div16qi2_mask_store_1): .. this. + (avx512f_v8div16qi2_mask_store_2): Change to + define_expand. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr117318.c: New test.
+ +(cherry picked from commit 71a0cf699b6a2dc03abec53aeafab8b70db2bb07) +--- + gcc/config/i386/sse.md | 268 +++++++++-------------- + gcc/testsuite/gcc.target/i386/pr117318.c | 12 + + 2 files changed, 110 insertions(+), 170 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr117318.c + +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index 6591547159f..771c5cd01d2 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -14551,7 +14551,7 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +-(define_insn "*avx512vl_v2div2qi2_mask_store_1" ++(define_insn "avx512vl_v2div2qi2_mask_store_1" + [(set (match_operand:V2QI 0 "memory_operand" "=m") + (vec_merge:V2QI + (any_truncate:V2QI +@@ -14565,28 +14565,19 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +-(define_insn_and_split "avx512vl_v2div2qi2_mask_store_2" +- [(set (match_operand:HI 0 "memory_operand") +- (subreg:HI +- (vec_merge:V2QI +- (any_truncate:V2QI +- (match_operand:V2DI 1 "register_operand")) +- (vec_select:V2QI +- (subreg:V4QI +- (vec_concat:V2HI +- (match_dup 0) +- (const_int 0)) 0) +- (parallel [(const_int 0) (const_int 1)])) +- (match_operand:QI 2 "register_operand")) 0))] +- "TARGET_AVX512VL && ix86_pre_reload_split ()" +- "#" +- "&& 1" +- [(set (match_dup 0) +- (vec_merge:V2QI +- (any_truncate:V2QI (match_dup 1)) +- (match_dup 0) +- (match_dup 2)))] +- "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);") ++(define_expand "avx512vl_v2div2qi2_mask_store_2" ++ [(match_operand:HI 0 "memory_operand") ++ (any_truncate:V2QI ++ (match_operand:V2DI 1 "register_operand")) ++ (match_operand:QI 2 "register_operand")] ++ "TARGET_AVX512VL" ++{ ++ operands[0] = adjust_address_nv (operands[0], V2QImode, 0); ++ emit_insn (gen_avx512vl_v2div2qi2_mask_store_1 (operands[0], ++ operands[1], ++ operands[2])); ++ DONE; ++}) + + (define_insn "*avx512vl_v4qi2_store_1" + [(set (match_operand:V4QI 0 "memory_operand" "=m") +@@ -14655,7 +14646,7 @@ + 
(set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +-(define_insn "*avx512vl_v4qi2_mask_store_1" ++(define_insn "avx512vl_v4qi2_mask_store_1" + [(set (match_operand:V4QI 0 "memory_operand" "=m") + (vec_merge:V4QI + (any_truncate:V4QI +@@ -14669,29 +14660,19 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +-(define_insn_and_split "avx512vl_v4qi2_mask_store_2" +- [(set (match_operand:SI 0 "memory_operand") +- (subreg:SI +- (vec_merge:V4QI +- (any_truncate:V4QI +- (match_operand:VI4_128_8_256 1 "register_operand")) +- (vec_select:V4QI +- (subreg:V8QI +- (vec_concat:V2SI +- (match_dup 0) +- (const_int 0)) 0) +- (parallel [(const_int 0) (const_int 1) +- (const_int 2) (const_int 3)])) +- (match_operand:QI 2 "register_operand")) 0))] +- "TARGET_AVX512VL && ix86_pre_reload_split ()" +- "#" +- "&& 1" +- [(set (match_dup 0) +- (vec_merge:V4QI +- (any_truncate:V4QI (match_dup 1)) +- (match_dup 0) +- (match_dup 2)))] +- "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);") ++(define_expand "avx512vl_v4qi2_mask_store_2" ++ [(match_operand:SI 0 "memory_operand") ++ (any_truncate:V4QI ++ (match_operand:VI4_128_8_256 1 "register_operand")) ++ (match_operand:QI 2 "register_operand")] ++ "TARGET_AVX512VL" ++{ ++ operands[0] = adjust_address_nv (operands[0], V4QImode, 0); ++ emit_insn (gen_avx512vl_v4qi2_mask_store_1 (operands[0], ++ operands[1], ++ operands[2])); ++ DONE; ++}) + + (define_mode_iterator VI2_128_BW_4_256 + [(V8HI "TARGET_AVX512BW") V8SI]) +@@ -14763,7 +14744,7 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +-(define_insn "*avx512vl_v8qi2_mask_store_1" ++(define_insn "avx512vl_v8qi2_mask_store_1" + [(set (match_operand:V8QI 0 "memory_operand" "=m") + (vec_merge:V8QI + (any_truncate:V8QI +@@ -14777,31 +14758,19 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +-(define_insn_and_split "avx512vl_v8qi2_mask_store_2" +- [(set (match_operand:DI 0 "memory_operand") +- (subreg:DI +- (vec_merge:V8QI +- (any_truncate:V8QI +- 
(match_operand:VI2_128_BW_4_256 1 "register_operand")) +- (vec_select:V8QI +- (subreg:V16QI +- (vec_concat:V2DI +- (match_dup 0) +- (const_int 0)) 0) +- (parallel [(const_int 0) (const_int 1) +- (const_int 2) (const_int 3) +- (const_int 4) (const_int 5) +- (const_int 6) (const_int 7)])) +- (match_operand:QI 2 "register_operand")) 0))] +- "TARGET_AVX512VL && ix86_pre_reload_split ()" +- "#" +- "&& 1" +- [(set (match_dup 0) +- (vec_merge:V8QI +- (any_truncate:V8QI (match_dup 1)) +- (match_dup 0) +- (match_dup 2)))] +- "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);") ++(define_expand "avx512vl_v8qi2_mask_store_2" ++ [(match_operand:DI 0 "memory_operand") ++ (any_truncate:V8QI ++ (match_operand:VI2_128_BW_4_256 1 "register_operand")) ++ (match_operand:QI 2 "register_operand")] ++ "TARGET_AVX512VL" ++{ ++ operands[0] = adjust_address_nv (operands[0], V8QImode, 0); ++ emit_insn (gen_avx512vl_v8qi2_mask_store_1 (operands[0], ++ operands[1], ++ operands[2])); ++ DONE; ++}) + + (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI]) + (define_mode_attr pmov_dst_4 +@@ -14923,7 +14892,7 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +-(define_insn "*avx512vl_v4hi2_mask_store_1" ++(define_insn "avx512vl_v4hi2_mask_store_1" + [(set (match_operand:V4HI 0 "memory_operand" "=m") + (vec_merge:V4HI + (any_truncate:V4HI +@@ -14941,30 +14910,19 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +-(define_insn_and_split "avx512vl_v4hi2_mask_store_2" +- [(set (match_operand:DI 0 "memory_operand") +- (subreg:DI +- (vec_merge:V4HI +- (any_truncate:V4HI +- (match_operand:VI4_128_8_256 1 "register_operand")) +- (vec_select:V4HI +- (subreg:V8HI +- (vec_concat:V2DI +- (match_dup 0) +- (const_int 0)) 0) +- (parallel [(const_int 0) (const_int 1) +- (const_int 2) (const_int 3)])) +- (match_operand:QI 2 "register_operand")) 0))] +- "TARGET_AVX512VL && ix86_pre_reload_split ()" +- "#" +- "&& 1" +- [(set (match_dup 0) +- (vec_merge:V4HI +- (any_truncate:V4HI 
(match_dup 1)) +- (match_dup 0) +- (match_dup 2)))] +- "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);") +- ++(define_expand "avx512vl_v4hi2_mask_store_2" ++ [(match_operand:DI 0 "memory_operand") ++ (any_truncate:V4HI ++ (match_operand:VI4_128_8_256 1 "register_operand")) ++ (match_operand:QI 2 "register_operand")] ++ "TARGET_AVX512VL" ++{ ++ operands[0] = adjust_address_nv (operands[0], V4HImode, 0); ++ emit_insn (gen_avx512vl_v4hi2_mask_store_1 (operands[0], ++ operands[1], ++ operands[2])); ++ DONE; ++}) + + (define_insn "*avx512vl_v2div2hi2_store_1" + [(set (match_operand:V2HI 0 "memory_operand" "=m") +@@ -15025,7 +14983,7 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +-(define_insn "*avx512vl_v2div2hi2_mask_store_1" ++(define_insn "avx512vl_v2div2hi2_mask_store_1" + [(set (match_operand:V2HI 0 "memory_operand" "=m") + (vec_merge:V2HI + (any_truncate:V2HI +@@ -15039,28 +14997,19 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +-(define_insn_and_split "avx512vl_v2div2hi2_mask_store_2" +- [(set (match_operand:SI 0 "memory_operand") +- (subreg:SI +- (vec_merge:V2HI +- (any_truncate:V2HI +- (match_operand:V2DI 1 "register_operand")) +- (vec_select:V2HI +- (subreg:V4HI +- (vec_concat:V2SI +- (match_dup 0) +- (const_int 0)) 0) +- (parallel [(const_int 0) (const_int 1)])) +- (match_operand:QI 2 "register_operand")) 0))] +- "TARGET_AVX512VL && ix86_pre_reload_split ()" +- "#" +- "&& 1" +- [(set (match_dup 0) +- (vec_merge:V2HI +- (any_truncate:V2HI (match_dup 1)) +- (match_dup 0) +- (match_dup 2)))] +- "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);") ++(define_expand "avx512vl_v2div2hi2_mask_store_2" ++ [(match_operand:SI 0 "memory_operand") ++ (any_truncate:V2HI ++ (match_operand:V2DI 1 "register_operand")) ++ (match_operand:QI 2 "register_operand")] ++ "TARGET_AVX512VL" ++{ ++ operands[0] = adjust_address_nv (operands[0], V2HImode, 0); ++ emit_insn (gen_avx512vl_v2div2hi2_mask_store_1 (operands[0], ++ 
operands[1], ++ operands[2])); ++ DONE; ++}) + + (define_expand "truncv2div2si2" + [(set (match_operand:V2SI 0 "register_operand") +@@ -15168,7 +15117,7 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +-(define_insn "*avx512vl_v2div2si2_mask_store_1" ++(define_insn "avx512vl_v2div2si2_mask_store_1" + [(set (match_operand:V2SI 0 "memory_operand" "=m") + (vec_merge:V2SI + (any_truncate:V2SI +@@ -15182,28 +15131,19 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +-(define_insn_and_split "avx512vl_v2div2si2_mask_store_2" +- [(set (match_operand:DI 0 "memory_operand") +- (subreg:DI +- (vec_merge:V2SI +- (any_truncate:V2SI +- (match_operand:V2DI 1 "register_operand")) +- (vec_select:V2SI +- (subreg:V4SI +- (vec_concat:V2DI +- (match_dup 0) +- (const_int 0)) 0) +- (parallel [(const_int 0) (const_int 1)])) +- (match_operand:QI 2 "register_operand")) 0))] +- "TARGET_AVX512VL && ix86_pre_reload_split ()" +- "#" +- "&& 1" +- [(set (match_dup 0) +- (vec_merge:V2SI +- (any_truncate:V2SI (match_dup 1)) +- (match_dup 0) +- (match_dup 2)))] +- "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);") ++(define_expand "avx512vl_v2div2si2_mask_store_2" ++ [(match_operand:DI 0 "memory_operand") ++ (any_truncate:V2SI ++ (match_operand:V2DI 1 "register_operand")) ++ (match_operand:QI 2 "register_operand")] ++ "TARGET_AVX512VL" ++{ ++ operands[0] = adjust_address_nv (operands[0], V2SImode, 0); ++ emit_insn (gen_avx512vl_v2div2si2_mask_store_1 (operands[0], ++ operands[1], ++ operands[2])); ++ DONE; ++}) + + (define_expand "truncv8div8qi2" + [(set (match_operand:V8QI 0 "register_operand") +@@ -15302,7 +15242,7 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +-(define_insn "*avx512f_v8div16qi2_mask_store_1" ++(define_insn "avx512f_v8div16qi2_mask_store_1" + [(set (match_operand:V8QI 0 "memory_operand" "=m") + (vec_merge:V8QI + (any_truncate:V8QI +@@ -15316,31 +15256,19 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + 
+-(define_insn_and_split "avx512f_v8div16qi2_mask_store_2" +- [(set (match_operand:DI 0 "memory_operand") +- (subreg:DI +- (vec_merge:V8QI +- (any_truncate:V8QI +- (match_operand:V8DI 1 "register_operand")) +- (vec_select:V8QI +- (subreg:V16QI +- (vec_concat:V2DI +- (match_dup 0) +- (const_int 0)) 0) +- (parallel [(const_int 0) (const_int 1) +- (const_int 2) (const_int 3) +- (const_int 4) (const_int 5) +- (const_int 6) (const_int 7)])) +- (match_operand:QI 2 "register_operand")) 0))] +- "TARGET_AVX512F && TARGET_EVEX512 && ix86_pre_reload_split ()" +- "#" +- "&& 1" +- [(set (match_dup 0) +- (vec_merge:V8QI +- (any_truncate:V8QI (match_dup 1)) +- (match_dup 0) +- (match_dup 2)))] +- "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);") ++(define_expand "avx512f_v8div16qi2_mask_store_2" ++ [(match_operand:DI 0 "memory_operand") ++ (any_truncate:V8QI ++ (match_operand:V8DI 1 "register_operand")) ++ (match_operand:QI 2 "register_operand")] ++ "TARGET_AVX512F && TARGET_EVEX512" ++{ ++ operands[0] = adjust_address_nv (operands[0], V8QImode, 0); ++ emit_insn (gen_avx512f_v8div16qi2_mask_store_1 (operands[0], ++ operands[1], ++ operands[2])); ++ DONE; ++}) + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; +diff --git a/gcc/testsuite/gcc.target/i386/pr117318.c b/gcc/testsuite/gcc.target/i386/pr117318.c +new file mode 100644 +index 00000000000..3d316ad04cf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr117318.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mavx512f -O" } */ ++ ++typedef __attribute__((__vector_size__ (64))) long long V; ++unsigned long long x; ++ ++unsigned long long ++foo() ++{ ++ __builtin_ia32_pmovusqb512mem_mask (&x, (V){8000000000000000}, 255); ++ return x; ++} +-- +2.31.1 + diff --git a/GCC14-1017-APX-PPX-Avoid-generating-unmatched-pushp-popp-in-pro.patch b/GCC14-1017-APX-PPX-Avoid-generating-unmatched-pushp-popp-in-pro.patch new file mode 100644 index 
0000000000000000000000000000000000000000..3ed62f3a4f8a28749cd898026d380a9b96280ea4 --- /dev/null +++ b/GCC14-1017-APX-PPX-Avoid-generating-unmatched-pushp-popp-in-pro.patch @@ -0,0 +1,157 @@ +From 317928fc26b9720fb8da54f2735901f28b9d6f65 Mon Sep 17 00:00:00 2001 +From: Hongyu Wang +Date: Wed, 7 Feb 2024 14:42:58 +0800 +Subject: [PATCH 12/21] [APX PPX] Avoid generating unmatched pushp/popp in + pro/epilogue + +According to APX spec, the pushp/popp pairs should be matched, +otherwise the PPX hint cannot take effect and causes performance loss. + +In the ix86_expand_epilogue, there are several optimizations that may +cause the epilogue to use mov to restore the regs. Check if PPX is applied +and prevent usage of mov/leave in the epilogue. Also do not use PPX +for eh_return. + +gcc/ChangeLog: + + * config/i386/i386.cc (ix86_expand_prologue): Set apx_ppx_used + flag in m.fs with TARGET_APX_PPX && !crtl->calls_eh_return. + (ix86_emit_save_regs): Emit ppx only when + TARGET_APX_PPX && !crtl->calls_eh_return. + (ix86_expand_epilogue): Don't restore reg using mov when + apx_ppx_used flag is true. + * config/i386/i386.h (struct machine_frame_state): + Add apx_ppx_used flag. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/apx-ppx-2.c: New test. + * gcc.target/i386/apx-ppx-3.c: Likewise.
+ +(cherry picked from commit df542909224a7ff88b204534ad035a0b216a98bf) +--- + gcc/config/i386/i386.cc | 13 +++++++++---- + gcc/config/i386/i386.h | 4 ++++ + gcc/testsuite/gcc.target/i386/apx-ppx-2.c | 14 ++++++++++++++ + gcc/testsuite/gcc.target/i386/apx-ppx-3.c | 7 +++++++ + 4 files changed, 34 insertions(+), 4 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-2.c + create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-3.c + +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index a6b7e8b395e..32780f69151 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -7417,6 +7417,7 @@ ix86_emit_save_regs (void) + { + int regno; + rtx_insn *insn; ++ bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return; + + if (!TARGET_APX_PUSH2POP2 + || !ix86_can_use_push2pop2 () +@@ -7426,7 +7427,7 @@ ix86_emit_save_regs (void) + if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) + { + insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno), +- TARGET_APX_PPX)); ++ use_ppx)); + RTX_FRAME_RELATED_P (insn) = 1; + } + } +@@ -7457,7 +7458,7 @@ ix86_emit_save_regs (void) + regno_list[0]), + gen_rtx_REG (word_mode, + regno_list[1]), +- TARGET_APX_PPX)); ++ use_ppx)); + RTX_FRAME_RELATED_P (insn) = 1; + rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3)); + +@@ -7490,7 +7491,7 @@ ix86_emit_save_regs (void) + else + { + insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno), +- TARGET_APX_PPX)); ++ use_ppx)); + RTX_FRAME_RELATED_P (insn) = 1; + aligned = true; + } +@@ -7499,7 +7500,7 @@ ix86_emit_save_regs (void) + { + insn = emit_insn (gen_push (gen_rtx_REG (word_mode, + regno_list[0]), +- TARGET_APX_PPX)); ++ use_ppx)); + RTX_FRAME_RELATED_P (insn) = 1; + } + } +@@ -8973,6 +8974,7 @@ ix86_expand_prologue (void) + if (!frame.save_regs_using_mov) + { + ix86_emit_save_regs (); ++ m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return; + int_registers_saved = true; + gcc_assert (m->fs.sp_offset == 
frame.reg_save_offset); + } +@@ -9858,6 +9860,9 @@ ix86_expand_epilogue (int style) + /* SEH requires the use of pops to identify the epilogue. */ + else if (TARGET_SEH) + restore_regs_via_mov = false; ++ /* If we already save reg with pushp, don't use move at epilogue. */ ++ else if (m->fs.apx_ppx_used) ++ restore_regs_via_mov = false; + /* If we're only restoring one register and sp cannot be used then + using a move instruction to restore the register since it's + less work than reloading sp and popping the register. */ +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index d78e554ec4b..3056f8b4c10 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -2703,6 +2703,10 @@ struct GTY(()) machine_frame_state + The flags realigned and sp_realigned are mutually exclusive. */ + BOOL_BITFIELD sp_realigned : 1; + ++ /* When APX_PPX used in prologue, force epilogue to emit ++ popp instead of move and leave. */ ++ BOOL_BITFIELD apx_ppx_used : 1; ++ + /* If sp_realigned is set, this is the last valid offset from the CFA + that can be used for access with the frame pointer. */ + HOST_WIDE_INT sp_realigned_fp_last; +diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-2.c b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c +new file mode 100644 +index 00000000000..42a95340b55 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile { target { ! 
ia32 } } } */ ++/* { dg-options "-O1 -mapx-features=ppx -fno-omit-frame-pointer" } */ ++ ++/* { dg-final { scan-assembler "pushp" } } */ ++/* { dg-final { scan-assembler "popp" } } */ ++/* { dg-final { scan-assembler-not "leave" } } */ ++ ++extern int bar (int a); ++extern int *q; ++ ++void foo (int *a) ++{ ++ q[2] = bar (q[1]); ++} +diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-3.c b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c +new file mode 100644 +index 00000000000..76931fbe294 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile { target { ! ia32 } } } */ ++/* { dg-options "-O2 -mapx-features=ppx" } */ ++ ++/* { dg-final { scan-assembler-not "pushp" } } */ ++/* { dg-final { scan-assembler-not "popp" } } */ ++ ++#include "eh_return-2.c" +-- +2.31.1 + diff --git a/GCC14-1018-i386-Do-not-allow-pointer-conversion-for-CMPccXADD-i.patch b/GCC14-1018-i386-Do-not-allow-pointer-conversion-for-CMPccXADD-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..844899460ef451fd39a747d5d101b646eefea4b8 --- /dev/null +++ b/GCC14-1018-i386-Do-not-allow-pointer-conversion-for-CMPccXADD-i.patch @@ -0,0 +1,73 @@ +From ccc5b723882eeb512b5b0fa2c3d29555822367f7 Mon Sep 17 00:00:00 2001 +From: Haochen Jiang +Date: Fri, 1 Nov 2024 15:59:47 +0800 +Subject: [PATCH 13/21] i386: Do not allow pointer conversion for CMPccXADD + intrin under -O0 + +The pointer conversion to a wider type in the macro does not consider +whether the higher bits are cleared or not. It will lead to an unexpected +cmp result. + +After this change, it will throw an incompatible pointer type error just +like -O2 does currently. + +gcc/ChangeLog: + + * config/i386/cmpccxaddintrin.h (_cmpccxadd_epi32): Do not do + type conversion for pointer. + (_cmpccxadd_epi64): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/cmpccxadd-1b.c: New test. 
+ +(cherry picked from commit 82bfb6c5ba6d1f84472271f367221988cd50f478) +--- + gcc/config/i386/cmpccxaddintrin.h | 6 +++--- + gcc/testsuite/gcc.target/i386/cmpccxadd-1b.c | 15 +++++++++++++++ + 2 files changed, 18 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/cmpccxadd-1b.c + +diff --git a/gcc/config/i386/cmpccxaddintrin.h b/gcc/config/i386/cmpccxaddintrin.h +index 39f368ffc08..9349fb00c1b 100644 +--- a/gcc/config/i386/cmpccxaddintrin.h ++++ b/gcc/config/i386/cmpccxaddintrin.h +@@ -72,11 +72,11 @@ _cmpccxadd_epi64 (long long *__A, long long __B, long long __C, + } + #else + #define _cmpccxadd_epi32(A,B,C,D) \ +- __builtin_ia32_cmpccxadd ((int *) (A), (int) (B), (int) (C), \ ++ __builtin_ia32_cmpccxadd ((A), (int) (B), (int) (C), \ + (_CMPCCX_ENUM) (D)) + #define _cmpccxadd_epi64(A,B,C,D) \ +- __builtin_ia32_cmpccxadd64 ((long long *) (A), (long long) (B), \ +- (long long) (C), (_CMPCCX_ENUM) (D)) ++ __builtin_ia32_cmpccxadd64 ((A), (long long) (B), (long long) (C), \ ++ (_CMPCCX_ENUM) (D)) + #endif + + #ifdef __DISABLE_CMPCCXADD__ +diff --git a/gcc/testsuite/gcc.target/i386/cmpccxadd-1b.c b/gcc/testsuite/gcc.target/i386/cmpccxadd-1b.c +new file mode 100644 +index 00000000000..7d20325da50 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/cmpccxadd-1b.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile { target { ! 
ia32 } } } */ ++/* { dg-options "-O0 -mcmpccxadd" } */ ++#include ++ ++short *a; ++int b, c; ++int *d; ++long long e, f; ++ ++void extern ++cmpccxadd_test(void) ++{ ++ b = _cmpccxadd_epi32 (a, b, c, _CMPCCX_O); /* { dg-error "incompatible pointer type" } */ ++ e = _cmpccxadd_epi64 (d, e, f, _CMPCCX_O); /* { dg-error "incompatible pointer type" } */ ++} +-- +2.31.1 + diff --git a/GCC14-1019-i386-Add-OPTION_MASK_ISA2_EVEX512-for-some-AVX512-in.patch b/GCC14-1019-i386-Add-OPTION_MASK_ISA2_EVEX512-for-some-AVX512-in.patch new file mode 100644 index 0000000000000000000000000000000000000000..0942ec613768643b827429459fb4aa3f063173fc --- /dev/null +++ b/GCC14-1019-i386-Add-OPTION_MASK_ISA2_EVEX512-for-some-AVX512-in.patch @@ -0,0 +1,82 @@ +From a39abdaaec03d7506c9c5de258b1b4740540e8d4 Mon Sep 17 00:00:00 2001 +From: "Hu, Lin1" +Date: Tue, 5 Nov 2024 15:49:57 +0800 +Subject: [PATCH 14/21] i386: Add OPTION_MASK_ISA2_EVEX512 for some AVX512 + instructions. + +gcc/ChangeLog: + + PR target/117304 + * config/i386/i386-builtin.def: Add OPTION_MASK_ISA2_EVEX512 for some + AVX512 512-bits instructions. + +gcc/testsuite/ChangeLog: + + PR target/117304 + * gcc.target/i386/pr117304-1.c: New test. 
+ +(cherry picked from commit 05fd99e3d5e9f00e4e23596ed15a3cec2aaba128) +--- + gcc/config/i386/i386-builtin.def | 10 ++++---- + gcc/testsuite/gcc.target/i386/pr117304-1.c | 28 ++++++++++++++++++++++ + 2 files changed, 33 insertions(+), 5 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr117304-1.c + +diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def +index fdd9dba6e54..ee34e0a1497 100644 +--- a/gcc/config/i386/i386-builtin.def ++++ b/gcc/config/i386/i386-builtin.def +@@ -3065,11 +3065,11 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_ + BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT) + BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT) + BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_mask_round, "__builtin_ia32_cvtss2sd_mask_round", IX86_BUILTIN_CVTSS2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT) +-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) +-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) +-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) +-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) +-BDESC (OPTION_MASK_ISA_AVX512F, 0, 
CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) ++BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) ++BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) ++BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) ++BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) ++BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) + BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT) + BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT) + BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT) +diff --git a/gcc/testsuite/gcc.target/i386/pr117304-1.c b/gcc/testsuite/gcc.target/i386/pr117304-1.c +new file mode 100644 +index 00000000000..da26f4bd1b7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr117304-1.c +@@ -0,0 +1,28 @@ ++/* PR 
target/117304 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mavx512f -mno-evex512 -mavx512vl" } */ ++ ++typedef __attribute__((__vector_size__(32))) int __v8si; ++typedef __attribute__((__vector_size__(32))) unsigned int __v8su; ++typedef __attribute__((__vector_size__(64))) double __v8df; ++typedef __attribute__((__vector_size__(64))) int __v16si; ++typedef __attribute__((__vector_size__(64))) unsigned int __v16su; ++typedef __attribute__((__vector_size__(64))) float __v16sf; ++typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__)); ++ ++volatile __v8df df; ++volatile __v16sf sf; ++volatile __v8si hi; ++volatile __v8su hui; ++volatile __v16si i; ++volatile __v16su ui; ++ ++void ++foo() ++{ ++ hi ^= __builtin_ia32_cvttpd2dq512_mask(df, hi, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2dq512_mask'; did you mean '__builtin_ia32_cvttpd2dq128_mask'?" } */ ++ hui ^= __builtin_ia32_cvttpd2udq512_mask(df, hui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2udq512_mask'; did you mean '__builtin_ia32_cvttpd2udq128_mask'?" } */ ++ ui ^= __builtin_ia32_cvttps2dq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2dq512_mask'; did you mean '__builtin_ia32_cvttps2dq128_mask'?" } */ ++ ui ^= __builtin_ia32_cvttps2udq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2udq512_mask'; did you mean '__builtin_ia32_cvttps2udq128_mask'?" } */ ++ __builtin_ia32_cvtudq2ps512_mask(ui, sf, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvtudq2ps512_mask'; did you mean '__builtin_ia32_cvtudq2ps128_mask'?" 
} */ ++} +-- +2.31.1 + diff --git a/GCC14-1020-i386-Modify-regexp-of-pr117304-1.c.patch b/GCC14-1020-i386-Modify-regexp-of-pr117304-1.c.patch new file mode 100644 index 0000000000000000000000000000000000000000..7f974150eaf82453d0485188a1bda42976dee6c0 --- /dev/null +++ b/GCC14-1020-i386-Modify-regexp-of-pr117304-1.c.patch @@ -0,0 +1,40 @@ +From d0c96acf46e0073a037d8693e4c8c25b3978fb47 Mon Sep 17 00:00:00 2001 +From: "Hu, Lin1" +Date: Thu, 7 Nov 2024 10:13:15 +0800 +Subject: [PATCH 15/21] i386: Modify regexp of pr117304-1.c + +Since the test doesn't care if the hint is correct, +modify the regexp of the hint part to avoid future +changes to the hint that would cause the test to fail. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr117304-1.c: Modify regexp. + +(cherry picked from commit 6a0e143a6449bcc250af13642263f671f756500b) +--- + gcc/testsuite/gcc.target/i386/pr117304-1.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/i386/pr117304-1.c b/gcc/testsuite/gcc.target/i386/pr117304-1.c +index da26f4bd1b7..4f00ff7c92a 100644 +--- a/gcc/testsuite/gcc.target/i386/pr117304-1.c ++++ b/gcc/testsuite/gcc.target/i386/pr117304-1.c +@@ -20,9 +20,9 @@ volatile __v16su ui; + void + foo() + { +- hi ^= __builtin_ia32_cvttpd2dq512_mask(df, hi, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2dq512_mask'; did you mean '__builtin_ia32_cvttpd2dq128_mask'?" } */ +- hui ^= __builtin_ia32_cvttpd2udq512_mask(df, hui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2udq512_mask'; did you mean '__builtin_ia32_cvttpd2udq128_mask'?" } */ +- ui ^= __builtin_ia32_cvttps2dq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2dq512_mask'; did you mean '__builtin_ia32_cvttps2dq128_mask'?" 
} */ +- ui ^= __builtin_ia32_cvttps2udq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2udq512_mask'; did you mean '__builtin_ia32_cvttps2udq128_mask'?" } */ +- __builtin_ia32_cvtudq2ps512_mask(ui, sf, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvtudq2ps512_mask'; did you mean '__builtin_ia32_cvtudq2ps128_mask'?" } */ ++ hi ^= __builtin_ia32_cvttpd2dq512_mask(df, hi, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2dq512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ ++ hui ^= __builtin_ia32_cvttpd2udq512_mask(df, hui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2udq512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ ++ ui ^= __builtin_ia32_cvttps2dq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2dq512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ ++ ui ^= __builtin_ia32_cvttps2udq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2udq512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ ++ __builtin_ia32_cvtudq2ps512_mask(ui, sf, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvtudq2ps512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ + } +-- +2.31.1 + diff --git a/GCC14-1021-i386-Add-new-model-number-for-Arrow-Lake.patch b/GCC14-1021-i386-Add-new-model-number-for-Arrow-Lake.patch new file mode 100644 index 0000000000000000000000000000000000000000..91a4f55721a4f46e84072ffe802d36891d786b03 --- /dev/null +++ b/GCC14-1021-i386-Add-new-model-number-for-Arrow-Lake.patch @@ -0,0 +1,30 @@ +From 07372a132627aa03829bdddb99ab0c9f826e4646 Mon Sep 17 00:00:00 2001 +From: Haochen Jiang +Date: Mon, 11 Nov 2024 10:52:33 +0800 +Subject: [PATCH 16/21] i386: Add new model number for Arrow Lake + +gcc/ChangeLog: + + * common/config/i386/cpuinfo.h (get_intel_cpu): Add new model + number for Arrow Lake. 
+ +(cherry picked from commit 4380d6f8acc878fbdeb6ce86f4be64d340bdfd4b) +--- + gcc/common/config/i386/cpuinfo.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index 56427474b7b..e2f1e2f5f46 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -606,6 +606,7 @@ get_intel_cpu (struct __processor_model *cpu_model, + CHECK___builtin_cpu_is ("grandridge"); + cpu_model->__cpu_type = INTEL_GRANDRIDGE; + break; ++ case 0xb5: + case 0xc5: + /* Arrow Lake. */ + cpu = "arrowlake"; +-- +2.31.1 + diff --git a/GCC14-1022-i386-Zero-extend-32-bit-address-to-64-bit-with-optio.patch b/GCC14-1022-i386-Zero-extend-32-bit-address-to-64-bit-with-optio.patch new file mode 100644 index 0000000000000000000000000000000000000000..9d5cf6879a6f8dccef836078dec2d981055aae69 --- /dev/null +++ b/GCC14-1022-i386-Zero-extend-32-bit-address-to-64-bit-with-optio.patch @@ -0,0 +1,104 @@ +From f729f8e4f7b4a84efb9560c1da582dd6b79cc1dc Mon Sep 17 00:00:00 2001 +From: "Hu, Lin1" +Date: Wed, 6 Nov 2024 15:42:13 +0800 +Subject: [PATCH 17/21] i386: Zero extend 32-bit address to 64-bit with option + -mx32 -maddress-mode=long. [PR 117418] + +-maddress-mode=long sets Pmode to DImode, so zero extend the 32-bit address to +64-bit and use a 64-bit register as the pointer to avoid raising an ICE. + +gcc/ChangeLog: + + PR target/117418 + * config/i386/i386-expand.cc (ix86_expand_builtin): Convert + pointer's mode according to Pmode. + +gcc/testsuite/ChangeLog: + + PR target/117418 + * gcc.target/i386/pr117418-1.c: New test. 
+ +(cherry picked from commit 8b4bb54e6c45411845ec559c49f594a6239c3969) +--- + gcc/config/i386/i386-expand.cc | 12 +++++++++++ + gcc/testsuite/gcc.target/i386/pr117418-1.c | 24 ++++++++++++++++++++++ + 2 files changed, 36 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/i386/pr117418-1.c + +diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc +index 0a24a46fbf8..52cf79e2a47 100644 +--- a/gcc/config/i386/i386-expand.cc ++++ b/gcc/config/i386/i386-expand.cc +@@ -13475,6 +13475,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + ++ if (GET_MODE (op1) != Pmode) ++ op1 = convert_to_mode (Pmode, op1, 1); ++ + if (!address_operand (op2, VOIDmode)) + { + op2 = convert_memory_address (Pmode, op2); +@@ -13510,6 +13513,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, + emit_label (ok_label); + emit_insn (gen_rtx_SET (target, pat)); + ++ if (GET_MODE (op0) != Pmode) ++ op0 = convert_to_mode (Pmode, op0, 1); ++ + for (i = 0; i < 8; i++) + { + op = gen_rtx_MEM (V2DImode, +@@ -13534,6 +13540,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, + if (!REG_P (op0)) + op0 = copy_to_mode_reg (SImode, op0); + ++ if (GET_MODE (op2) != Pmode) ++ op2 = convert_to_mode (Pmode, op2, 1); ++ + op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0)); + emit_move_insn (op, op1); + +@@ -13571,6 +13580,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, + if (!REG_P (op0)) + op0 = copy_to_mode_reg (SImode, op0); + ++ if (GET_MODE (op3) != Pmode) ++ op3 = convert_to_mode (Pmode, op3, 1); ++ + /* Force to use xmm0, xmm1 for keylow, keyhi*/ + op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0)); + emit_move_insn (op, op1); +diff --git a/gcc/testsuite/gcc.target/i386/pr117418-1.c b/gcc/testsuite/gcc.target/i386/pr117418-1.c +new file mode 100644 +index 00000000000..4839b139b79 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr117418-1.c +@@ -0,0 +1,24 @@ ++/* PR 
target/117418 */ ++/* { dg-do compile { target { ! ia32 } } } */ ++/* { dg-options "-maddress-mode=long -mwidekl -mx32" } */ ++/* { dg-require-effective-target maybe_x32 } */ ++/* { dg-final { scan-assembler-times "aesdec128kl" 1 } } */ ++/* { dg-final { scan-assembler-times "aesdec256kl" 1 } } */ ++/* { dg-final { scan-assembler-times "aesenc128kl" 1 } } */ ++/* { dg-final { scan-assembler-times "aesenc256kl" 1 } } */ ++/* { dg-final { scan-assembler-times "encodekey128" 1 } } */ ++/* { dg-final { scan-assembler-times "encodekey256" 1 } } */ ++ ++typedef __attribute__((__vector_size__(16))) long long V; ++V a; ++ ++void ++foo() ++{ ++ __builtin_ia32_aesdec128kl_u8 (&a, a, &a); ++ __builtin_ia32_aesdec256kl_u8 (&a, a, &a); ++ __builtin_ia32_aesenc128kl_u8 (&a, a, &a); ++ __builtin_ia32_aesenc256kl_u8 (&a, a, &a); ++ __builtin_ia32_encodekey128_u32 (0, a, &a); ++ __builtin_ia32_encodekey256_u32 (0, a, a, &a); ++} +-- +2.31.1 + diff --git a/GCC14-1023-Fix-uninitialized-operands-2-in-vec_unpacks_hi_v4sf.patch b/GCC14-1023-Fix-uninitialized-operands-2-in-vec_unpacks_hi_v4sf.patch new file mode 100644 index 0000000000000000000000000000000000000000..62ae5909b648e28f398b60f0b0b6249720d8894c --- /dev/null +++ b/GCC14-1023-Fix-uninitialized-operands-2-in-vec_unpacks_hi_v4sf.patch @@ -0,0 +1,37 @@ +From 2f29473fe18bbcd6e8ebf036336def1f76ea45bb Mon Sep 17 00:00:00 2001 +From: liuhongt +Date: Thu, 21 Nov 2024 23:57:38 -0800 +Subject: [PATCH 18/21] Fix uninitialized operands[2] in vec_unpacks_hi_v4sf. + +It could cause weird spill in RA when register pressure is high. + +gcc/ChangeLog: + + PR target/117562 + * config/i386/sse.md (vec_unpacks_hi_v4sf): Initialize + operands[2] with CONST0_RTX. 
+ +(cherry picked from commit 4a63cc6de77481878ec31e1e6ac30e22c50b063a) +--- + gcc/config/i386/sse.md | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index 771c5cd01d2..4376f848d74 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -9628,7 +9628,10 @@ + (match_dup 2) + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_SSE2" +- "operands[2] = gen_reg_rtx (V4SFmode);") ++{ ++ operands[2] = gen_reg_rtx (V4SFmode); ++ emit_move_insn (operands[2], CONST0_RTX (V4SFmode)); ++}) + + (define_expand "vec_unpacks_hi_v8sf" + [(set (match_dup 2) +-- +2.31.1 + diff --git a/GCC14-1024-i386-Fix-AVX512BW-intrin-header-with-__OPTIMIZE__-PR.patch b/GCC14-1024-i386-Fix-AVX512BW-intrin-header-with-__OPTIMIZE__-PR.patch new file mode 100644 index 0000000000000000000000000000000000000000..f66d2b78bb0e275aa3408a6603234ae9160eddc4 --- /dev/null +++ b/GCC14-1024-i386-Fix-AVX512BW-intrin-header-with-__OPTIMIZE__-PR.patch @@ -0,0 +1,39 @@ +From 7ccf847529c8d9822e44876d3c5d167f9b055cae Mon Sep 17 00:00:00 2001 +From: Haochen Jiang +Date: Mon, 10 Feb 2025 14:00:57 +0800 +Subject: [PATCH 19/21] i386: Fix AVX512BW intrin header with __OPTIMIZE__ [PR + 118813] + +When moving intrins around for AVX10 implementation in GCC 14, +the intrin _kshiftli_mask32 and _kshiftri_mask32 are wrongly +wrapped by "#if __OPTIMIZE__" instead of "#ifdef __OPTIMIZE__", +leading to the intrin file not `-Wsystem-headers -Wundef` clean +since r14-4490. + +gcc/ChangeLog: + + PR target/118813 + * config/i386/avx512bwintrin.h: Fix wrong __OPTIMIZE__ + wrap. 
+ +(cherry picked from commit cec0326137ef91e2910a9c70eb9743f032e87137) +--- + gcc/config/i386/avx512bwintrin.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h +index 8991c9c1e57..d07f73dde2f 100644 +--- a/gcc/config/i386/avx512bwintrin.h ++++ b/gcc/config/i386/avx512bwintrin.h +@@ -199,7 +199,7 @@ _kunpackw_mask32 (__mmask16 __A, __mmask16 __B) + (__mmask32) __B); + } + +-#if __OPTIMIZE__ ++#ifdef __OPTIMIZE__ + extern __inline __mmask32 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + _kshiftli_mask32 (__mmask32 __A, unsigned int __B) +-- +2.31.1 + diff --git a/GCC14-1025-i386-Do-not-check-vector-size-conflict-when-AVX512-i.patch b/GCC14-1025-i386-Do-not-check-vector-size-conflict-when-AVX512-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..b8cefbe76bbe7fb7fd05ea97a39b97e98156af1f --- /dev/null +++ b/GCC14-1025-i386-Do-not-check-vector-size-conflict-when-AVX512-i.patch @@ -0,0 +1,41 @@ +From f29e5cb8c3386994a4d3b73908deeabab1da316f Mon Sep 17 00:00:00 2001 +From: Haochen Jiang +Date: Mon, 10 Feb 2025 16:53:27 +0800 +Subject: [PATCH 20/21] i386: Do not check vector size conflict when AVX512 is + not explicitly set [PR 118815] + +When AVX512 is not explicitly set, we should not take EVEX512 bit into +consideration when checking vector size. It will solve the intrin header +file reporting warnings when compiling with -Wsystem-headers. + +However, there is side effect on the usage for '-march=xxx -mavx10.1-256', +where xxx is with AVX512. It will not report warning on vector size for now. +Since it is a rare usage, we will take it. + +gcc/ChangeLog: + + PR target/118815 + * config/i386/i386-options.cc (ix86_option_override_internal): + Do not check vector size conflict when AVX512 is not explicitly + set. 
+ +(cherry picked from commit 31cbac836bb4f4c2172a91ee6164d8fdd32a8cb8) +--- + gcc/config/i386/i386-options.cc | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index f6c450cc871..a6eba1ca2b8 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -2725,6 +2725,7 @@ ix86_option_override_internal (bool main_args_p, + "using 512 as max vector size"); + } + else if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) ++ && (opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F) + && !(OPTION_MASK_ISA2_EVEX512 + & opts->x_ix86_isa_flags2_explicit)) + warning (0, "Vector size conflicts between AVX10.1 and AVX512, using " +-- +2.31.1 + diff --git a/GCC14-1026-i386-Deprecate-m-no-avx10.1-and-make-mno-avx10.1-512.patch b/GCC14-1026-i386-Deprecate-m-no-avx10.1-and-make-mno-avx10.1-512.patch new file mode 100644 index 0000000000000000000000000000000000000000..689104a455ead84572313ad6525a8ebc5415c45b --- /dev/null +++ b/GCC14-1026-i386-Deprecate-m-no-avx10.1-and-make-mno-avx10.1-512.patch @@ -0,0 +1,402 @@ +From 5525dca8e858d3fd06c13b7839f6f9ff8b02c057 Mon Sep 17 00:00:00 2001 +From: Haochen Jiang +Date: Tue, 11 Feb 2025 11:29:34 +0800 +Subject: [PATCH 21/21] i386: Deprecate -m[no-]avx10.1 and make + -mno-avx10.1-512 to disable the whole AVX10.1 + +Based on the feedback we got, we would like to re-alias avx10.x to 512 +bit in the future. This leaves the current avx10.1 alias to 256 bit +inconsistent. Since it has been there for GCC 14.1 and GCC 14.2, +we decide to deprecate avx10.1 alias. The current proposal is not +adding it back in the future, but it might change if necessary. + +For -mno- options, it is confusing what it is disabling when it comes +to avx10. Since there is barely usage enabling AVX10 with 512 bit +then disabling it, we will only provide -mno-avx10.x options in the +future, disabling the whole AVX10.x. 
If someone really wants to disable +512 bit after enabling it, -mavx10.x-512 -mno-avx10.x -mavx10.x-256 is +the only way to do that since we also do not want to break the usual +expression on -m- options enabling everything mentioned. + +However, for avx10.1, since we deprecated avx10.1, there is no reason +we should have -mno-avx10.1. Thus, we need to keep -mno-avx10.1-[256,512]. +To avoid confusion, we will make -mno-avx10.1-512 to disable the +whole AVX10.1 set to match the future -mno-avx10.x. + +gcc/ChangeLog: + + * common/config/i386/i386-common.cc + (OPTION_MASK_ISA2_AVX2_UNSET): Change AVX10.1 unset macro. + (OPTION_MASK_ISA2_AVX10_1_256_UNSET): Removed. + (OPTION_MASK_ISA2_AVX10_1_512_UNSET): Removed. + (OPTION_MASK_ISA2_AVX10_1_UNSET): New. + (ix86_handle_option): Adjust AVX10.1 unset macro. + * common/config/i386/i386-isas.h: Remove avx10.1. + * config/i386/i386-options.cc + (ix86_valid_target_attribute_inner_p): Ditto. + (ix86_option_override_internal): Adjust warning message. + * config/i386/i386.opt: Remove mavx10.1. + * config/i386/i386.opt.urls: Regenerated. + * doc/extend.texi: Remove avx10.1 and adjust doc. + * doc/sourcebuild.texi: Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/avx10_1-1.c: Change to avx10.1-256. + * gcc.target/i386/avx10_1-13.c: Ditto. + * gcc.target/i386/avx10_1-14.c: Ditto. + * gcc.target/i386/avx10_1-21.c: Ditto. + * gcc.target/i386/avx10_1-22.c: Ditto. + * gcc.target/i386/avx10_1-23.c: Ditto. + * gcc.target/i386/avx10_1-24.c: Ditto. + * gcc.target/i386/avx10_1-3.c: Ditto. + * gcc.target/i386/avx10_1-5.c: Ditto. + * gcc.target/i386/avx10_1-6.c: Ditto. + * gcc.target/i386/avx10_1-8.c: Ditto. + * gcc.target/i386/avx10_1-12.c: Adjust warning message. + * gcc.target/i386/avx10_1-19.c: Ditto. + * gcc.target/i386/avx10_1-17.c: Adjust to no-avx10.1-512. 
+ +(cherry picked from commit de562367d344758ea9264992e884f031d4435688) +--- + gcc/common/config/i386/i386-common.cc | 15 +++++++-------- + gcc/common/config/i386/i386-isas.h | 1 - + gcc/config/i386/i386-options.cc | 3 +-- + gcc/config/i386/i386.opt | 5 ----- + gcc/config/i386/i386.opt.urls | 3 --- + gcc/doc/extend.texi | 11 ++++------- + gcc/doc/sourcebuild.texi | 5 +---- + gcc/testsuite/gcc.target/i386/avx10_1-1.c | 2 +- + gcc/testsuite/gcc.target/i386/avx10_1-12.c | 2 +- + gcc/testsuite/gcc.target/i386/avx10_1-13.c | 2 +- + gcc/testsuite/gcc.target/i386/avx10_1-14.c | 2 +- + gcc/testsuite/gcc.target/i386/avx10_1-17.c | 4 ++-- + gcc/testsuite/gcc.target/i386/avx10_1-19.c | 2 +- + gcc/testsuite/gcc.target/i386/avx10_1-21.c | 2 +- + gcc/testsuite/gcc.target/i386/avx10_1-22.c | 2 +- + gcc/testsuite/gcc.target/i386/avx10_1-23.c | 2 +- + gcc/testsuite/gcc.target/i386/avx10_1-24.c | 2 +- + gcc/testsuite/gcc.target/i386/avx10_1-3.c | 2 +- + gcc/testsuite/gcc.target/i386/avx10_1-5.c | 2 +- + gcc/testsuite/gcc.target/i386/avx10_1-6.c | 2 +- + gcc/testsuite/gcc.target/i386/avx10_1-8.c | 2 +- + 21 files changed, 28 insertions(+), 45 deletions(-) + +diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc +index d578918dfb7..bb03ef1e292 100644 +--- a/gcc/common/config/i386/i386-common.cc ++++ b/gcc/common/config/i386/i386-common.cc +@@ -239,7 +239,7 @@ along with GCC; see the file COPYING3. If not see + (OPTION_MASK_ISA2_AVXIFMA_UNSET | OPTION_MASK_ISA2_AVXVNNI_UNSET \ + | OPTION_MASK_ISA2_AVXVNNIINT8_UNSET | OPTION_MASK_ISA2_AVXNECONVERT_UNSET \ + | OPTION_MASK_ISA2_AVXVNNIINT16_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET \ +- | OPTION_MASK_ISA2_AVX10_1_256_UNSET) ++ | OPTION_MASK_ISA2_AVX10_1_UNSET) + #define OPTION_MASK_ISA_AVX512F_UNSET \ + (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \ + | OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \ +@@ -319,9 +319,8 @@ along with GCC; see the file COPYING3. 
If not see + #define OPTION_MASK_ISA2_APX_F_UNSET OPTION_MASK_ISA2_APX_F + #define OPTION_MASK_ISA2_EVEX512_UNSET OPTION_MASK_ISA2_EVEX512 + #define OPTION_MASK_ISA2_USER_MSR_UNSET OPTION_MASK_ISA2_USER_MSR +-#define OPTION_MASK_ISA2_AVX10_1_256_UNSET \ +- (OPTION_MASK_ISA2_AVX10_1_256 | OPTION_MASK_ISA2_AVX10_1_512_UNSET) +-#define OPTION_MASK_ISA2_AVX10_1_512_UNSET OPTION_MASK_ISA2_AVX10_1_512 ++#define OPTION_MASK_ISA2_AVX10_1_UNSET \ ++ (OPTION_MASK_ISA2_AVX10_1_256 | OPTION_MASK_ISA2_AVX10_1_512) + + /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same + as -mno-sse4.1. */ +@@ -1419,8 +1418,8 @@ ix86_handle_option (struct gcc_options *opts, + } + else + { +- opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX10_1_256_UNSET; +- opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_256_UNSET; ++ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX10_1_UNSET; ++ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_UNSET; + opts->x_ix86_no_avx10_1_explicit = 1; + } + return true; +@@ -1435,8 +1434,8 @@ ix86_handle_option (struct gcc_options *opts, + } + else + { +- opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX10_1_512_UNSET; +- opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_512_UNSET; ++ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX10_1_UNSET; ++ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_UNSET; + opts->x_ix86_no_avx10_1_explicit = 1; + } + return true; +diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h +index 9c2179a3dd8..017c795e211 100644 +--- a/gcc/common/config/i386/i386-isas.h ++++ b/gcc/common/config/i386/i386-isas.h +@@ -193,7 +193,6 @@ ISA_NAMES_TABLE_START + ISA_NAMES_TABLE_ENTRY("sm4", FEATURE_SM4, P_NONE, "-msm4") + ISA_NAMES_TABLE_ENTRY("apxf", FEATURE_APX_F, P_NONE, "-mapxf") + ISA_NAMES_TABLE_ENTRY("usermsr", FEATURE_USER_MSR, P_NONE, "-musermsr") +- ISA_NAMES_TABLE_ENTRY("avx10.1", FEATURE_AVX10_1_256, P_NONE, "-mavx10.1") + 
ISA_NAMES_TABLE_ENTRY("avx10.1-256", FEATURE_AVX10_1_256, P_AVX10_1_256, "-mavx10.1-256") + ISA_NAMES_TABLE_ENTRY("avx10.1-512", FEATURE_AVX10_1_512, P_AVX10_1_512, "-mavx10.1-512") + ISA_NAMES_TABLE_END +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index a6eba1ca2b8..11c6ddf0f44 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -1135,7 +1135,6 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], + IX86_ATTR_ISA ("apxf", OPT_mapxf), + IX86_ATTR_ISA ("evex512", OPT_mevex512), + IX86_ATTR_ISA ("usermsr", OPT_musermsr), +- IX86_ATTR_ISA ("avx10.1", OPT_mavx10_1_256), + IX86_ATTR_ISA ("avx10.1-256", OPT_mavx10_1_256), + IX86_ATTR_ISA ("avx10.1-512", OPT_mavx10_1_512), + +@@ -2746,7 +2745,7 @@ ix86_option_override_internal (bool main_args_p, + && ((OPTION_MASK_ISA2_AVX10_1_256 | OPTION_MASK_ISA2_AVX10_1_512) + & opts->x_ix86_isa_flags2_explicit)) + { +- warning (0, "%<-mno-avx10.1, -mno-avx10.1-256, -mno-avx10.1-512%> " ++ warning (0, "%<-mno-avx10.1-256, -mno-avx10.1-512%> " + "cannot disable AVX512 instructions when " + "%<-mavx512XXX%>"); + /* Reset those unset AVX512 flags set by AVX10 options when AVX10 is +diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt +index d5f793a9e8b..f99c4e3ae5d 100644 +--- a/gcc/config/i386/i386.opt ++++ b/gcc/config/i386/i386.opt +@@ -1380,8 +1380,3 @@ mavx10.1-512 + Target Mask(ISA2_AVX10_1_512) Var(ix86_isa_flags2) Save + Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, + and AVX10.1-512 built-in functions and code generation. +- +-mavx10.1 +-Target Alias(mavx10.1-256) +-Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, +-and AVX10.1 built-in functions and code generation. 
+diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls +index 81c5bb9a927..3ed76635002 100644 +--- a/gcc/config/i386/i386.opt.urls ++++ b/gcc/config/i386/i386.opt.urls +@@ -615,6 +615,3 @@ UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1-256) + mavx10.1-512 + UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1-512) + +-mavx10.1 +-UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1) +- +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index e290265d68d..a3272bcce30 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -7383,20 +7383,17 @@ Enable/disable the generation of the USER_MSR instructions. + Enable/disable the generation of the APX features, including + EGPR, PUSH2POP2, NDD and PPX. + +-@cindex @code{target("avx10.1")} function attribute, x86 +-@item avx10.1 +-@itemx no-avx10.1 +-Enable/disable the generation of the AVX10.1 instructions. +- + @cindex @code{target("avx10.1-256")} function attribute, x86 + @item avx10.1-256 + @itemx no-avx10.1-256 +-Enable/disable the generation of the AVX10.1 instructions. ++Enable the generation of the AVX10.1 instructions with 256 bit support. ++Disable the generation of the AVX10.1 instructions. + + @cindex @code{target("avx10.1-512")} function attribute, x86 + @item avx10.1-512 + @itemx no-avx10.1-512 +-Enable/disable the generation of the AVX10.1 512 bit instructions. ++Enable the generation of the AVX10.1 instructions with 512 bit support. ++Disable the generation of the AVX10.1 instructions. + + @cindex @code{target("cld")} function attribute, x86 + @item cld +diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi +index 8e4e59ac44c..5b026cfe073 100644 +--- a/gcc/doc/sourcebuild.texi ++++ b/gcc/doc/sourcebuild.texi +@@ -2543,11 +2543,8 @@ Target supports compiling @code{avx} instructions. + @item avx_runtime + Target supports the execution of @code{avx} instructions. + +-@item avx10.1 +-Target supports the execution of @code{avx10.1} instructions. 
+- + @item avx10.1-256 +-Target supports the execution of @code{avx10.1} instructions. ++Target supports the execution of @code{avx10.1-256} instructions. + + @item avx10.1-512 + Target supports the execution of @code{avx10.1-512} instructions. +diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-1.c b/gcc/testsuite/gcc.target/i386/avx10_1-1.c +index cfd9662bb13..33ce99ed60a 100644 +--- a/gcc/testsuite/gcc.target/i386/avx10_1-1.c ++++ b/gcc/testsuite/gcc.target/i386/avx10_1-1.c +@@ -1,5 +1,5 @@ + /* { dg-do compile { target { ! ia32 } } } */ +-/* { dg-options "-O2 -march=x86-64 -mavx10.1" } */ ++/* { dg-options "-O2 -march=x86-64 -mavx10.1-256" } */ + + #include + +diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-12.c b/gcc/testsuite/gcc.target/i386/avx10_1-12.c +index 61f0e4db61b..ae1c77bbcbd 100644 +--- a/gcc/testsuite/gcc.target/i386/avx10_1-12.c ++++ b/gcc/testsuite/gcc.target/i386/avx10_1-12.c +@@ -1,6 +1,6 @@ + /* { dg-do compile } */ + /* { dg-options "-march=x86-64 -mno-avx10.1-512 -mavx512f" } */ +-/* { dg-warning "'-mno-avx10.1, -mno-avx10.1-256, -mno-avx10.1-512' cannot disable AVX512 instructions when '-mavx512XXX'" "" { target *-*-* } 0 } */ ++/* { dg-warning "'-mno-avx10.1-256, -mno-avx10.1-512' cannot disable AVX512 instructions when '-mavx512XXX'" "" { target *-*-* } 0 } */ + /* { dg-final { scan-assembler "%zmm" } } */ + + #include "avx10_1-2.c" +diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-13.c b/gcc/testsuite/gcc.target/i386/avx10_1-13.c +index 8a111190025..e94ac8e1862 100644 +--- a/gcc/testsuite/gcc.target/i386/avx10_1-13.c ++++ b/gcc/testsuite/gcc.target/i386/avx10_1-13.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=x86-64 -mavx10.1" } */ ++/* { dg-options "-march=x86-64 -mavx10.1-256" } */ + /* { dg-final { scan-assembler "%zmm" } } */ + + typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); +diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-14.c 
b/gcc/testsuite/gcc.target/i386/avx10_1-14.c +index 03222a7a031..76573e644fe 100644 +--- a/gcc/testsuite/gcc.target/i386/avx10_1-14.c ++++ b/gcc/testsuite/gcc.target/i386/avx10_1-14.c +@@ -4,7 +4,7 @@ + + typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); + +-__attribute__ ((target ("avx10.1"))) __m512d ++__attribute__ ((target ("avx10.1-256"))) __m512d + foo () + { /* { dg-warning "Vector size conflicts between AVX10.1 and AVX512, using 512 as max vector size" } */ + __m512d a, b; +diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-17.c b/gcc/testsuite/gcc.target/i386/avx10_1-17.c +index a19230f597b..09f125215dc 100644 +--- a/gcc/testsuite/gcc.target/i386/avx10_1-17.c ++++ b/gcc/testsuite/gcc.target/i386/avx10_1-17.c +@@ -4,9 +4,9 @@ + + typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); + +-__attribute__ ((target ("no-avx10.1"))) __m512d ++__attribute__ ((target ("no-avx10.1-512"))) __m512d + foo () +-{ /* { dg-warning "'-mno-avx10.1, -mno-avx10.1-256, -mno-avx10.1-512' cannot disable AVX512 instructions when '-mavx512XXX'" } */ ++{ /* { dg-warning "'-mno-avx10.1-256, -mno-avx10.1-512' cannot disable AVX512 instructions when '-mavx512XXX'" } */ + __m512d a, b; + a = a + b; + return a; +diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-19.c b/gcc/testsuite/gcc.target/i386/avx10_1-19.c +index 7aacc15aad9..7445ecfa548 100644 +--- a/gcc/testsuite/gcc.target/i386/avx10_1-19.c ++++ b/gcc/testsuite/gcc.target/i386/avx10_1-19.c +@@ -6,7 +6,7 @@ typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); + + __attribute__ ((target ("avx512f"))) __m512d + foo () +-{ /* { dg-warning "'-mno-avx10.1, -mno-avx10.1-256, -mno-avx10.1-512' cannot disable AVX512 instructions when '-mavx512XXX'" } */ ++{ /* { dg-warning "'-mno-avx10.1-256, -mno-avx10.1-512' cannot disable AVX512 instructions when '-mavx512XXX'" } */ + __m512d a, b; + a = a + b; + return a; +diff --git 
a/gcc/testsuite/gcc.target/i386/avx10_1-21.c b/gcc/testsuite/gcc.target/i386/avx10_1-21.c +index 27a7265df61..0a1fcc9c0f8 100644 +--- a/gcc/testsuite/gcc.target/i386/avx10_1-21.c ++++ b/gcc/testsuite/gcc.target/i386/avx10_1-21.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=x86-64 -mavx10.1 -mevex512 -Wno-psabi" } */ ++/* { dg-options "-march=x86-64 -mavx10.1-256 -mevex512 -Wno-psabi" } */ + /* { dg-warning "Using '-mevex512' without any AVX512 features enabled together with AVX10.1 only will not enable any AVX512 or AVX10.1-512 features, using 256 as max vector size" "" { target *-*-* } 0 } */ + /* { dg-final { scan-assembler-not "%zmm" } } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-22.c b/gcc/testsuite/gcc.target/i386/avx10_1-22.c +index 796262283d6..cb649dc5538 100644 +--- a/gcc/testsuite/gcc.target/i386/avx10_1-22.c ++++ b/gcc/testsuite/gcc.target/i386/avx10_1-22.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=x86-64 -mavx10.1 -Wno-psabi" } */ ++/* { dg-options "-march=x86-64 -mavx10.1-256 -Wno-psabi" } */ + /* { dg-final { scan-assembler-not "%zmm" } } */ + + typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); +diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-23.c b/gcc/testsuite/gcc.target/i386/avx10_1-23.c +index 6e8d64d0f34..f31c63650ab 100644 +--- a/gcc/testsuite/gcc.target/i386/avx10_1-23.c ++++ b/gcc/testsuite/gcc.target/i386/avx10_1-23.c +@@ -4,7 +4,7 @@ + + typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); + +-__attribute__ ((target ("avx10.1"))) __m512d ++__attribute__ ((target ("avx10.1-256"))) __m512d + foo () + { /* { dg-warning "Using '-mevex512' without any AVX512 features enabled together with AVX10.1 only will not enable any AVX512 or AVX10.1-512 features, using 256 as max vector size" "" { target *-*-* } 0 } */ + __m512d a, b; +diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-24.c b/gcc/testsuite/gcc.target/i386/avx10_1-24.c 
+index 2e93f041760..1bba0fb4b66 100644 +--- a/gcc/testsuite/gcc.target/i386/avx10_1-24.c ++++ b/gcc/testsuite/gcc.target/i386/avx10_1-24.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -march=x86-64 -mavx10.1" } */ ++/* { dg-options "-O2 -march=x86-64 -mavx10.1-256" } */ + /* { dg-final { scan-assembler-not "%zmm" } } */ + + typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__)); +diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-3.c b/gcc/testsuite/gcc.target/i386/avx10_1-3.c +index 3be988a1a62..a176f2749ce 100644 +--- a/gcc/testsuite/gcc.target/i386/avx10_1-3.c ++++ b/gcc/testsuite/gcc.target/i386/avx10_1-3.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -march=x86-64 -mavx10.1" } */ ++/* { dg-options "-O2 -march=x86-64 -mavx10.1-256" } */ + + #include + +diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-5.c b/gcc/testsuite/gcc.target/i386/avx10_1-5.c +index 20b78ea9510..3079cf14ef0 100644 +--- a/gcc/testsuite/gcc.target/i386/avx10_1-5.c ++++ b/gcc/testsuite/gcc.target/i386/avx10_1-5.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O0 -march=x86-64 -mavx10.1 -Wno-psabi" } */ ++/* { dg-options "-O0 -march=x86-64 -mavx10.1-256 -Wno-psabi" } */ + /* { dg-final { scan-assembler-not ".%zmm" } } */ + + #include "avx10_1-2.c" +diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-6.c b/gcc/testsuite/gcc.target/i386/avx10_1-6.c +index fbc92d5c4ca..60dbd05c4a7 100644 +--- a/gcc/testsuite/gcc.target/i386/avx10_1-6.c ++++ b/gcc/testsuite/gcc.target/i386/avx10_1-6.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -march=x86-64 -mavx10.1" } */ ++/* { dg-options "-O2 -march=x86-64 -mavx10.1-256" } */ + + #include + +diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-8.c b/gcc/testsuite/gcc.target/i386/avx10_1-8.c +index 69b6c6a3e1a..ec930f72218 100644 +--- a/gcc/testsuite/gcc.target/i386/avx10_1-8.c ++++ b/gcc/testsuite/gcc.target/i386/avx10_1-8.c +@@ -1,4 +1,4 @@ + /* { dg-do compile 
{ target { ! ia32 } } } */ +-/* { dg-options "-march=x86-64 -mavx10.1 -mavx512f -mno-evex512" } */ ++/* { dg-options "-march=x86-64 -mavx10.1-256 -mavx512f -mno-evex512" } */ + + #include "avx10_1-1.c" +-- +2.31.1 + diff --git a/gcc-14.spec b/gcc-14.spec index 04d4ee0fc4ac260a7e9cfef2bf3ca0cbab584443..db1e941e9a88d0798938e59911e13eebda55a9c1 100644 --- a/gcc-14.spec +++ b/gcc-14.spec @@ -90,7 +90,7 @@ Summary: Various compilers (C, C++, Objective-C, ...) Name: %{?_scl_prefix}gcc%{gcc_ver} Version: 14.2.1 -Release: 6 +Release: 7 # libgcc, libgfortran, libgomp, libstdc++ and crtstuff have # GCC Runtime Exception. License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD @@ -174,6 +174,28 @@ Patch1002: GCC14-1002-change-gcc-version.patch Patch1003: GCC14-1003-i386-Add-non-optimize-prefetchi-intrins.patch Patch1004: GCC14-1004-riscv-lib64.patch Patch1005: GCC14-1005-libstdc-compat-Update-symbol-list-for-RISC-V-64.patch +Patch1006: GCC14-1006-Refine-constraint-Bk-to-define_special_memory_constr.patch +Patch1007: GCC14-1007-i386-Fix-some-vex-insns-that-prohibit-egpr.patch +Patch1008: GCC14-1008-Align-ix86_-move_max-store_max-with-vectorizer.patch +Patch1009: GCC14-1009-Check-avx-upper-register-for-parallel.patch +Patch1010: GCC14-1010-i386-Fix-vfpclassph-non-optimizied-intrin.patch +Patch1011: GCC14-1011-doc-Enhance-Intel-CPU-documentation.patch +Patch1012: GCC14-1012-doc-Add-more-alias-option-and-reorder-Intel-CPU-marc.patch +Patch1013: GCC14-1013-Add-new-microarchitecture-tune-for-SRF-GRR-CWF.patch +Patch1014: GCC14-1014-Refine-splitters-related-to-combine-vpcmpuw-zero_ext.patch +Patch1015: GCC14-1015-Fix-ICE-due-to-isa-mismatch-for-the-builtins.patch +Patch1016: GCC14-1016-Fix-ICE-due-to-subreg-us_truncate.patch +Patch1017: GCC14-1017-APX-PPX-Avoid-generating-unmatched-pushp-popp-in-pro.patch +Patch1018: GCC14-1018-i386-Do-not-allow-pointer-conversion-for-CMPccXADD-i.patch +Patch1019: 
GCC14-1019-i386-Add-OPTION_MASK_ISA2_EVEX512-for-some-AVX512-in.patch +Patch1020: GCC14-1020-i386-Modify-regexp-of-pr117304-1.c.patch +Patch1021: GCC14-1021-i386-Add-new-model-number-for-Arrow-Lake.patch +Patch1022: GCC14-1022-i386-Zero-extend-32-bit-address-to-64-bit-with-optio.patch +Patch1023: GCC14-1023-Fix-uninitialized-operands-2-in-vec_unpacks_hi_v4sf.patch +Patch1024: GCC14-1024-i386-Fix-AVX512BW-intrin-header-with-__OPTIMIZE__-PR.patch +Patch1025: GCC14-1025-i386-Do-not-check-vector-size-conflict-when-AVX512-i.patch +Patch1026: GCC14-1026-i386-Deprecate-m-no-avx10.1-and-make-mno-avx10.1-512.patch + # On ARM EABI systems, we do want -gnueabi to be part of the # target triple. %global nonsharedver 80 @@ -2203,6 +2225,9 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Mon Feb 17 2025 Hu, Lin - 14.2.1-7 +- [Sync] Sync patches from gcc.gnu.org's releases/gcc-14 + * Wed Aug 28 2024 YunQiang Su - 14.2.1-6 - Fix build on RISC-V 64.