diff --git a/0001-LoongArch-Remove-the-definition-of-the-macro-LOGICAL.patch b/0001-LoongArch-Remove-the-definition-of-the-macro-LOGICAL.patch new file mode 100644 index 0000000000000000000000000000000000000000..9100a88ed5138d1b2812498526ad0d30c768c880 --- /dev/null +++ b/0001-LoongArch-Remove-the-definition-of-the-macro-LOGICAL.patch @@ -0,0 +1,33 @@ +From 80709e422ee33977d9e50eb18a03a537afee5d6f Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Thu, 20 Feb 2025 11:00:49 +0800 +Subject: [PATCH 01/20] LoongArch: Remove the definition of the macro + LOGICAL_OP_NON_SHORT_CIRCUIT under the architecture and use the + default definition instead. + +In some cases, setting this macro as the default can reduce the number of conditional +branch instructions. + +gcc/ChangeLog: + + * config/loongarch/loongarch.h (LOGICAL_OP_NON_SHORT_CIRCUIT): Remove the macro + definition. +--- + gcc/config/loongarch/loongarch.h | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index 6be1029be..4efe23059 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -868,7 +868,6 @@ typedef struct { + 1 is the default; other values are interpreted relative to that. */ + + #define BRANCH_COST(speed_p, predictable_p) la_branch_cost +-#define LOGICAL_OP_NON_SHORT_CIRCUIT 0 + + /* Return the asm template for a conditional branch instruction.
+ OPCODE is the opcode's mnemonic and OPERANDS is the asm template for +-- +2.43.0 + diff --git a/0002-LoongArch-Fix-mode-size-comparision-in-loongarch_exp.patch b/0002-LoongArch-Fix-mode-size-comparision-in-loongarch_exp.patch new file mode 100644 index 0000000000000000000000000000000000000000..2239d501e88d53d26980ede0cc55c19c901bc977 --- /dev/null +++ b/0002-LoongArch-Fix-mode-size-comparision-in-loongarch_exp.patch @@ -0,0 +1,36 @@ +From 52947dd357d48cf80d4bef4ef506d9b772eeb8db Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Wed, 12 Jun 2024 11:01:53 +0800 +Subject: [PATCH 02/20] LoongArch: Fix mode size comparision in + loongarch_expand_conditional_move + +We were comparing a mode size with word_mode, but word_mode is an enum +value thus this does not really make any sense. (Un)luckily E_DImode +happens to be 8 so this seemed to work, but let's make it correct so it +won't blow up when we add LA32 support or add another machine mode... + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_expand_conditional_move): Compare mode size with + UNITS_PER_WORD instead of word_mode. 
+--- + gcc/config/loongarch/loongarch.cc | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 4b456e3ef..de01b448f 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5352,7 +5352,7 @@ loongarch_expand_conditional_move (rtx *operands) + loongarch_emit_float_compare (&code, &op0, &op1); + else + { +- if (GET_MODE_SIZE (GET_MODE (op0)) < word_mode) ++ if (GET_MODE_SIZE (GET_MODE (op0)) < UNITS_PER_WORD) + { + promote_op[0] = (REG_P (op0) && REG_P (operands[2]) && + REGNO (op0) == REGNO (operands[2])); +-- +2.43.0 + diff --git a/0003-LoongArch-Use-bstrins-for-value-1u-const.patch b/0003-LoongArch-Use-bstrins-for-value-1u-const.patch new file mode 100644 index 0000000000000000000000000000000000000000..5af12839564302627265bf8b3dca5b0bc95b6b21 --- /dev/null +++ b/0003-LoongArch-Use-bstrins-for-value-1u-const.patch @@ -0,0 +1,135 @@ +From 0ee2750446865210091658306fe822a880d87035 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 9 Jun 2024 14:43:48 +0800 +Subject: [PATCH 03/20] LoongArch: Use bstrins for "value & (-1u << const)" + +A move/bstrins pair is as fast as a (addi.w|lu12i.w|lu32i.d|lu52i.d)/and +pair, and twice as fast as a srli/slli pair. When the src reg and the dst +reg happen to be the same, the move instruction can be optimized away. + +gcc/ChangeLog: + + * config/loongarch/predicates.md (high_bitmask_operand): New + predicate. + * config/loongarch/constraints.md (Yy): New constraint. + * config/loongarch/loongarch.md (and<mode>3_align): New + define_insn_and_split. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/bstrins-1.c: New test. + * gcc.target/loongarch/bstrins-2.c: New test.
+--- + gcc/config/loongarch/constraints.md | 5 +++++ + gcc/config/loongarch/loongarch.md | 17 +++++++++++++++++ + gcc/config/loongarch/predicates.md | 4 ++++ + gcc/testsuite/gcc.target/loongarch/bstrins-1.c | 9 +++++++++ + gcc/testsuite/gcc.target/loongarch/bstrins-2.c | 14 ++++++++++++++ + 5 files changed, 49 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-2.c + +diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md +index f07d31650..12cf5e292 100644 +--- a/gcc/config/loongarch/constraints.md ++++ b/gcc/config/loongarch/constraints.md +@@ -94,6 +94,7 @@ + ;; "A constant @code{move_operand} that can be safely loaded using + ;; @code{la}." + ;; "Yx" ++;; "Yy" + ;; "Z" - + ;; "ZC" + ;; "A memory operand whose address is formed by a base register and offset +@@ -291,6 +292,10 @@ + "@internal" + (match_operand 0 "low_bitmask_operand")) + ++(define_constraint "Yy" ++ "@internal" ++ (match_operand 0 "high_bitmask_operand")) ++ + (define_constraint "YI" + "@internal + A replicated vector const in which the replicated value is in the range +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 5c80c169c..25c1d323b 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1542,6 +1542,23 @@ + [(set_attr "move_type" "pick_ins") + (set_attr "mode" "<MODE>")]) + ++(define_insn_and_split "and<mode>3_align" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (and:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand:GPR 2 "high_bitmask_operand" "Yy")))] ++ "" ++ "#" ++ "" ++ [(set (match_dup 0) (match_dup 1)) ++ (set (zero_extract:GPR (match_dup 0) (match_dup 2) (const_int 0)) ++ (const_int 0))] ++{ ++ int len; ++ ++ len = low_bitmask_len (<MODE>mode, ~INTVAL (operands[2])); ++ operands[2] = GEN_INT (len); ++}) ++ + (define_insn_and_split "*bstrins_<mode>_for_mask" + [(set
(match_operand:GPR 0 "register_operand" "=r") + (and:GPR (match_operand:GPR 1 "register_operand" "r") +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index eba7f246c..58e406ea5 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -293,6 +293,10 @@ + (and (match_code "const_int") + (match_test "low_bitmask_len (mode, INTVAL (op)) > 12"))) + ++(define_predicate "high_bitmask_operand" ++ (and (match_code "const_int") ++ (match_test "low_bitmask_len (mode, ~INTVAL (op)) > 0"))) ++ + (define_predicate "d_operand" + (and (match_code "reg") + (match_test "GP_REG_P (REGNO (op))"))) +diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-1.c b/gcc/testsuite/gcc.target/loongarch/bstrins-1.c +new file mode 100644 +index 000000000..7cb3a9523 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-1.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r4,\\\$r0,4,0" } } */ ++ ++long ++x (long a) ++{ ++ return a & -32; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-2.c b/gcc/testsuite/gcc.target/loongarch/bstrins-2.c +new file mode 100644 +index 000000000..9777f502e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-2.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r\[0-9\]+,\\\$r0,4,0" } } */ ++ ++struct aligned_buffer { ++ _Alignas(32) char x[1024]; ++}; ++ ++extern int f(char *); ++int g(void) ++{ ++ struct aligned_buffer buf; ++ return f(buf.x); ++} +-- +2.43.0 + diff --git a/0004-LoongArch-Tweak-IOR-rtx_cost-for-bstrins.patch b/0004-LoongArch-Tweak-IOR-rtx_cost-for-bstrins.patch new file mode 100644 index 0000000000000000000000000000000000000000..53aaa5781b8639aa46e5ed59a3476464322d195c --- /dev/null +++ 
b/0004-LoongArch-Tweak-IOR-rtx_cost-for-bstrins.patch @@ -0,0 +1,158 @@ +From c730b5accf8810568763704e0ed321d6774c2ea0 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sat, 15 Jun 2024 18:29:43 +0800 +Subject: [PATCH 04/20] LoongArch: Tweak IOR rtx_cost for bstrins + +Consider + + c &= 0xfff; + a &= ~0xfff; + b &= ~0xfff; + a |= c; + b |= c; + +This can be done with 2 bstrins instructions. But we need to recognize +it in loongarch_rtx_costs or the compiler will not propagate "c & 0xfff" +forward. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc: + (loongarch_use_bstrins_for_ior_with_mask): Split the main logic + into ... + (loongarch_use_bstrins_for_ior_with_mask_1): ... here. + (loongarch_rtx_costs): Special case for IORs that can be + implemented with bstrins. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/bstrins-3.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 73 ++++++++++++++----- + .../gcc.target/loongarch/bstrins-3.c | 16 ++++ + 2 files changed, 72 insertions(+), 17 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-3.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index de01b448f..7476e46ff 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3681,6 +3681,27 @@ loongarch_set_reg_reg_piece_cost (machine_mode mode, unsigned int units) + return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units); + } + ++static int ++loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode mode, ++ unsigned HOST_WIDE_INT mask1, ++ unsigned HOST_WIDE_INT mask2) ++{ ++ if (mask1 != ~mask2 || !mask1 || !mask2) ++ return 0; ++ ++ /* Try to avoid a right-shift.
*/ ++ if (low_bitmask_len (mode, mask1) != -1) ++ return -1; ++ ++ if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1) ++ return 1; ++ ++ if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1) ++ return -1; ++ ++ return 0; ++} ++ + /* Return the cost of moving between two registers of mode MODE. */ + + static int +@@ -3812,6 +3833,38 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + /* Fall through. */ + + case IOR: ++ { ++ rtx op[2] = {XEXP (x, 0), XEXP (x, 1)}; ++ if (GET_CODE (op[0]) == AND && GET_CODE (op[1]) == AND ++ && (mode == SImode || (TARGET_64BIT && mode == DImode))) ++ { ++ rtx rtx_mask0 = XEXP (op[0], 1), rtx_mask1 = XEXP (op[1], 1); ++ if (CONST_INT_P (rtx_mask0) && CONST_INT_P (rtx_mask1)) ++ { ++ unsigned HOST_WIDE_INT mask0 = UINTVAL (rtx_mask0); ++ unsigned HOST_WIDE_INT mask1 = UINTVAL (rtx_mask1); ++ if (loongarch_use_bstrins_for_ior_with_mask_1 (mode, ++ mask0, ++ mask1)) ++ { ++ /* A bstrins instruction */ ++ *total = COSTS_N_INSNS (1); ++ ++ /* A srai instruction */ ++ if (low_bitmask_len (mode, mask0) == -1 ++ && low_bitmask_len (mode, mask1) == -1) ++ *total += COSTS_N_INSNS (1); ++ ++ for (int i = 0; i < 2; i++) ++ *total += set_src_cost (XEXP (op[i], 0), mode, speed); ++ ++ return true; ++ } ++ } ++ } ++ } ++ ++ /* Fall through. */ + case XOR: + /* Double-word operations use two single-word operations. */ + *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2), +@@ -5796,23 +5849,9 @@ bool loongarch_pre_reload_split (void) + int + loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op) + { +- unsigned HOST_WIDE_INT mask1 = UINTVAL (op[2]); +- unsigned HOST_WIDE_INT mask2 = UINTVAL (op[4]); +- +- if (mask1 != ~mask2 || !mask1 || !mask2) +- return 0; +- +- /* Try to avoid a right-shift. 
*/ +- if (low_bitmask_len (mode, mask1) != -1) +- return -1; +- +- if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1) +- return 1; +- +- if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1) +- return -1; +- +- return 0; ++ return loongarch_use_bstrins_for_ior_with_mask_1 (mode, ++ UINTVAL (op[2]), ++ UINTVAL (op[4])); + } + + /* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto +diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-3.c b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c +new file mode 100644 +index 000000000..13762bdef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-rtl-final" } */ ++/* { dg-final { scan-rtl-dump-times "insv\[sd\]i" 2 "final" } } */ ++ ++struct X { ++ long a, b; ++}; ++ ++struct X ++test (long a, long b, long c) ++{ ++ c &= 0xfff; ++ a &= ~0xfff; ++ b &= ~0xfff; ++ return (struct X){.a = a | c, .b = b | c}; ++} +-- +2.43.0 + diff --git a/0005-LoongArch-NFC-Dedup-and-sort-the-comment-in-loongarc.patch b/0005-LoongArch-NFC-Dedup-and-sort-the-comment-in-loongarc.patch new file mode 100644 index 0000000000000000000000000000000000000000..269f89bcbb1a071b1df342f7aad702593d6cc1f8 --- /dev/null +++ b/0005-LoongArch-NFC-Dedup-and-sort-the-comment-in-loongarc.patch @@ -0,0 +1,44 @@ +From 8f8948954cabfc92cc4c60f83bc54f7f7835738e Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 16 Jun 2024 12:22:40 +0800 +Subject: [PATCH 05/20] LoongArch: NFC: Dedup and sort the comment in + loongarch_print_operand_reloc + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_print_operand_reloc): + Dedup and sort the comment describing modifiers. 
+--- + gcc/config/loongarch/loongarch.cc | 10 +--------- + 1 file changed, 1 insertion(+), 9 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 7476e46ff..9148ebcbc 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6132,21 +6132,13 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, + 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...), + 'z' for (eq:?I ...), 'n' for (ne:?I ...). + 't' Like 'T', but with the EQ/NE cases reversed +- 'F' Print the FPU branch condition for comparison OP. +- 'W' Print the inverse of the FPU branch condition for comparison OP. +- 'w' Print a LSX register. + 'u' Print a LASX register. +- 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...), +- 'z' for (eq:?I ...), 'n' for (ne:?I ...). +- 't' Like 'T', but with the EQ/NE cases reversed +- 'Y' Print loongarch_fp_conditions[INTVAL (OP)] +- 'Z' Print OP and a comma for 8CC, otherwise print nothing. +- 'z' Print $0 if OP is zero, otherwise print OP normally. + 'v' Print the insn size suffix b, h, w or d for vector modes V16QI, V8HI, + V4SI, V2SI, and w, d for vector modes V4SF, V2DF respectively. + 'V' Print exact log2 of CONST_INT OP element 0 of a replicated + CONST_VECTOR in decimal. + 'W' Print the inverse of the FPU branch condition for comparison OP. ++ 'w' Print a LSX register. + 'X' Print CONST_INT OP in hexadecimal format. + 'x' Print the low 16 bits of CONST_INT OP in hexadecimal format. 
+ 'Y' Print loongarch_fp_conditions[INTVAL (OP)] +-- +2.43.0 + diff --git a/0006-LoongArch-Fix-explicit-relocs-extreme-tls-desc.c-tes.patch b/0006-LoongArch-Fix-explicit-relocs-extreme-tls-desc.c-tes.patch new file mode 100644 index 0000000000000000000000000000000000000000..8080537da42593cfce26afd899d03100b2097fd9 --- /dev/null +++ b/0006-LoongArch-Fix-explicit-relocs-extreme-tls-desc.c-tes.patch @@ -0,0 +1,45 @@ +From ed4da773e214d0b2fa0dfb5136ba8f839c6a7a59 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Fri, 28 Jun 2024 15:04:26 +0800 +Subject: [PATCH 06/20] LoongArch: Fix explicit-relocs-{extreme-,}tls-desc.c + tests. + +After r15-1579, ADD and LD/ST pairs will be merged into LDX/STX. +This causes these two tests to fail. To guarantee that these two tests pass, +add the compilation option '-fno-late-combine-instructions'. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c: + Add compilation options '-fno-late-combine-instructions'. + * gcc.target/loongarch/explicit-relocs-tls-desc.c: Likewise.
+--- + .../gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c | 2 +- + gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c +index 3797556e1..e9eb0d6f7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -mcmodel=extreme" } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -mcmodel=extreme -fno-late-combine-instructions" } */ + + __thread int a __attribute__((visibility("hidden"))); + extern __thread int b __attribute__((visibility("default"))); +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c +index f66903091..fed478458 100644 +--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc" } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -fno-late-combine-instructions" } */ + + __thread int a __attribute__((visibility("hidden"))); + extern __thread int b __attribute__((visibility("default"))); +-- +2.43.0 + diff --git a/0007-LoongArch-Define-loongarch_insn_cost-and-set-the-cos.patch b/0007-LoongArch-Define-loongarch_insn_cost-and-set-the-cos.patch new file mode 100644 index 0000000000000000000000000000000000000000..91cf927215e5429609e754f98caa7f9d66f84ce4 --- /dev/null +++ b/0007-LoongArch-Define-loongarch_insn_cost-and-set-the-cos.patch @@ -0,0 +1,70 @@ +From 2c763d7ec71ef51f8704c8c3444dd5ab9144dbc6 Mon Sep 17 
00:00:00 2001 +From: Lulu Cheng +Date: Fri, 28 Jun 2024 15:09:48 +0800 +Subject: [PATCH 07/20] LoongArch: Define loongarch_insn_cost and set the cost + of movcf2gr and movgr2cf. + +The following two FAIL items have been fixed: + +FAIL: gcc.target/loongarch/movcf2gr-via-fr.c scan-assembler movcf2fr\\t\\\\\$f[0-9]+,\\\\\$fcc +FAIL: gcc.target/loongarch/movcf2gr-via-fr.c scan-assembler movfr2gr\\\\.s\\t\\\\\$r4 + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_insn_cost): + New function. + (TARGET_INSN_COST): New macro. +--- + gcc/config/loongarch/loongarch.cc | 29 +++++++++++++++++++++++++++++ + 1 file changed, 29 insertions(+) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 9148ebcbc..2b2f3c613 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4372,6 +4372,33 @@ loongarch_address_cost (rtx addr, machine_mode mode, + return loongarch_address_insns (addr, mode, false); + } + ++/* Implement TARGET_INSN_COST. */ ++ ++static int ++loongarch_insn_cost (rtx_insn *insn, bool speed) ++{ ++ rtx x = PATTERN (insn); ++ int cost = pattern_cost (x, speed); ++ ++ /* On LA464, prevent movcf2fr and movfr2gr from merging into movcf2gr. */ ++ if (GET_CODE (x) == SET ++ && GET_MODE (XEXP (x, 0)) == FCCmode) ++ { ++ rtx dest, src; ++ dest = XEXP (x, 0); ++ src = XEXP (x, 1); ++ ++ if (REG_P (dest) && REG_P (src)) ++ { ++ if (GP_REG_P (REGNO (dest)) && FCC_REG_P (REGNO (src))) ++ cost = loongarch_cost->movcf2gr; ++ else if (FCC_REG_P (REGNO (dest)) && GP_REG_P (REGNO (src))) ++ cost = loongarch_cost->movgr2cf; ++ } ++ } ++ return cost; ++} ++ + /* Return one word of double-word value OP, taking into account the fixed + endianness of certain registers. HIGH_P is true to select the high part, + false to select the low part. 
*/ +@@ -11093,6 +11120,8 @@ loongarch_asm_code_end (void) + #define TARGET_RTX_COSTS loongarch_rtx_costs + #undef TARGET_ADDRESS_COST + #define TARGET_ADDRESS_COST loongarch_address_cost ++#undef TARGET_INSN_COST ++#define TARGET_INSN_COST loongarch_insn_cost + #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST + #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + loongarch_builtin_vectorization_cost +-- +2.43.0 + diff --git a/0008-LoongArch-Remove-unreachable-codes.patch b/0008-LoongArch-Remove-unreachable-codes.patch new file mode 100644 index 0000000000000000000000000000000000000000..2c027496c9c9c81715287ff99c09de13abcff309 --- /dev/null +++ b/0008-LoongArch-Remove-unreachable-codes.patch @@ -0,0 +1,249 @@ +From b20fb490bf25a076b373fea56cca6c8191584150 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Thu, 4 Jul 2024 15:00:40 +0800 +Subject: [PATCH 08/20] LoongArch: Remove unreachable codes. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_split_move): Delete. + (loongarch_hard_regno_mode_ok_uncached): Likewise. + * config/loongarch/loongarch.md + (move_doubleword_fpr): Likewise. + (load_low): Likewise. + (load_high): Likewise. + (store_word): Likewise. + (movgr2frh): Likewise. + (movfrh2gr): Likewise. 
+--- + gcc/config/loongarch/loongarch.cc | 47 +++---------- + gcc/config/loongarch/loongarch.md | 109 ------------------------------ + 2 files changed, 8 insertions(+), 148 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 2b2f3c613..aabada83d 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4462,42 +4462,13 @@ loongarch_split_move_p (rtx dest, rtx src) + void + loongarch_split_move (rtx dest, rtx src) + { +- rtx low_dest; +- + gcc_checking_assert (loongarch_split_move_p (dest, src)); + if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))) + loongarch_split_128bit_move (dest, src); + else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest))) + loongarch_split_256bit_move (dest, src); +- else if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src)) +- { +- if (!TARGET_64BIT && GET_MODE (dest) == DImode) +- emit_insn (gen_move_doubleword_fprdi (dest, src)); +- else if (!TARGET_64BIT && GET_MODE (dest) == DFmode) +- emit_insn (gen_move_doubleword_fprdf (dest, src)); +- else if (TARGET_64BIT && GET_MODE (dest) == TFmode) +- emit_insn (gen_move_doubleword_fprtf (dest, src)); +- else +- gcc_unreachable (); +- } + else +- { +- /* The operation can be split into two normal moves. Decide in +- which order to do them. 
*/ +- low_dest = loongarch_subword (dest, false); +- if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src)) +- { +- loongarch_emit_move (loongarch_subword (dest, true), +- loongarch_subword (src, true)); +- loongarch_emit_move (low_dest, loongarch_subword (src, false)); +- } +- else +- { +- loongarch_emit_move (low_dest, loongarch_subword (src, false)); +- loongarch_emit_move (loongarch_subword (dest, true), +- loongarch_subword (src, true)); +- } +- } ++ gcc_unreachable (); + } + + /* Check if adding an integer constant value for a specific mode can be +@@ -6746,20 +6717,18 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) + size = GET_MODE_SIZE (mode); + mclass = GET_MODE_CLASS (mode); + +- if (GP_REG_P (regno) && !LSX_SUPPORTED_MODE_P (mode) ++ if (GP_REG_P (regno) ++ && !LSX_SUPPORTED_MODE_P (mode) + && !LASX_SUPPORTED_MODE_P (mode)) + return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD; + +- /* For LSX, allow TImode and 128-bit vector modes in all FPR. */ +- if (FP_REG_P (regno) && LSX_SUPPORTED_MODE_P (mode)) +- return true; +- +- /* FIXED ME: For LASX, allow TImode and 256-bit vector modes in all FPR. */ +- if (FP_REG_P (regno) && LASX_SUPPORTED_MODE_P (mode)) +- return true; +- + if (FP_REG_P (regno)) + { ++ /* Allow 128-bit or 256-bit vector modes in all FPR. */ ++ if (LSX_SUPPORTED_MODE_P (mode) ++ || LASX_SUPPORTED_MODE_P (mode)) ++ return true; ++ + if (mclass == MODE_FLOAT + || mclass == MODE_COMPLEX_FLOAT + || mclass == MODE_VECTOR_FLOAT) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 25c1d323b..21890a2d9 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -400,9 +400,6 @@ + ;; 64-bit modes for which we provide move patterns. + (define_mode_iterator MOVE64 [DI DF]) + +-;; 128-bit modes for which we provide move patterns on 64-bit targets. 
+-(define_mode_iterator MOVE128 [TI TF]) +- + ;; Iterator for sub-32-bit integer modes. + (define_mode_iterator SHORT [QI HI]) + +@@ -421,12 +418,6 @@ + (define_mode_iterator ANYFI [(SI "TARGET_HARD_FLOAT") + (DI "TARGET_DOUBLE_FLOAT")]) + +-;; A mode for which moves involving FPRs may need to be split. +-(define_mode_iterator SPLITF +- [(DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") +- (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") +- (TF "TARGET_64BIT && TARGET_DOUBLE_FLOAT")]) +- + ;; A mode for anything with 32 bits or more, and able to be loaded with + ;; the same addressing mode as ld.w. + (define_mode_iterator LD_AT_LEAST_32_BIT [GPR ANYF]) +@@ -2421,41 +2412,6 @@ + [(set_attr "move_type" "move,load,store") + (set_attr "mode" "DF")]) + +-;; Emit a doubleword move in which exactly one of the operands is +-;; a floating-point register. We can't just emit two normal moves +-;; because of the constraints imposed by the FPU register model; +-;; see loongarch_can_change_mode_class for details. Instead, we keep +-;; the FPR whole and use special patterns to refer to each word of +-;; the other operand. 
+- +-(define_expand "move_doubleword_fpr" +- [(set (match_operand:SPLITF 0) +- (match_operand:SPLITF 1))] +- "" +-{ +- if (FP_REG_RTX_P (operands[0])) +- { +- rtx low = loongarch_subword (operands[1], 0); +- rtx high = loongarch_subword (operands[1], 1); +- emit_insn (gen_load_low (operands[0], low)); +- if (!TARGET_64BIT) +- emit_insn (gen_movgr2frh (operands[0], high, operands[0])); +- else +- emit_insn (gen_load_high (operands[0], high, operands[0])); +- } +- else +- { +- rtx low = loongarch_subword (operands[0], 0); +- rtx high = loongarch_subword (operands[0], 1); +- emit_insn (gen_store_word (low, operands[1], const0_rtx)); +- if (!TARGET_64BIT) +- emit_insn (gen_movfrh2gr (high, operands[1])); +- else +- emit_insn (gen_store_word (high, operands[1], const1_rtx)); +- } +- DONE; +-}) +- + ;; Clear one FCC register + + (define_expand "movfcc" +@@ -2742,49 +2698,6 @@ + [(set_attr "type" "fcvt") + (set_attr "mode" "")]) + +-;; Load the low word of operand 0 with operand 1. +-(define_insn "load_low" +- [(set (match_operand:SPLITF 0 "register_operand" "=f,f") +- (unspec:SPLITF [(match_operand: 1 "general_operand" "rJ,m")] +- UNSPEC_LOAD_LOW))] +- "TARGET_HARD_FLOAT" +-{ +- operands[0] = loongarch_subword (operands[0], 0); +- return loongarch_output_move (operands[0], operands[1]); +-} +- [(set_attr "move_type" "mgtf,fpload") +- (set_attr "mode" "")]) +- +-;; Load the high word of operand 0 from operand 1, preserving the value +-;; in the low word. +-(define_insn "load_high" +- [(set (match_operand:SPLITF 0 "register_operand" "=f,f") +- (unspec:SPLITF [(match_operand: 1 "general_operand" "rJ,m") +- (match_operand:SPLITF 2 "register_operand" "0,0")] +- UNSPEC_LOAD_HIGH))] +- "TARGET_HARD_FLOAT" +-{ +- operands[0] = loongarch_subword (operands[0], 1); +- return loongarch_output_move (operands[0], operands[1]); +-} +- [(set_attr "move_type" "mgtf,fpload") +- (set_attr "mode" "")]) +- +-;; Store one word of operand 1 in operand 0. 
Operand 2 is 1 to store the +-;; high word and 0 to store the low word. +-(define_insn "store_word" +- [(set (match_operand: 0 "nonimmediate_operand" "=r,m") +- (unspec: [(match_operand:SPLITF 1 "register_operand" "f,f") +- (match_operand 2 "const_int_operand")] +- UNSPEC_STORE_WORD))] +- "TARGET_HARD_FLOAT" +-{ +- operands[1] = loongarch_subword (operands[1], INTVAL (operands[2])); +- return loongarch_output_move (operands[0], operands[1]); +-} +- [(set_attr "move_type" "mftg,fpstore") +- (set_attr "mode" "")]) +- + ;; Thread-Local Storage + + (define_insn "@got_load_tls_desc" +@@ -2876,28 +2789,6 @@ + (const_int 4) + (const_int 2)))]) + +-;; Move operand 1 to the high word of operand 0 using movgr2frh.w, preserving the +-;; value in the low word. +-(define_insn "movgr2frh" +- [(set (match_operand:SPLITF 0 "register_operand" "=f") +- (unspec:SPLITF [(match_operand: 1 "reg_or_0_operand" "rJ") +- (match_operand:SPLITF 2 "register_operand" "0")] +- UNSPEC_MOVGR2FRH))] +- "TARGET_DOUBLE_FLOAT" +- "movgr2frh.w\t%z1,%0" +- [(set_attr "move_type" "mgtf") +- (set_attr "mode" "")]) +- +-;; Move high word of operand 1 to operand 0 using movfrh2gr.s. +-(define_insn "movfrh2gr" +- [(set (match_operand: 0 "register_operand" "=r") +- (unspec: [(match_operand:SPLITF 1 "register_operand" "f")] +- UNSPEC_MOVFRH2GR))] +- "TARGET_DOUBLE_FLOAT" +- "movfrh2gr.s\t%0,%1" +- [(set_attr "move_type" "mftg") +- (set_attr "mode" "")]) +- + + ;; Expand in-line code to clear the instruction cache between operand[0] and + ;; operand[1]. 
+-- +2.43.0 + diff --git a/0009-LoongArch-Organize-the-code-related-to-split-move-an.patch b/0009-LoongArch-Organize-the-code-related-to-split-move-an.patch new file mode 100644 index 0000000000000000000000000000000000000000..8c32cb3af37be81aed189c0eb884d5604ed278f0 --- /dev/null +++ b/0009-LoongArch-Organize-the-code-related-to-split-move-an.patch @@ -0,0 +1,413 @@ +From 15a1e38d312fa60c4cbd859b56dba7d19176cee1 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Fri, 12 Jul 2024 09:57:40 +0800 +Subject: [PATCH 09/20] LoongArch: Organize the code related to split move and + merge the same functions. + +gcc/ChangeLog: + + * config/loongarch/loongarch-protos.h + (loongarch_split_128bit_move): Delete. + (loongarch_split_128bit_move_p): Delete. + (loongarch_split_256bit_move): Delete. + (loongarch_split_256bit_move_p): Delete. + (loongarch_split_vector_move): Add a function declaration. + * config/loongarch/loongarch.cc + (loongarch_vector_costs::finish_cost): Adjust the code + formatting. + (loongarch_split_vector_move_p): Merge + loongarch_split_128bit_move_p and loongarch_split_256bit_move_p. + (loongarch_split_move_p): Merge code. + (loongarch_split_move): Likewise. + (loongarch_split_128bit_move_p): Delete. + (loongarch_split_256bit_move_p): Delete. + (loongarch_split_128bit_move): Delete. + (loongarch_split_vector_move): Merge loongarch_split_128bit_move + and loongarch_split_256bit_move. + (loongarch_split_256bit_move): Delete. + (loongarch_global_init): Remove the extra semicolon at the + end of the function. + * config/loongarch/loongarch.md (*movdf_softfloat): Added a new + condition TARGET_64BIT. 
+--- + gcc/config/loongarch/loongarch-protos.h | 5 +- + gcc/config/loongarch/loongarch.cc | 221 ++++++------------------ + gcc/config/loongarch/loongarch.md | 1 + + 3 files changed, 58 insertions(+), 169 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index e238d795a..85f6e8943 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -85,10 +85,7 @@ extern bool loongarch_split_move_p (rtx, rtx); + extern void loongarch_split_move (rtx, rtx); + extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode); + extern void loongarch_split_plus_constant (rtx *, machine_mode); +-extern void loongarch_split_128bit_move (rtx, rtx); +-extern bool loongarch_split_128bit_move_p (rtx, rtx); +-extern void loongarch_split_256bit_move (rtx, rtx); +-extern bool loongarch_split_256bit_move_p (rtx, rtx); ++extern void loongarch_split_vector_move (rtx, rtx); + extern const char *loongarch_output_move (rtx, rtx); + #ifdef RTX_CODE + extern void loongarch_expand_scc (rtx *); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index aabada83d..4b6b9e14b 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4354,10 +4354,10 @@ void + loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs) + { + loop_vec_info loop_vinfo = dyn_cast (m_vinfo); ++ + if (loop_vinfo) +- { +- m_suggested_unroll_factor = determine_suggested_unroll_factor (loop_vinfo); +- } ++ m_suggested_unroll_factor ++ = determine_suggested_unroll_factor (loop_vinfo); + + vector_costs::finish_cost (scalar_costs); + } +@@ -4423,6 +4423,7 @@ loongarch_subword (rtx op, bool high_p) + return simplify_gen_subreg (word_mode, op, mode, byte); + } + ++static bool loongarch_split_vector_move_p (rtx dest, rtx src); + /* Return true if a move from SRC to DEST should be split into two. + SPLIT_TYPE describes the split condition. 
*/ + +@@ -4444,13 +4445,11 @@ loongarch_split_move_p (rtx dest, rtx src) + return false; + } + +- /* Check if LSX moves need splitting. */ +- if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))) +- return loongarch_split_128bit_move_p (dest, src); + +- /* Check if LASX moves need splitting. */ +- if (LASX_SUPPORTED_MODE_P (GET_MODE (dest))) +- return loongarch_split_256bit_move_p (dest, src); ++ /* Check if vector moves need splitting. */ ++ if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)) ++ || LASX_SUPPORTED_MODE_P (GET_MODE (dest))) ++ return loongarch_split_vector_move_p (dest, src); + + /* Otherwise split all multiword moves. */ + return size > UNITS_PER_WORD; +@@ -4463,10 +4462,9 @@ void + loongarch_split_move (rtx dest, rtx src) + { + gcc_checking_assert (loongarch_split_move_p (dest, src)); +- if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))) +- loongarch_split_128bit_move (dest, src); +- else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest))) +- loongarch_split_256bit_move (dest, src); ++ if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)) ++ || LASX_SUPPORTED_MODE_P (GET_MODE (dest))) ++ loongarch_split_vector_move (dest, src); + else + gcc_unreachable (); + } +@@ -4588,224 +4586,117 @@ loongarch_output_move_index_float (rtx x, machine_mode mode, bool ldr) + + return insn[ldr][index-2]; + } +-/* Return true if a 128-bit move from SRC to DEST should be split. */ +- +-bool +-loongarch_split_128bit_move_p (rtx dest, rtx src) +-{ +- /* LSX-to-LSX moves can be done in a single instruction. */ +- if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest)) +- return false; +- +- /* Check for LSX loads and stores. */ +- if (FP_REG_RTX_P (dest) && MEM_P (src)) +- return false; +- if (FP_REG_RTX_P (src) && MEM_P (dest)) +- return false; +- +- /* Check for LSX set to an immediate const vector with valid replicated +- element. */ +- if (FP_REG_RTX_P (dest) +- && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511)) +- return false; +- +- /* Check for LSX load zero immediate. 
*/ +- if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src))) +- return false; +- +- return true; +-} +- +-/* Return true if a 256-bit move from SRC to DEST should be split. */ ++/* Return true if a vector move from SRC to DEST should be split. */ + +-bool +-loongarch_split_256bit_move_p (rtx dest, rtx src) ++static bool ++loongarch_split_vector_move_p (rtx dest, rtx src) + { +- /* LSX-to-LSX moves can be done in a single instruction. */ ++ /* Vector moves can be done in a single instruction. */ + if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest)) + return false; + +- /* Check for LSX loads and stores. */ ++ /* Check for vector loads and stores. */ + if (FP_REG_RTX_P (dest) && MEM_P (src)) + return false; + if (FP_REG_RTX_P (src) && MEM_P (dest)) + return false; + +- /* Check for LSX set to an immediate const vector with valid replicated ++ /* Check for vector set to an immediate const vector with valid replicated + element. */ + if (FP_REG_RTX_P (dest) + && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511)) + return false; + +- /* Check for LSX load zero immediate. */ ++ /* Check for vector load zero immediate. */ + if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src))) + return false; + + return true; + } + +-/* Split a 128-bit move from SRC to DEST. */ ++/* Split a vector move from SRC to DEST. 
*/ + + void +-loongarch_split_128bit_move (rtx dest, rtx src) ++loongarch_split_vector_move (rtx dest, rtx src) + { + int byte, index; +- rtx low_dest, low_src, d, s; ++ rtx s, d; ++ machine_mode mode = GET_MODE (dest); ++ bool lsx_p = LSX_SUPPORTED_MODE_P (mode); + + if (FP_REG_RTX_P (dest)) + { + gcc_assert (!MEM_P (src)); + +- rtx new_dest = dest; +- if (!TARGET_64BIT) +- { +- if (GET_MODE (dest) != V4SImode) +- new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0); +- } +- else +- { +- if (GET_MODE (dest) != V2DImode) +- new_dest = simplify_gen_subreg (V2DImode, dest, GET_MODE (dest), 0); +- } +- +- for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode); +- byte += UNITS_PER_WORD, index++) +- { +- s = loongarch_subword_at_byte (src, byte); +- if (!TARGET_64BIT) +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, s, new_dest, +- GEN_INT (1 << index))); +- else +- emit_insn (gen_lsx_vinsgr2vr_d (new_dest, s, new_dest, +- GEN_INT (1 << index))); +- } +- } +- else if (FP_REG_RTX_P (src)) +- { +- gcc_assert (!MEM_P (dest)); +- +- rtx new_src = src; +- if (!TARGET_64BIT) +- { +- if (GET_MODE (src) != V4SImode) +- new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0); +- } +- else +- { +- if (GET_MODE (src) != V2DImode) +- new_src = simplify_gen_subreg (V2DImode, src, GET_MODE (src), 0); +- } ++ rtx (*gen_vinsgr2vr_d) (rtx, rtx, rtx, rtx); + +- for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode); +- byte += UNITS_PER_WORD, index++) +- { +- d = loongarch_subword_at_byte (dest, byte); +- if (!TARGET_64BIT) +- emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index))); +- else +- emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index))); +- } +- } +- else +- { +- low_dest = loongarch_subword_at_byte (dest, 0); +- low_src = loongarch_subword_at_byte (src, 0); +- gcc_assert (REG_P (low_dest) && REG_P (low_src)); +- /* Make sure the source register is not written before reading. 
*/ +- if (REGNO (low_dest) <= REGNO (low_src)) ++ if (lsx_p) + { +- for (byte = 0; byte < GET_MODE_SIZE (TImode); +- byte += UNITS_PER_WORD) +- { +- d = loongarch_subword_at_byte (dest, byte); +- s = loongarch_subword_at_byte (src, byte); +- loongarch_emit_move (d, s); +- } ++ mode = V2DImode; ++ gen_vinsgr2vr_d = gen_lsx_vinsgr2vr_d; + } + else + { +- for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0; +- byte -= UNITS_PER_WORD) +- { +- d = loongarch_subword_at_byte (dest, byte); +- s = loongarch_subword_at_byte (src, byte); +- loongarch_emit_move (d, s); +- } ++ mode = V4DImode; ++ gen_vinsgr2vr_d = gen_lasx_xvinsgr2vr_d; + } +- } +-} +- +-/* Split a 256-bit move from SRC to DEST. */ +- +-void +-loongarch_split_256bit_move (rtx dest, rtx src) +-{ +- int byte, index; +- rtx low_dest, low_src, d, s; +- +- if (FP_REG_RTX_P (dest)) +- { +- gcc_assert (!MEM_P (src)); + + rtx new_dest = dest; +- if (!TARGET_64BIT) +- { +- if (GET_MODE (dest) != V8SImode) +- new_dest = simplify_gen_subreg (V8SImode, dest, GET_MODE (dest), 0); +- } +- else +- { +- if (GET_MODE (dest) != V4DImode) +- new_dest = simplify_gen_subreg (V4DImode, dest, GET_MODE (dest), 0); +- } ++ ++ if (GET_MODE (dest) != mode) ++ new_dest = simplify_gen_subreg (mode, dest, GET_MODE (dest), 0); + + for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (dest)); + byte += UNITS_PER_WORD, index++) + { + s = loongarch_subword_at_byte (src, byte); +- if (!TARGET_64BIT) +- emit_insn (gen_lasx_xvinsgr2vr_w (new_dest, s, new_dest, +- GEN_INT (1 << index))); +- else +- emit_insn (gen_lasx_xvinsgr2vr_d (new_dest, s, new_dest, +- GEN_INT (1 << index))); ++ emit_insn (gen_vinsgr2vr_d (new_dest, s, new_dest, ++ GEN_INT (1 << index))); + } + } + else if (FP_REG_RTX_P (src)) + { + gcc_assert (!MEM_P (dest)); + +- rtx new_src = src; +- if (!TARGET_64BIT) ++ rtx (*gen_vpickve2gr_d) (rtx, rtx, rtx); ++ ++ if (lsx_p) + { +- if (GET_MODE (src) != V8SImode) +- new_src = simplify_gen_subreg (V8SImode, src, GET_MODE 
(src), 0); ++ mode = V2DImode; ++ gen_vpickve2gr_d = gen_lsx_vpickve2gr_d; + } + else + { +- if (GET_MODE (src) != V4DImode) +- new_src = simplify_gen_subreg (V4DImode, src, GET_MODE (src), 0); ++ mode = V4DImode; ++ gen_vpickve2gr_d = gen_lasx_xvpickve2gr_d; + } + ++ rtx new_src = src; ++ if (GET_MODE (src) != mode) ++ new_src = simplify_gen_subreg (mode, src, GET_MODE (src), 0); ++ + for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (src)); + byte += UNITS_PER_WORD, index++) + { + d = loongarch_subword_at_byte (dest, byte); +- if (!TARGET_64BIT) +- emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index))); +- else +- emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index))); ++ emit_insn (gen_vpickve2gr_d (d, new_src, GEN_INT (index))); + } + } + else + { ++ /* This part of the code is designed to handle the following situations: ++ (set (reg:V2DI 4 $r4) ++ (reg:V2DI 6 $r6)) ++ The trigger test case is lsx-mov-1.c. */ ++ rtx low_dest, low_src; ++ + low_dest = loongarch_subword_at_byte (dest, 0); + low_src = loongarch_subword_at_byte (src, 0); + gcc_assert (REG_P (low_dest) && REG_P (low_src)); + /* Make sure the source register is not written before reading. */ + if (REGNO (low_dest) <= REGNO (low_src)) + { +- for (byte = 0; byte < GET_MODE_SIZE (TImode); ++ for (byte = 0; byte < GET_MODE_SIZE (GET_MODE (dest)); + byte += UNITS_PER_WORD) + { + d = loongarch_subword_at_byte (dest, byte); +@@ -4815,8 +4706,8 @@ loongarch_split_256bit_move (rtx dest, rtx src) + } + else + { +- for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0; +- byte -= UNITS_PER_WORD) ++ for (byte = GET_MODE_SIZE (GET_MODE (dest)) - UNITS_PER_WORD; ++ byte >= 0; byte -= UNITS_PER_WORD) + { + d = loongarch_subword_at_byte (dest, byte); + s = loongarch_subword_at_byte (src, byte); +@@ -7606,7 +7497,7 @@ loongarch_global_init (void) + + /* Function to allocate machine-dependent function status. 
*/ + init_machine_status = &loongarch_init_machine_status; +-}; ++} + + static void + loongarch_reg_init (void) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 21890a2d9..459ad30b9 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2406,6 +2406,7 @@ + [(set (match_operand:DF 0 "nonimmediate_operand" "=r,r,m") + (match_operand:DF 1 "move_operand" "rG,m,rG"))] + "(TARGET_SOFT_FLOAT || TARGET_SINGLE_FLOAT) ++ && TARGET_64BIT + && (register_operand (operands[0], DFmode) + || reg_or_0_operand (operands[1], DFmode))" + { return loongarch_output_move (operands[0], operands[1]); } +-- +2.43.0 + diff --git a/0010-LoongArch-Expand-some-SImode-operations-through-si3_.patch b/0010-LoongArch-Expand-some-SImode-operations-through-si3_.patch new file mode 100644 index 0000000000000000000000000000000000000000..8a295691c622fa32c1c8c430a677a452959a479e --- /dev/null +++ b/0010-LoongArch-Expand-some-SImode-operations-through-si3_.patch @@ -0,0 +1,364 @@ +From 637e6e0c2f3a38ba9f56174e2e92a3ff39e88344 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sat, 20 Jul 2024 20:38:13 +0800 +Subject: [PATCH 10/20] LoongArch: Expand some SImode operations through + "si3_extend" instructions if TARGET_64BIT + +We already had "si3_extend" insns and we hoped the fwprop or combine +passes can use them to remove unnecessary sign extensions. But this +does not always work: for cases like x << 1 | y, the compiler +tends to do + + (sign_extend:DI + (ior:SI (ashift:SI (reg:SI $r4) + (const_int 1)) + (reg:SI $r5))) + +instead of + + (ior:DI (sign_extend:DI (ashift:SI (reg:SI $r4) (const_int 1))) + (sign_extend:DI (reg:SI $r5))) + +So we cannot match the ashlsi3_extend instruction here and we get: + + slli.w $r4,$r4,1 + or $r4,$r5,$r4 + slli.w $r4,$r4,0 # <= redundant + jr $r1 + +To eliminate this redundant extension we need to turn SImode shift etc. 
+to DImode "si3_extend" operations earlier, when we expand the SImode +operation. We are already doing this for addition, now do it for +shifts, rotates, subtract, multiplication, division, and modulo as +well. + +The bytepick.w definition for TARGET_64BIT needs to be adjusted so it +won't be undone by the shift expanding. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (optab): Add (rotatert "rotr"). + (3, 3, + sub3, rotr3, mul3): Add a "*" to the insn name + so we can redefine the names with define_expand. + (*si3_extend): Remove "*" so we can use them + in expanders. + (*subsi3_extended, *mulsi3_extended): Likewise, also remove the + trailing "ed" for consistency. + (*si3_extended): Add mode for sign_extend to + prevent an ICE using it in expanders. + (shift_w, arith_w): New define_code_iterator. + (3): New define_expand. Expand with + si3_extend for SImode if TARGET_64BIT. + (3): Likewise. + (mul3): Expand to mulsi3_extended for SImode if + TARGET_64BIT and ISA_HAS_DIV32. + (3): Expand to si3_extended + for SImode if TARGET_64BIT. + (rotl3): Expand to rotrsi3_extend for SImode if + TARGET_64BIT. + (bytepick_w_): Add mode for lshiftrt and ashift. + (bitsize, bytepick_imm, bytepick_w_ashift_amount): New + define_mode_attr. + (bytepick_w__extend): Adjust for the RTL change + caused by 32-bit shift expanding. Now bytepick_imm only covers + 2 and 3, separate one remaining case to ... + (bytepick_w_1_extend): ... here, new define_insn. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/bitwise_extend.c: New test. 
+--- + gcc/config/loongarch/loongarch.md | 131 +++++++++++++++--- + .../gcc.target/loongarch/bitwise_extend.c | 45 ++++++ + 2 files changed, 154 insertions(+), 22 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/bitwise_extend.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 459ad30b9..9bad79bbf 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -546,6 +546,7 @@ + (define_code_attr optab [(ashift "ashl") + (ashiftrt "ashr") + (lshiftrt "lshr") ++ (rotatert "rotr") + (ior "ior") + (xor "xor") + (and "and") +@@ -624,6 +625,49 @@ + (48 "6") + (56 "7")]) + ++;; Expand some 32-bit operations to si3_extend operations if TARGET_64BIT ++;; so the redundant sign extension can be removed if the output is used as ++;; an input of a bitwise operation. Note plus, rotl, and div are handled ++;; separately. ++(define_code_iterator shift_w [any_shift rotatert]) ++(define_code_iterator arith_w [minus mult]) ++ ++(define_expand "3" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (shift_w:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand:SI 2 "arith_operand" "rI")))] ++ "" ++{ ++ if (TARGET_64BIT && mode == SImode) ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_si3_extend (t, operands[1], operands[2])); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands[0], t); ++ DONE; ++ } ++}) ++ ++(define_expand "3" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (arith_w:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand:GPR 2 "register_operand" "r")))] ++ "" ++{ ++ if (TARGET_64BIT && mode == SImode) ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_si3_extend (t, operands[1], operands[2])); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands[0], t); ++ DONE; 
++ } ++}) ++ + ;; + ;; .................... + ;; +@@ -781,7 +825,7 @@ + [(set_attr "type" "fadd") + (set_attr "mode" "")]) + +-(define_insn "sub3" ++(define_insn "*sub3" + [(set (match_operand:GPR 0 "register_operand" "=r") + (minus:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "register_operand" "r")))] +@@ -791,7 +835,7 @@ + (set_attr "mode" "")]) + + +-(define_insn "*subsi3_extended" ++(define_insn "subsi3_extend" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") +@@ -818,7 +862,7 @@ + [(set_attr "type" "fmul") + (set_attr "mode" "")]) + +-(define_insn "mul3" ++(define_insn "*mul3" + [(set (match_operand:GPR 0 "register_operand" "=r") + (mult:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "register_operand" "r")))] +@@ -827,7 +871,7 @@ + [(set_attr "type" "imul") + (set_attr "mode" "")]) + +-(define_insn "*mulsi3_extended" ++(define_insn "mulsi3_extend" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (mult:SI (match_operand:SI 1 "register_operand" "r") +@@ -1001,8 +1045,19 @@ + (match_operand:GPR 2 "register_operand")))] + "" + { +- if (GET_MODE (operands[0]) == SImode && TARGET_64BIT && !ISA_HAS_DIV32) ++ if (GET_MODE (operands[0]) == SImode && TARGET_64BIT) + { ++ if (ISA_HAS_DIV32) ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_si3_extended (t, operands[1], operands[2])); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands[0], t); ++ DONE; ++ } ++ + rtx reg1 = gen_reg_rtx (DImode); + rtx reg2 = gen_reg_rtx (DImode); + rtx rd = gen_reg_rtx (DImode); +@@ -1038,7 +1093,7 @@ + + (define_insn "si3_extended" + [(set (match_operand:DI 0 "register_operand" "=r,&r,&r") +- (sign_extend ++ (sign_extend:DI + (any_div:SI (match_operand:SI 1 "register_operand" "r,r,0") + (match_operand:SI 2 "register_operand" "r,r,r"))))] + 
"TARGET_64BIT && ISA_HAS_DIV32" +@@ -2981,7 +3036,7 @@ + ;; + ;; .................... + +-(define_insn "3" ++(define_insn "*3" + [(set (match_operand:GPR 0 "register_operand" "=r") + (any_shift:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "rI")))] +@@ -2996,7 +3051,7 @@ + [(set_attr "type" "shift") + (set_attr "mode" "")]) + +-(define_insn "*si3_extend" ++(define_insn "si3_extend" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (any_shift:SI (match_operand:SI 1 "register_operand" "r") +@@ -3011,7 +3066,7 @@ + [(set_attr "type" "shift") + (set_attr "mode" "SI")]) + +-(define_insn "rotr3" ++(define_insn "*rotr3" + [(set (match_operand:GPR 0 "register_operand" "=r,r") + (rotatert:GPR (match_operand:GPR 1 "register_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,I")))] +@@ -3040,6 +3095,19 @@ + "" + { + operands[3] = gen_reg_rtx (SImode); ++ ++ if (TARGET_64BIT && mode == SImode) ++ { ++ rtx t = gen_reg_rtx (DImode); ++ ++ emit_insn (gen_negsi2 (operands[3], operands[2])); ++ emit_insn (gen_rotrsi3_extend (t, operands[1], operands[3])); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands[0], t); ++ DONE; ++ } + }); + + ;; The following templates were added to generate "bstrpick.d + alsl.d" +@@ -4061,26 +4129,45 @@ + + (define_insn "bytepick_w_" + [(set (match_operand:SI 0 "register_operand" "=r") +- (ior:SI (lshiftrt (match_operand:SI 1 "register_operand" "r") +- (const_int )) +- (ashift (match_operand:SI 2 "register_operand" "r") +- (const_int bytepick_w_ashift_amount))))] ++ (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int )) ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int bytepick_w_ashift_amount))))] + "" + "bytepick.w\t%0,%1,%2," + [(set_attr "mode" "SI")]) + ++(define_mode_attr bitsize [(QI "8") (HI "16")]) ++(define_mode_attr bytepick_imm [(QI "3") (HI 
"2")]) ++(define_mode_attr bytepick_w_ashift_amount [(QI "24") (HI "16")]) ++ + (define_insn "bytepick_w__extend" + [(set (match_operand:DI 0 "register_operand" "=r") +- (sign_extend:DI +- (subreg:SI +- (ior:DI (subreg:DI (lshiftrt +- (match_operand:SI 1 "register_operand" "r") +- (const_int )) 0) +- (subreg:DI (ashift +- (match_operand:SI 2 "register_operand" "r") +- (const_int bytepick_w_ashift_amount)) 0)) 0)))] ++ (ior:DI ++ (ashift:DI ++ (sign_extend:DI ++ (subreg:SHORT (match_operand:DI 1 "register_operand" "r") 0)) ++ (const_int )) ++ (zero_extract:DI (match_operand:DI 2 "register_operand" "r") ++ (const_int ) ++ (const_int ))))] + "TARGET_64BIT" +- "bytepick.w\t%0,%1,%2," ++ "bytepick.w\t%0,%2,%1," ++ [(set_attr "mode" "SI")]) ++ ++(define_insn "bytepick_w_1_extend" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ior:DI ++ (ashift:DI ++ (sign_extract:DI (match_operand:DI 1 "register_operand" "r") ++ (const_int 24) ++ (const_int 0)) ++ (const_int 8)) ++ (zero_extract:DI (match_operand:DI 2 "register_operand" "r") ++ (const_int 8) ++ (const_int 24))))] ++ "TARGET_64BIT" ++ "bytepick.w\t%0,%2,%1,1" + [(set_attr "mode" "SI")]) + + (define_insn "bytepick_d_" +diff --git a/gcc/testsuite/gcc.target/loongarch/bitwise_extend.c b/gcc/testsuite/gcc.target/loongarch/bitwise_extend.c +new file mode 100644 +index 000000000..c2bc489a7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/bitwise_extend.c +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mdiv32" } */ ++/* { dg-final { scan-assembler-not "slli\\.w" } } */ ++ ++int ++f1 (int a, int b) ++{ ++ return (a << b) | b; ++} ++ ++int ++f2 (int a, int b) ++{ ++ return (a - b) | b; ++} ++ ++int ++f3 (int a, int b) ++{ ++ return (a * b) | b; ++} ++ ++int ++f4 (int a, int b) ++{ ++ return (unsigned) a >> b | (unsigned) a << (32 - b) | b; ++} ++ ++int ++f5 (int a, int b) ++{ ++ return (unsigned) a << b | (unsigned) a >> (32 - b) | b; ++} ++ ++int ++f6 (int a, int b) ++{ ++ 
return (a % b) | b; ++} ++ ++int ++f7 (int a, int b) ++{ ++ return (a + b) | b; ++} +-- +2.43.0 + diff --git a/0011-LoongArch-Relax-ins_zero_bitmask_operand-and-remove-.patch b/0011-LoongArch-Relax-ins_zero_bitmask_operand-and-remove-.patch new file mode 100644 index 0000000000000000000000000000000000000000..a36624665a2d8f15fab7168ab58d3bc38024263a --- /dev/null +++ b/0011-LoongArch-Relax-ins_zero_bitmask_operand-and-remove-.patch @@ -0,0 +1,123 @@ +From 6f1e77e40203e99258086c22d2079254d8b3402e Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 28 Jul 2024 17:02:49 +0800 +Subject: [PATCH 11/20] LoongArch: Relax ins_zero_bitmask_operand and remove + and3_align + +In r15-1207 I was too stupid to realize we just need to relax +ins_zero_bitmask_operand to allow using bstrins for aligning, instead of +adding a new split. And, "> 12" in ins_zero_bitmask_operand also makes +no sense: it rejects bstrins for things like "x & ~4l" with no good +reason. + +So fix my errors now. + +gcc/ChangeLog: + + * config/loongarch/predicates.md (ins_zero_bitmask_operand): + Cover more cases that bstrins can benefit. + (high_bitmask_operand): Remove. + * config/loongarch/constraints.md (Yy): Remove. + * config/loongarch/loongarch.md (and3_align): Remove. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/bstrins-4.c: New test. 
+--- + gcc/config/loongarch/constraints.md | 4 ---- + gcc/config/loongarch/loongarch.md | 17 ----------------- + gcc/config/loongarch/predicates.md | 9 ++------- + gcc/testsuite/gcc.target/loongarch/bstrins-4.c | 9 +++++++++ + 4 files changed, 11 insertions(+), 28 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-4.c + +diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md +index 12cf5e292..18da8b31f 100644 +--- a/gcc/config/loongarch/constraints.md ++++ b/gcc/config/loongarch/constraints.md +@@ -292,10 +292,6 @@ + "@internal" + (match_operand 0 "low_bitmask_operand")) + +-(define_constraint "Yy" +- "@internal" +- (match_operand 0 "high_bitmask_operand")) +- + (define_constraint "YI" + "@internal + A replicated vector const in which the replicated value is in the range +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 9bad79bbf..280d1c403 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1588,23 +1588,6 @@ + [(set_attr "move_type" "pick_ins") + (set_attr "mode" "")]) + +-(define_insn_and_split "and3_align" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (and:GPR (match_operand:GPR 1 "register_operand" "r") +- (match_operand:GPR 2 "high_bitmask_operand" "Yy")))] +- "" +- "#" +- "" +- [(set (match_dup 0) (match_dup 1)) +- (set (zero_extract:GPR (match_dup 0) (match_dup 2) (const_int 0)) +- (const_int 0))] +-{ +- int len; +- +- len = low_bitmask_len (mode, ~INTVAL (operands[2])); +- operands[2] = GEN_INT (len); +-}) +- + (define_insn_and_split "*bstrins__for_mask" + [(set (match_operand:GPR 0 "register_operand" "=r") + (and:GPR (match_operand:GPR 1 "register_operand" "r") +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 58e406ea5..95c2544cc 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -293,10 +293,6 @@ + (and (match_code 
"const_int") + (match_test "low_bitmask_len (mode, INTVAL (op)) > 12"))) + +-(define_predicate "high_bitmask_operand" +- (and (match_code "const_int") +- (match_test "low_bitmask_len (mode, ~INTVAL (op)) > 0"))) +- + (define_predicate "d_operand" + (and (match_code "reg") + (match_test "GP_REG_P (REGNO (op))"))) +@@ -406,11 +402,10 @@ + + (define_predicate "ins_zero_bitmask_operand" + (and (match_code "const_int") +- (match_test "INTVAL (op) != -1") +- (match_test "INTVAL (op) & 1") + (match_test "low_bitmask_len (mode, \ + ~UINTVAL (op) | (~UINTVAL(op) - 1)) \ +- > 12"))) ++ > 0") ++ (not (match_operand 0 "const_uns_arith_operand")))) + + (define_predicate "const_call_insn_operand" + (match_code "const,symbol_ref,label_ref") +diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-4.c b/gcc/testsuite/gcc.target/loongarch/bstrins-4.c +new file mode 100644 +index 000000000..0823cfc38 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-4.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r4,\\\$r0,2,2" } } */ ++ ++long ++x (long a) ++{ ++ return a & ~4; ++} +-- +2.43.0 + diff --git a/0012-LoongArch-Rework-bswap-hi-si-di-2-definition.patch b/0012-LoongArch-Rework-bswap-hi-si-di-2-definition.patch new file mode 100644 index 0000000000000000000000000000000000000000..54bd1b620f3ddf7880036c7f92bbd1adddffaadf --- /dev/null +++ b/0012-LoongArch-Rework-bswap-hi-si-di-2-definition.patch @@ -0,0 +1,224 @@ +From 576f0886422697a97bb96d3abc43f6ef15f470c5 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 28 Jul 2024 19:57:02 +0800 +Subject: [PATCH 12/20] LoongArch: Rework bswap{hi,si,di}2 definition + +Per a gcc-help thread we are generating sub-optimal code for +__builtin_bswap{32,64}. To fix it: + +- Use a single revb.d instruction for bswapdi2. +- Use a single revb.2w instruction for bswapsi2 for TARGET_64BIT, + revb.2h + rotri.w for !TARGET_64BIT. 
+- Use a single revb.2h instruction for bswapsi2 (x) r>> 16, and a single + revb.2w instruction for bswapdi2 (x) r>> 32. + +Unfortunately I cannot figure out a way to make the compiler generate +revb.4h or revh.{2w,d} instructions. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (UNSPEC_REVB_2H, UNSPEC_REVB_4H, + UNSPEC_REVH_D): Remove UNSPECs. + (revb_4h, revh_d): Remove define_insn. + (revb_2h): Define as (rotatert:SI (bswap:SI x) 16) instead of + an UNSPEC. + (revb_2h_extend, revb_2w, *bswapsi2, bswapdi2): New define_insn. + (bswapsi2): Change to define_expand. Only expand to revb.2h + + rotri.w if !TARGET_64BIT. + (bswapdi2): Change to define_insn of which the output is just a + revb.d instruction. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/revb.c: New test. +--- + gcc/config/loongarch/loongarch.md | 79 ++++++++++++----------- + gcc/testsuite/gcc.target/loongarch/revb.c | 61 +++++++++++++++++ + 2 files changed, 104 insertions(+), 36 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/revb.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 280d1c403..ee0310f2b 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -20,11 +20,6 @@ + ;; . + + (define_c_enum "unspec" [ +- ;; Integer operations that are too cumbersome to describe directly. +- UNSPEC_REVB_2H +- UNSPEC_REVB_4H +- UNSPEC_REVH_D +- + ;; Floating-point moves. + UNSPEC_LOAD_LOW + UNSPEC_LOAD_HIGH +@@ -3151,55 +3146,67 @@ + + ;; Reverse the order of bytes of operand 1 and store the result in operand 0. 
+ +-(define_insn "bswaphi2" +- [(set (match_operand:HI 0 "register_operand" "=r") +- (bswap:HI (match_operand:HI 1 "register_operand" "r")))] ++(define_insn "revb_2h" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (rotatert:SI (bswap:SI (match_operand:SI 1 "register_operand" "r")) ++ (const_int 16)))] + "" + "revb.2h\t%0,%1" + [(set_attr "type" "shift")]) + +-(define_insn_and_split "bswapsi2" +- [(set (match_operand:SI 0 "register_operand" "=r") +- (bswap:SI (match_operand:SI 1 "register_operand" "r")))] +- "" +- "#" +- "" +- [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_REVB_2H)) +- (set (match_dup 0) (rotatert:SI (match_dup 0) (const_int 16)))] +- "" +- [(set_attr "insn_count" "2")]) +- +-(define_insn_and_split "bswapdi2" ++(define_insn "revb_2h_extend" + [(set (match_operand:DI 0 "register_operand" "=r") +- (bswap:DI (match_operand:DI 1 "register_operand" "r")))] ++ (sign_extend:DI ++ (rotatert:SI ++ (bswap:SI (match_operand:SI 1 "register_operand" "r")) ++ (const_int 16))))] + "TARGET_64BIT" +- "#" +- "" +- [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_REVB_4H)) +- (set (match_dup 0) (unspec:DI [(match_dup 0)] UNSPEC_REVH_D))] +- "" +- [(set_attr "insn_count" "2")]) ++ "revb.2h\t%0,%1" ++ [(set_attr "type" "shift")]) + +-(define_insn "revb_2h" +- [(set (match_operand:SI 0 "register_operand" "=r") +- (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_REVB_2H))] ++(define_insn "bswaphi2" ++ [(set (match_operand:HI 0 "register_operand" "=r") ++ (bswap:HI (match_operand:HI 1 "register_operand" "r")))] + "" + "revb.2h\t%0,%1" + [(set_attr "type" "shift")]) + +-(define_insn "revb_4h" ++(define_insn "revb_2w" + [(set (match_operand:DI 0 "register_operand" "=r") +- (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_REVB_4H))] ++ (rotatert:DI (bswap:DI (match_operand:DI 1 "register_operand" "r")) ++ (const_int 32)))] + "TARGET_64BIT" +- "revb.4h\t%0,%1" ++ "revb.2w\t%0,%1" + [(set_attr "type" "shift")]) + 
+-(define_insn "revh_d" ++(define_insn "*bswapsi2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (bswap:SI (match_operand:SI 1 "register_operand" "r")))] ++ "TARGET_64BIT" ++ "revb.2w\t%0,%1" ++ [(set_attr "type" "shift")]) ++ ++(define_expand "bswapsi2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (bswap:SI (match_operand:SI 1 "register_operand" "r")))] ++ "" ++{ ++ if (!TARGET_64BIT) ++ { ++ rtx t = gen_reg_rtx (SImode); ++ emit_insn (gen_revb_2h (t, operands[1])); ++ emit_insn (gen_rotrsi3 (operands[0], t, GEN_INT (16))); ++ DONE; ++ } ++}) ++ ++(define_insn "bswapdi2" + [(set (match_operand:DI 0 "register_operand" "=r") +- (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_REVH_D))] ++ (bswap:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_64BIT" +- "revh.d\t%0,%1" ++ "revb.d\t%0,%1" + [(set_attr "type" "shift")]) ++ + + ;; + ;; .................... +diff --git a/gcc/testsuite/gcc.target/loongarch/revb.c b/gcc/testsuite/gcc.target/loongarch/revb.c +new file mode 100644 +index 000000000..27a5d0fc7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/revb.c +@@ -0,0 +1,61 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* ++**t1: ++** revb.2w \$r4,\$r4 ++** slli.w \$r4,\$r4,0 ++** jr \$r1 ++*/ ++unsigned int ++t1 (unsigned int x) ++{ ++ return __builtin_bswap32 (x); ++} ++ ++/* ++**t2: ++** revb.d \$r4,\$r4 ++** jr \$r1 ++*/ ++unsigned long ++t2 (unsigned long x) ++{ ++ return __builtin_bswap64 (x); ++} ++ ++/* ++**t3: ++** revb.2h \$r4,\$r4 ++** jr \$r1 ++*/ ++unsigned int ++t3 (unsigned int x) ++{ ++ return (x >> 8) & 0xff00ff | (x << 8) & 0xff00ff00; ++} ++ ++/* ++**t4: ++** revb.2w \$r4,\$r4 ++** jr \$r1 ++*/ ++unsigned long ++t4 (unsigned long x) ++{ ++ x = __builtin_bswap64 (x); ++ return x << 32 | x >> 32; ++} ++ ++/* ++**t5: ++** revb.2h \$r4,\$r4 ++** bstrpick.w \$r4,\$r4,15,0 ++** jr \$r1 ++*/ 
+unsigned short +t5 (unsigned short x) +{ + return __builtin_bswap16 (x); +} +-- +2.43.0 + diff --git a/0013-testsuite-fix-dg-do-preprocess-typo.patch b/0013-testsuite-fix-dg-do-preprocess-typo.patch new file mode 100644 index 0000000000000000000000000000000000000000..2c0f314445f771976951d2cdbfb5700dc489ee97 --- /dev/null +++ b/0013-testsuite-fix-dg-do-preprocess-typo.patch @@ -0,0 +1,26 @@ +From a12e2e758da5307440b483b74e3ddeff5317c4e7 Mon Sep 17 00:00:00 2001 +From: Sam James +Date: Tue, 30 Jul 2024 21:46:29 +0100 +Subject: [PATCH 13/20] testsuite: fix 'dg-do-preprocess' typo + +We want 'dg-do preprocess', not 'dg-do-preprocess'. Fix that. + + PR target/106828 + * g++.target/loongarch/pr106828.C: Fix 'dg-do-preprocess' typo. +--- + gcc/testsuite/g++.target/loongarch/pr106828.C | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/testsuite/g++.target/loongarch/pr106828.C b/gcc/testsuite/g++.target/loongarch/pr106828.C +index 190c1db71..0d13cbbd5 100644 +--- a/gcc/testsuite/g++.target/loongarch/pr106828.C ++++ b/gcc/testsuite/g++.target/loongarch/pr106828.C +@@ -1,4 +1,4 @@ +-/* { dg-do-preprocess } */ ++/* { dg-do preprocess } */ + /* { dg-options "-mabi=lp64d -fsanitize=address" } */ + + /* Tests whether the compiler supports compile option '-fsanitize=address'. */ +-- +2.43.0 + diff --git a/0014-LoongArch-Remove-gawk-extension-from-a-generator-scr.patch b/0014-LoongArch-Remove-gawk-extension-from-a-generator-scr.patch new file mode 100644 index 0000000000000000000000000000000000000000..dfe475615248521758799e965fce27c3ade75c82 --- /dev/null +++ b/0014-LoongArch-Remove-gawk-extension-from-a-generator-scr.patch @@ -0,0 +1,47 @@ +From 3b95d64e62dc1884da153f37a14753c3a74751e8 Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Tue, 23 Jul 2024 10:04:26 +0800 +Subject: [PATCH 14/20] LoongArch: Remove gawk extension from a generator +script. 
+ +gcc/ChangeLog: + + * config/loongarch/genopts/gen-evolution.awk: Do not use + "length()" to compute the size of an array. +--- + gcc/config/loongarch/genopts/gen-evolution.awk | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/gen-evolution.awk b/gcc/config/loongarch/genopts/gen-evolution.awk +index 4d105afa9..1c8004e41 100644 +--- a/gcc/config/loongarch/genopts/gen-evolution.awk ++++ b/gcc/config/loongarch/genopts/gen-evolution.awk +@@ -1,4 +1,4 @@ +-#!/usr/bin/gawk ++#!/usr/bin/awk -f + # + # A simple script that generates loongarch-evolution.h + # from genopts/isa-evolution.in +@@ -94,8 +94,9 @@ function gen_cpucfg_useful_idx() + idx_bucket[cpucfg_word[i]] = 1 + + delete idx_list ++ j = 1 + for (i in idx_bucket) +- idx_list[length(idx_list)-1] = i+0 ++ idx_list[j++] = i+0 + delete idx_bucket + + asort (idx_list) +@@ -108,7 +109,7 @@ function gen_cpucfg_useful_idx() + print "" + + printf ("static constexpr int N_CPUCFG_WORDS = %d;\n", +- idx_list[length(idx_list)] + 1) ++ idx_list[j - 1] + 1) + + delete idx_list + } +-- +2.43.0 + diff --git a/0015-LoongArch-Use-iorn-and-andn-standard-pattern-names.patch b/0015-LoongArch-Use-iorn-and-andn-standard-pattern-names.patch new file mode 100644 index 0000000000000000000000000000000000000000..7339a1b9ea27f6ef6211b5b9f1f7dc81b322b571 --- /dev/null +++ b/0015-LoongArch-Use-iorn-and-andn-standard-pattern-names.patch @@ -0,0 +1,226 @@ +From 4fccfea59b4924e9218dc0cb13093d26995dd6b4 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Thu, 1 Aug 2024 16:07:25 +0800 +Subject: [PATCH 15/20] LoongArch: Use iorn and andn standard pattern names. + +r15-1890 introduced new optabs iorc and andc and their corresponding +internal functions BIT_{ANDC,IORC} for targets that define such optabs +for vector modes. In r15-2258, iorc and andc were renamed to +iorn and andn. +So we changed the andn and iorn implementation templates to the standard +template names. 
+ +gcc/ChangeLog: + + * config/loongarch/lasx.md (xvandn3): Rename to ... + (andn3): This. + (xvorn3): Rename to ... + (iorn3): This. + * config/loongarch/loongarch-builtins.cc + (CODE_FOR_lsx_vandn_v): Defined as the modified name. + (CODE_FOR_lsx_vorn_v): Likewise. + (CODE_FOR_lasx_xvandn_v): Likewise. + (CODE_FOR_lasx_xvorn_v): Likewise. + (loongarch_expand_builtin_insn): When the builtin function to be + called is __builtin_lasx_xvandn or __builtin_lsx_vandn, swap the + two operands. + * config/loongarch/loongarch.md (n): Rename to ... + (n3): This. + * config/loongarch/lsx.md (vandn3): Rename to ... + (andn3): This. + (vorn3): Rename to ... + (iorn3): This. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/lasx-andn-iorn.c: New test. + * gcc.target/loongarch/lsx-andn-iorn.c: New test. +--- + gcc/config/loongarch/lasx.md | 10 +++---- + gcc/config/loongarch/loongarch-builtins.cc | 10 ++++--- + gcc/config/loongarch/loongarch.md | 8 +++--- + gcc/config/loongarch/lsx.md | 10 +++---- + .../gcc.target/loongarch/lasx-andn-iorn.c | 11 ++++++++ + .../gcc.target/loongarch/lsx-andn-iorn.c | 28 +++++++++++++++++++ + 6 files changed, 59 insertions(+), 18 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 7bd61f8ed..ca5238806 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -2716,12 +2716,12 @@ + (set_attr "mode" "V4DI")]) + + ;; Extend loongson-sx to loongson-asx. 
+-(define_insn "xvandn3" ++(define_insn "andn3" + [(set (match_operand:LASX 0 "register_operand" "=f") +- (and:LASX (not:LASX (match_operand:LASX 1 "register_operand" "f")) +- (match_operand:LASX 2 "register_operand" "f")))] ++ (and:LASX (not:LASX (match_operand:LASX 2 "register_operand" "f")) ++ (match_operand:LASX 1 "register_operand" "f")))] + "ISA_HAS_LASX" +- "xvandn.v\t%u0,%u1,%u2" ++ "xvandn.v\t%u0,%u2,%u1" + [(set_attr "type" "simd_logic") + (set_attr "mode" "")]) + +@@ -4637,7 +4637,7 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +-(define_insn "xvorn3" ++(define_insn "iorn3" + [(set (match_operand:ILASX 0 "register_operand" "=f") + (ior:ILASX (not:ILASX (match_operand:ILASX 2 "register_operand" "f")) + (match_operand:ILASX 1 "register_operand" "f")))] +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index fbe46833c..cf92770de 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -458,8 +458,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) + #define CODE_FOR_lsx_vabsd_du CODE_FOR_lsx_vabsd_u_du + #define CODE_FOR_lsx_vftint_wu_s CODE_FOR_lsx_vftint_u_wu_s + #define CODE_FOR_lsx_vftint_lu_d CODE_FOR_lsx_vftint_u_lu_d +-#define CODE_FOR_lsx_vandn_v CODE_FOR_vandnv16qi3 +-#define CODE_FOR_lsx_vorn_v CODE_FOR_vornv16qi3 ++#define CODE_FOR_lsx_vandn_v CODE_FOR_andnv16qi3 ++#define CODE_FOR_lsx_vorn_v CODE_FOR_iornv16qi3 + #define CODE_FOR_lsx_vneg_b CODE_FOR_vnegv16qi2 + #define CODE_FOR_lsx_vneg_h CODE_FOR_vnegv8hi2 + #define CODE_FOR_lsx_vneg_w CODE_FOR_vnegv4si2 +@@ -692,8 +692,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) + #define CODE_FOR_lasx_xvrepli_w CODE_FOR_lasx_xvrepliv8si + #define CODE_FOR_lasx_xvrepli_d CODE_FOR_lasx_xvrepliv4di + +-#define CODE_FOR_lasx_xvandn_v CODE_FOR_xvandnv32qi3 +-#define CODE_FOR_lasx_xvorn_v CODE_FOR_xvornv32qi3 ++#define CODE_FOR_lasx_xvandn_v 
CODE_FOR_andnv32qi3 ++#define CODE_FOR_lasx_xvorn_v CODE_FOR_iornv32qi3 + #define CODE_FOR_lasx_xvneg_b CODE_FOR_negv32qi2 + #define CODE_FOR_lasx_xvneg_h CODE_FOR_negv16hi2 + #define CODE_FOR_lasx_xvneg_w CODE_FOR_negv8si2 +@@ -2858,6 +2858,7 @@ loongarch_expand_builtin_insn (enum insn_code icode, unsigned int nops, + case CODE_FOR_lsx_vpickod_b: + case CODE_FOR_lsx_vpickod_h: + case CODE_FOR_lsx_vpickod_w: ++ case CODE_FOR_lsx_vandn_v: + case CODE_FOR_lasx_xvilvh_b: + case CODE_FOR_lasx_xvilvh_h: + case CODE_FOR_lasx_xvilvh_w: +@@ -2878,6 +2879,7 @@ loongarch_expand_builtin_insn (enum insn_code icode, unsigned int nops, + case CODE_FOR_lasx_xvpickod_b: + case CODE_FOR_lasx_xvpickod_h: + case CODE_FOR_lasx_xvpickod_w: ++ case CODE_FOR_lasx_xvandn_v: + /* Swap the operands 1 and 2 for interleave operations. Built-ins follow + convention of ISA, which have op1 as higher component and op2 as lower + component. However, the VEC_PERM op in tree and vec_concat in RTL +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index ee0310f2b..261cb7d9d 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1701,13 +1701,13 @@ + [(set_attr "type" "logical") + (set_attr "mode" "SI")]) + +-(define_insn "n" ++(define_insn "n3" + [(set (match_operand:X 0 "register_operand" "=r") + (neg_bitwise:X +- (not:X (match_operand:X 1 "register_operand" "r")) +- (match_operand:X 2 "register_operand" "r")))] ++ (not:X (match_operand:X 2 "register_operand" "r")) ++ (match_operand:X 1 "register_operand" "r")))] + "" +- "n\t%0,%2,%1" ++ "n\t%0,%1,%2" + [(set_attr "type" "logical") + (set_attr "mode" "")]) + +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 454cda478..6bdf4fe43 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -2344,12 +2344,12 @@ + } + [(set_attr "mode" "V4SF")]) + +-(define_insn "vandn3" ++(define_insn "andn3" + [(set (match_operand:LSX 0 
"register_operand" "=f") +- (and:LSX (not:LSX (match_operand:LSX 1 "register_operand" "f")) +- (match_operand:LSX 2 "register_operand" "f")))] ++ (and:LSX (not:LSX (match_operand:LSX 2 "register_operand" "f")) ++ (match_operand:LSX 1 "register_operand" "f")))] + "ISA_HAS_LSX" +- "vandn.v\t%w0,%w1,%w2" ++ "vandn.v\t%w0,%w2,%w1" + [(set_attr "type" "simd_logic") + (set_attr "mode" "")]) + +@@ -3028,7 +3028,7 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +-(define_insn "vorn3" ++(define_insn "iorn3" + [(set (match_operand:ILSX 0 "register_operand" "=f") + (ior:ILSX (not:ILSX (match_operand:ILSX 2 "register_operand" "f")) + (match_operand:ILSX 1 "register_operand" "f")))] +diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c b/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c +new file mode 100644 +index 000000000..4aa5f19a6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c +@@ -0,0 +1,11 @@ ++#define N 8 ++ ++#include "./lsx-andn-iorn.c" ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx -ftree-vectorize" } */ ++ ++/* We should produce a BIT_ANDC and BIT_IORC here. */ ++ ++/* { dg-final { scan-tree-dump ".BIT_ANDN " "optimized" } } */ ++/* { dg-final { scan-tree-dump ".BIT_IORN " "optimized" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c b/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c +new file mode 100644 +index 000000000..7bceccd37 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -ftree-vectorize" } */ ++ ++#ifndef N ++#define N 4 ++#endif ++ ++extern float a[N], b[N]; ++extern int c[N], d[N]; ++ ++void ++bar1 (void) ++{ ++ for (int i = 0; i < N; i++) ++ d[i] = a[i] > b[i] ? 0 : c[i]; ++} ++ ++void ++bar2 (void) ++{ ++ for (int i = 0; i < N; i++) ++ d[i] = a[i] > b[i] ? c[i]: -1; ++} ++ ++/* We should produce a BIT_ANDC and BIT_IORC here. 
*/ ++ ++/* { dg-final { scan-tree-dump ".BIT_ANDN " "optimized" } } */ ++/* { dg-final { scan-tree-dump ".BIT_IORN " "optimized" } } */ +-- +2.43.0 + diff --git a/0016-LoongArch-Drop-vcond-u-expanders.patch b/0016-LoongArch-Drop-vcond-u-expanders.patch new file mode 100644 index 0000000000000000000000000000000000000000..c79745baa4253fb0a5753fb457695e035a80afb8 --- /dev/null +++ b/0016-LoongArch-Drop-vcond-u-expanders.patch @@ -0,0 +1,127 @@ +From 3e2a8ca412a30756b1b2883a962e61e45a9ed5f6 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Thu, 8 Aug 2024 10:39:54 +0800 +Subject: [PATCH 16/20] LoongArch: Drop vcond{,u} expanders. + +Optabs vcond{,u} will be removed for GCC 15. Since regtest shows no +fallout, dropping the expanders, now. + +gcc/ChangeLog: + + PR target/114189 + * config/loongarch/lasx.md (vcondu): Delete. + (vcond): Likewise. + * config/loongarch/lsx.md (vcondu): Likewise. + (vcond): Likewise. +--- + gcc/config/loongarch/lasx.md | 37 ------------------------------------ + gcc/config/loongarch/lsx.md | 31 ------------------------------ + 2 files changed, 68 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index ca5238806..d37b2e83c 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -165,9 +165,6 @@ + ;; All vector modes with 256 bits. + (define_mode_iterator LASX [V4DF V8SF V4DI V8SI V16HI V32QI]) + +-;; Same as LASX. Used by vcond to iterate two modes. +-(define_mode_iterator LASX_2 [V4DF V8SF V4DI V8SI V16HI V32QI]) +- + ;; Only used for splitting insert_d and copy_{u,s}.d. + (define_mode_iterator LASX_D [V4DI V4DF]) + +@@ -762,40 +759,6 @@ + DONE; + }) + +-;; FIXME: 256?? 
+-(define_expand "vcondu" +- [(match_operand:LASX 0 "register_operand") +- (match_operand:LASX 1 "reg_or_m1_operand") +- (match_operand:LASX 2 "reg_or_0_operand") +- (match_operator 3 "" +- [(match_operand:ILASX 4 "register_operand") +- (match_operand:ILASX 5 "register_operand")])] +- "ISA_HAS_LASX +- && (GET_MODE_NUNITS (mode) +- == GET_MODE_NUNITS (mode))" +-{ +- loongarch_expand_vec_cond_expr (mode, mode, +- operands); +- DONE; +-}) +- +-;; FIXME: 256?? +-(define_expand "vcond" +- [(match_operand:LASX 0 "register_operand") +- (match_operand:LASX 1 "reg_or_m1_operand") +- (match_operand:LASX 2 "reg_or_0_operand") +- (match_operator 3 "" +- [(match_operand:LASX_2 4 "register_operand") +- (match_operand:LASX_2 5 "register_operand")])] +- "ISA_HAS_LASX +- && (GET_MODE_NUNITS (mode) +- == GET_MODE_NUNITS (mode))" +-{ +- loongarch_expand_vec_cond_expr (mode, mode, +- operands); +- DONE; +-}) +- + ;; Same as vcond_ + (define_expand "vcond_mask_" + [(match_operand:LASX 0 "register_operand") +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 6bdf4fe43..fcba28b07 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -186,9 +186,6 @@ + ;; All vector modes with 128 bits. + (define_mode_iterator LSX [V2DF V4SF V2DI V4SI V8HI V16QI]) + +-;; Same as LSX. Used by vcond to iterate two modes. +-(define_mode_iterator LSX_2 [V2DF V4SF V2DI V4SI V8HI V16QI]) +- + ;; Only used for vilvh and splitting insert_d and copy_{u,s}.d. 
+ (define_mode_iterator LSX_D [V2DI V2DF]) + +@@ -533,34 +530,6 @@ + DONE; + }) + +-(define_expand "vcondu" +- [(match_operand:LSX 0 "register_operand") +- (match_operand:LSX 1 "reg_or_m1_operand") +- (match_operand:LSX 2 "reg_or_0_operand") +- (match_operator 3 "" +- [(match_operand:ILSX 4 "register_operand") +- (match_operand:ILSX 5 "register_operand")])] +- "ISA_HAS_LSX +- && (GET_MODE_NUNITS (mode) == GET_MODE_NUNITS (mode))" +-{ +- loongarch_expand_vec_cond_expr (mode, mode, operands); +- DONE; +-}) +- +-(define_expand "vcond" +- [(match_operand:LSX 0 "register_operand") +- (match_operand:LSX 1 "reg_or_m1_operand") +- (match_operand:LSX 2 "reg_or_0_operand") +- (match_operator 3 "" +- [(match_operand:LSX_2 4 "register_operand") +- (match_operand:LSX_2 5 "register_operand")])] +- "ISA_HAS_LSX +- && (GET_MODE_NUNITS (mode) == GET_MODE_NUNITS (mode))" +-{ +- loongarch_expand_vec_cond_expr (mode, mode, operands); +- DONE; +-}) +- + (define_expand "vcond_mask_" + [(match_operand:LSX 0 "register_operand") + (match_operand:LSX 1 "reg_or_m1_operand") +-- +2.43.0 + diff --git a/0017-LoongArch-Provide-ashr-lshr-and-ashl-RTL-pattern-for.patch b/0017-LoongArch-Provide-ashr-lshr-and-ashl-RTL-pattern-for.patch new file mode 100644 index 0000000000000000000000000000000000000000..bd29caad701afe4d4d4b1e4452b2c04c6862c783 --- /dev/null +++ b/0017-LoongArch-Provide-ashr-lshr-and-ashl-RTL-pattern-for.patch @@ -0,0 +1,220 @@ +From 41ff0f0c55e43ea0ab6f3f588a9193c56b217388 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Thu, 8 Aug 2024 09:59:28 +0800 +Subject: [PATCH 17/20] LoongArch: Provide ashr lshr and ashl RTL pattern for + vectors. + +We support vashr vlshr and vashl. However, in r15-1638 support optimize +x < 0 ? -1 : 0 into (signed) x >> 31 and x < 0 ? 1 : 0 into (unsigned) x >> 31. +To support this optimization, vector ashr lshr and ashl need to be implemented. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (insn): Added rotatert rotr pairs. 
+ * config/loongarch/simd.md (rotr3): Rename to ... + (3): This. + +gcc/testsuite/ChangeLog: + + * g++.target/loongarch/vect-ashr-lshr.C: New test. +--- + gcc/config/loongarch/loongarch.md | 1 + + gcc/config/loongarch/simd.md | 13 +- + .../g++.target/loongarch/vect-ashr-lshr.C | 147 ++++++++++++++++++ + 3 files changed, 155 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 261cb7d9d..73cdb38a4 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -559,6 +559,7 @@ + (define_code_attr insn [(ashift "sll") + (ashiftrt "sra") + (lshiftrt "srl") ++ (rotatert "rotr") + (ior "or") + (xor "xor") + (and "and") +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 00ff2823a..45ea11422 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -306,14 +306,15 @@ + operands[4] = gen_reg_rtx (mode); + }); + +-;; vrotri.{b/h/w/d} ++;; v{rotr/sll/sra/srl}i.{b/h/w/d} + +-(define_insn "rotr3" ++(define_insn "3" + [(set (match_operand:IVEC 0 "register_operand" "=f") +- (rotatert:IVEC (match_operand:IVEC 1 "register_operand" "f") +- (match_operand:SI 2 "const__operand")))] +- "" +- "vrotri.\t%0,%1,%2"; ++ (shift_w:IVEC ++ (match_operand:IVEC 1 "register_operand" "f") ++ (match_operand:SI 2 "const__operand")))] ++ "ISA_HAS_LSX" ++ "vi.\t%0,%1,%2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +diff --git a/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C b/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C +new file mode 100644 +index 000000000..bcef985fa +--- /dev/null ++++ b/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C +@@ -0,0 +1,147 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mlasx -O2" } */ ++/* { dg-final { scan-assembler-times "vsrli.b" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrli.h" 2 } } */ ++/* { 
dg-final { scan-assembler-times "vsrli.w" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrli.d" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrai.b" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrai.h" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrai.w" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrai.d" 2 } } */ ++ ++typedef signed char v16qi __attribute__((vector_size(16))); ++typedef signed char v32qi __attribute__((vector_size(32))); ++typedef short v8hi __attribute__((vector_size(16))); ++typedef short v16hi __attribute__((vector_size(32))); ++typedef int v4si __attribute__((vector_size(16))); ++typedef int v8si __attribute__((vector_size(32))); ++typedef long long v2di __attribute__((vector_size(16))); ++typedef long long v4di __attribute__((vector_size(32))); ++ ++v16qi ++foo (v16qi a) ++{ ++ v16qi const1_op = __extension__(v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; ++ v16qi const0_op = __extension__(v16qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v32qi ++foo2 (v32qi a) ++{ ++ v32qi const1_op = __extension__(v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; ++ v32qi const0_op = __extension__(v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v8hi ++foo3 (v8hi a) ++{ ++ v8hi const1_op = __extension__(v8hi){1,1,1,1,1,1,1,1}; ++ v8hi const0_op = __extension__(v8hi){0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v16hi ++foo4 (v16hi a) ++{ ++ v16hi const1_op = __extension__(v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; ++ v16hi const0_op = __extension__(v16hi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v4si ++foo5 (v4si a) ++{ ++ v4si const1_op = __extension__(v4si){1,1,1,1}; ++ v4si const0_op = __extension__(v4si){0,0,0,0}; ++ return a < const0_op ? 
const1_op : const0_op; ++} ++ ++v8si ++foo6 (v8si a) ++{ ++ v8si const1_op = __extension__(v8si){1,1,1,1,1,1,1,1}; ++ v8si const0_op = __extension__(v8si){0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v2di ++foo7 (v2di a) ++{ ++ v2di const1_op = __extension__(v2di){1,1}; ++ v2di const0_op = __extension__(v2di){0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v4di ++foo8 (v4di a) ++{ ++ v4di const1_op = __extension__(v4di){1,1,1,1}; ++ v4di const0_op = __extension__(v4di){0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v16qi ++foo9 (v16qi a) ++{ ++ v16qi const1_op = __extension__(v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; ++ v16qi const0_op = __extension__(v16qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v32qi ++foo10 (v32qi a) ++{ ++ v32qi const1_op = __extension__(v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; ++ v32qi const0_op = __extension__(v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v8hi ++foo11 (v8hi a) ++{ ++ v8hi const1_op = __extension__(v8hi){-1,-1,-1,-1,-1,-1,-1,-1}; ++ v8hi const0_op = __extension__(v8hi){0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v16hi ++foo12 (v16hi a) ++{ ++ v16hi const1_op = __extension__(v16hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; ++ v16hi const0_op = __extension__(v16hi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v4si ++foo13 (v4si a) ++{ ++ v4si const1_op = __extension__(v4si){-1,-1,-1,-1}; ++ v4si const0_op = __extension__(v4si){0,0,0,0}; ++ return a < const0_op ? 
const1_op : const0_op; ++} ++ ++v8si ++foo14 (v8si a) ++{ ++ v8si const1_op = __extension__(v8si){-1,-1,-1,-1,-1,-1,-1,-1}; ++ v8si const0_op = __extension__(v8si){0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v2di ++foo15 (v2di a) ++{ ++ v2di const1_op = __extension__(v2di){-1,-1}; ++ v2di const0_op = __extension__(v2di){0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v4di ++foo16 (v4di a) ++{ ++ v4di const1_op = __extension__(v4di){-1,-1,-1,-1}; ++ v4di const0_op = __extension__(v4di){0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} +-- +2.43.0 + diff --git a/0018-LoongArch-Implement-scalar-isinf-isnormal-and-isfini.patch b/0018-LoongArch-Implement-scalar-isinf-isnormal-and-isfini.patch new file mode 100644 index 0000000000000000000000000000000000000000..7621550116958d012cd52e28462d2cfa0e03351f --- /dev/null +++ b/0018-LoongArch-Implement-scalar-isinf-isnormal-and-isfini.patch @@ -0,0 +1,203 @@ +From 1d08e8d3041c102154001b1813ca13e4886048eb Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Thu, 4 Jul 2024 02:49:28 +0800 +Subject: [PATCH 18/20] LoongArch: Implement scalar isinf, isnormal, and + isfinite via fclass + +Doing so can avoid loading FP constants from memory. It also +partially fixes PR 66462 as fclass does not signal on sNaN. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (extendsidi2): Add ("=r", "f") + alternative and use movfr2gr.s for it. The spec clearly states + movfr2gr.s sign extends the value to GRLEN. + (fclass_): Make the result SImode instead of a floating + mode. The fclass results are really not FP values. + (FCLASS_MASK): New define_int_iterator. + (fclass_optab): New define_int_attr. + (): New define_expand + template. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/fclass-compile.c: New test. + * gcc.target/loongarch/fclass-run.c: New test. 
+--- + gcc/config/loongarch/loongarch.md | 53 ++++++++++++++++--- + .../gcc.target/loongarch/fclass-compile.c | 20 +++++++ + .../gcc.target/loongarch/fclass-run.c | 53 +++++++++++++++++++ + 3 files changed, 119 insertions(+), 7 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/fclass-compile.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/fclass-run.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 73cdb38a4..f70ca85bf 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1851,16 +1851,17 @@ + ;; .................... + + (define_insn "extendsidi2" +- [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") ++ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r") + (sign_extend:DI +- (match_operand:SI 1 "nonimmediate_operand" "r,ZC,m,k")))] ++ (match_operand:SI 1 "nonimmediate_operand" "r,ZC,m,k,f")))] + "TARGET_64BIT" + "@ + slli.w\t%0,%1,0 + ldptr.w\t%0,%1 + ld.w\t%0,%1 +- ldx.w\t%0,%1" +- [(set_attr "move_type" "sll0,load,load,load") ++ ldx.w\t%0,%1 ++ movfr2gr.s\t%0,%1" ++ [(set_attr "move_type" "sll0,load,load,load,mftg") + (set_attr "mode" "DI")]) + + (define_insn "extend2" +@@ -4110,14 +4111,52 @@ + "movgr2fcsr\t$r%0,%1") + + (define_insn "fclass_" +- [(set (match_operand:ANYF 0 "register_operand" "=f") +- (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] +- UNSPEC_FCLASS))] ++ [(set (match_operand:SI 0 "register_operand" "=f") ++ (unspec:SI [(match_operand:ANYF 1 "register_operand" "f")] ++ UNSPEC_FCLASS))] + "TARGET_HARD_FLOAT" + "fclass.\t%0,%1" + [(set_attr "type" "unknown") + (set_attr "mode" "")]) + ++(define_int_iterator FCLASS_MASK [68 136 952]) ++(define_int_attr fclass_optab ++ [(68 "isinf") ++ (136 "isnormal") ++ (952 "isfinite")]) ++ ++(define_expand "2" ++ [(match_operand:SI 0 "register_operand" "=r") ++ (match_operand:ANYF 1 "register_operand" " f") ++ (const_int FCLASS_MASK)] ++ "TARGET_HARD_FLOAT" ++ { ++ rtx ft0 = 
gen_reg_rtx (SImode); ++ rtx t0 = gen_reg_rtx (word_mode); ++ rtx mask = GEN_INT (); ++ ++ emit_insn (gen_fclass_ (ft0, operands[1])); ++ ++ if (TARGET_64BIT) ++ emit_insn (gen_extend_insn (t0, ft0, DImode, SImode, 0)); ++ else ++ emit_move_insn (t0, ft0); ++ ++ emit_move_insn (t0, gen_rtx_AND (word_mode, t0, mask)); ++ emit_move_insn (t0, gen_rtx_NE (word_mode, t0, const0_rtx)); ++ ++ if (TARGET_64BIT) ++ { ++ t0 = lowpart_subreg (SImode, t0, DImode); ++ SUBREG_PROMOTED_VAR_P (t0) = 1; ++ SUBREG_PROMOTED_SET (t0, SRP_SIGNED); ++ } ++ ++ emit_move_insn (operands[0], t0); ++ ++ DONE; ++ }) ++ + (define_insn "bytepick_w_" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") +diff --git a/gcc/testsuite/gcc.target/loongarch/fclass-compile.c b/gcc/testsuite/gcc.target/loongarch/fclass-compile.c +new file mode 100644 +index 000000000..9c24d6e26 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/fclass-compile.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mfpu=64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler-times "fclass\\.s" 1 } } */ ++/* { dg-final { scan-assembler-times "fclass\\.d" 1 } } */ ++ ++__attribute__ ((noipa)) int ++test_fclass_f (float f) ++{ ++ return __builtin_isinf (f) ++ | __builtin_isnormal (f) << 1 ++ | __builtin_isfinite (f) << 2; ++} ++ ++__attribute__ ((noipa)) int ++test_fclass_d (double d) ++{ ++ return __builtin_isinf (d) ++ | __builtin_isnormal (d) << 1 ++ | __builtin_isfinite (d) << 2; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/fclass-run.c b/gcc/testsuite/gcc.target/loongarch/fclass-run.c +new file mode 100644 +index 000000000..e5585f9d5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/fclass-run.c +@@ -0,0 +1,53 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fsignaling-nans -D_GNU_SOURCE -std=c23" } */ ++/* { dg-require-effective-target fenv_exceptions } */ ++ ++#include ++#include "fclass-compile.c" ++ 
++#define ASSERT_EQ(x, y) (void)(x == y || (__builtin_abort (), 1)) ++ ++int ++main (void) ++{ ++ volatile float f_inf = __builtin_inff (); ++ volatile float f_zero = 0; ++ volatile float f_normal = 114.514; ++ volatile float f_subnormal = 1e-40; ++ volatile float f_qnan = __builtin_nanf (""); ++ volatile float f_snan = __builtin_nansf (""); ++ volatile double d_inf = __builtin_inf (); ++ volatile double d_zero = 0; ++ volatile double d_normal = 1919.810; ++ volatile double d_subnormal = 1e-320; ++ volatile double d_qnan = __builtin_nan (""); ++ volatile double d_snan = __builtin_nans (""); ++ ++#if __loongarch_frlen >= 64 ++ /* With fclass.{s/d} we shouldn't signal, even if the input is sNaN. ++ PR 66462. */ ++ feenableexcept (FE_INVALID); ++#endif ++ ++ ASSERT_EQ (test_fclass_f (f_inf), 0b001); ++ ASSERT_EQ (test_fclass_f (-f_inf), 0b001); ++ ASSERT_EQ (test_fclass_f (f_zero), 0b100); ++ ASSERT_EQ (test_fclass_f (-f_zero), 0b100); ++ ASSERT_EQ (test_fclass_f (f_normal), 0b110); ++ ASSERT_EQ (test_fclass_f (-f_normal), 0b110); ++ ASSERT_EQ (test_fclass_f (f_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_f (-f_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_f (f_qnan), 0); ++ ASSERT_EQ (test_fclass_f (f_snan), 0); ++ ++ ASSERT_EQ (test_fclass_d (d_inf), 0b001); ++ ASSERT_EQ (test_fclass_d (-d_inf), 0b001); ++ ASSERT_EQ (test_fclass_d (d_zero), 0b100); ++ ASSERT_EQ (test_fclass_d (-d_zero), 0b100); ++ ASSERT_EQ (test_fclass_d (d_normal), 0b110); ++ ASSERT_EQ (test_fclass_d (-d_normal), 0b110); ++ ASSERT_EQ (test_fclass_d (d_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_d (-d_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_d (d_qnan), 0); ++ ASSERT_EQ (test_fclass_d (d_snan), 0); ++} +-- +2.43.0 + diff --git a/0019-LoongArch-Add-support-to-annotate-tablejump.patch b/0019-LoongArch-Add-support-to-annotate-tablejump.patch new file mode 100644 index 0000000000000000000000000000000000000000..79e3d7bcc5b414fcf7d52ef2d680f1ca61582e5d --- /dev/null +++ 
b/0019-LoongArch-Add-support-to-annotate-tablejump.patch @@ -0,0 +1,123 @@ +From f0416e6a2af1fb1f2b18a4410e679f25c57c5e9f Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Thu, 11 Jul 2024 19:43:48 +0800 +Subject: [PATCH 19/20] LoongArch: Add support to annotate tablejump + +This is per the request from the kernel developers. For generating the +ORC unwind info, the objtool program needs to analyze the control flow +of a .o file. If a jump table is used, objtool has to correlate the +jump instruction with the table. + +On x86 (where objtool was initially developed) it's simple: a relocation +entry naturally correlates them because one single instruction is used +for table-based jump. But on a RISC machine objtool would have to +reconstruct the data flow if it must find out the correlation on its +own. + +So, emit an additional section to store the correlation info as pairs of +addresses, each pair contains the address of a jump instruction (jr) and +the address of the jump table. This is very trivial to implement in +GCC. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch.opt.in + (mannotate-tablejump): New option. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch.md (tablejump): Emit + additional correlation info between the jump instruction and the + jump table, if -mannotate-tablejump. + * doc/invoke.texi: Document -mannotate-tablejump. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/jump-table-annotate.c: New test. 
+ +Suggested-by: Tiezhu Yang +--- + gcc/config/loongarch/genopts/loongarch.opt.in | 4 ++++ + gcc/config/loongarch/loongarch.md | 12 +++++++++++- + gcc/config/loongarch/loongarch.opt | 4 ++++ + .../gcc.target/loongarch/jump-table-annotate.c | 15 +++++++++++++++ + 4 files changed, 34 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index d00950cb4..d5bbf01d8 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -301,3 +301,7 @@ default value is 4. + ; CPUCFG independently, so we use bit flags to specify them. + TargetVariable + HOST_WIDE_INT la_isa_evolution = 0 ++ ++mannotate-tablejump ++Target Mask(ANNOTATE_TABLEJUMP) Save ++Annotate table jump instruction (jr {reg}) to correlate it with the jump table. +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index f70ca85bf..bd0825002 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -3496,12 +3496,22 @@ + DONE; + }) + ++(define_mode_attr mode_size [(DI "8") (SI "4")]) ++ + (define_insn "@tablejump" + [(set (pc) + (match_operand:P 0 "register_operand" "e")) + (use (label_ref (match_operand 1 "" "")))] + "" +- "jr\t%0" ++ { ++ return TARGET_ANNOTATE_TABLEJUMP ++ ? "1:jr\t%0\n\t" ++ ".pushsection\t.discard.tablejump_annotate\n\t" ++ "\t.byte\t1b\n\t" ++ "\t.byte\t%1\n\t" ++ ".popsection" ++ : "jr\t%0"; ++ } + [(set_attr "type" "jump") + (set_attr "mode" "none")]) + +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 91cb5236a..6a396b539 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -310,6 +310,10 @@ default value is 4. 
+ TargetVariable + HOST_WIDE_INT la_isa_evolution = 0 + ++mannotate-tablejump ++Target Mask(ANNOTATE_TABLEJUMP) Save ++Annotate table jump instruction (jr {reg}) to correlate it with the jump table ++ + mfrecipe + Target Mask(ISA_FRECIPE) Var(la_isa_evolution) + Support frecipe.{s/d} and frsqrte.{s/d} instructions. +diff --git a/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c b/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c +new file mode 100644 +index 000000000..9d58e60e3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-mannotate-tablejump" } */ ++ ++extern void asdf(int); ++void foo(int x) { ++ switch (x) { ++ case 0: asdf(10); break; ++ case 1: asdf(11); break; ++ case 2: asdf(12); break; ++ case 3: asdf(13); break; ++ case 4: asdf(14); break; ++ } ++} ++ ++/* { dg-final { scan-assembler "\\.discard\\.tablejump_annotate" } } */ +-- +2.43.0 + diff --git a/0020-LoongArch-Fix-up-r15-4130.patch b/0020-LoongArch-Fix-up-r15-4130.patch new file mode 100644 index 0000000000000000000000000000000000000000..6ed2640b5dd02475666941e89ba84d04662590cb --- /dev/null +++ b/0020-LoongArch-Fix-up-r15-4130.patch @@ -0,0 +1,32 @@ +From ff6fe2101c559b80e6f7c6f4e92a8732f20a28f0 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Wed, 10 Jul 2024 12:15:23 +0800 +Subject: [PATCH 20/20] LoongArch: Fix up r15-4130 + +An earlier version of the patch (lacking the regeneration of some files) +was pushed. Fix it up now. + +gcc/ChangeLog: + + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch.opt.urls: Regenerate. 
+--- + gcc/config/loongarch/loongarch.opt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 6a396b539..fae575452 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -312,7 +312,7 @@ HOST_WIDE_INT la_isa_evolution = 0 + + mannotate-tablejump + Target Mask(ANNOTATE_TABLEJUMP) Save +-Annotate table jump instruction (jr {reg}) to correlate it with the jump table ++Annotate table jump instruction (jr {reg}) to correlate it with the jump table. + + mfrecipe + Target Mask(ISA_FRECIPE) Var(la_isa_evolution) +-- +2.43.0 + diff --git a/0021-LoongArch-Change-OSDIR-for-distribution.patch b/0021-LoongArch-Change-OSDIR-for-distribution.patch new file mode 100644 index 0000000000000000000000000000000000000000..7853b786b34012247671c4a76816904f96c7faf2 --- /dev/null +++ b/0021-LoongArch-Change-OSDIR-for-distribution.patch @@ -0,0 +1,25 @@ +From 25423cf92026221b7c8798533c40d3e6269a1d7c Mon Sep 17 00:00:00 2001 +From: Peng Fan +Date: Thu, 31 Oct 2024 02:01:49 +0000 +Subject: [PATCH] LoongArch: Change OSDIR for distribution + +Signed-off-by: Peng Fan +--- + gcc/config/loongarch/t-linux | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/gcc/config/loongarch/t-linux b/gcc/config/loongarch/t-linux +index 7cd7cde25..1d1f42596 100644 +--- a/gcc/config/loongarch/t-linux ++++ b/gcc/config/loongarch/t-linux +@@ -28,4 +28,7 @@ ifeq ($(filter LA_DISABLE_MULTILIB,$(tm_defines)),) + MULTILIB_OSDIRNAMES += mabi.lp64f=$(MULTIOSDIR_lp64f) + MULTILIB_OSDIRNAMES += mabi.lp64s=$(MULTIOSDIR_lp64s) + ++else ++ MULTILIB_OSDIRNAMES := ../lib64 ++ MULTILIB_OPTIONS = mabi=lp64d + endif +-- +2.45.2 + diff --git a/0022-LoongArch-support-nonshared-extfloat.diff b/0022-LoongArch-support-nonshared-extfloat.diff new file mode 100644 index 0000000000000000000000000000000000000000..ac4c76dc250074a619e733c7d7707bce4fbd04bb --- /dev/null +++ 
b/0022-LoongArch-support-nonshared-extfloat.diff @@ -0,0 +1,18 @@ +diff --git a/libstdc++-v3/src/nonshared98/extfloat.S b/libstdc++-v3/src/nonshared98/extfloat.S +index b6e4164b5..bedada6d8 100644 +--- a/libstdc++-v3/src/nonshared98/extfloat.S ++++ b/libstdc++-v3/src/nonshared98/extfloat.S +@@ -56,8 +56,12 @@ + #elif defined __riscv && __riscv_xlen == 64 + #define ALIGN1 .align 3 + #define ALIGN3 .align 3 ++#elif defined __loongarch64 ++#define ALIGN1 .align 3 ++#define ALIGN3 .align 3 + #endif +-#if defined __x86_64__ || defined __powerpc64__ || defined __s390x__ || defined __ia64__ || defined __aarch64__ || (defined __riscv && __riscv_xlen == 64) ++#if defined __x86_64__ || defined __powerpc64__ || defined __s390x__ || defined __ia64__ || defined __aarch64__ || (defined __riscv && __riscv_xlen == 64) \ ++ || defined __loongarch64 + #define SIZE1 32 + #define SIZE2 16 + #define OFF 16 diff --git a/0023-LoongArch-libstdcxx-nonshared.diff b/0023-LoongArch-libstdcxx-nonshared.diff new file mode 100644 index 0000000000000000000000000000000000000000..feb4492f10aba29375572ef7148c97c97c5f2c9a --- /dev/null +++ b/0023-LoongArch-libstdcxx-nonshared.diff @@ -0,0 +1,205 @@ +diff --git a/libstdc++-v3/src/nonshared17/cow-fs_dir.cc b/libstdc++-v3/src/nonshared17/cow-fs_dir.cc +index 9525952b0..3a1c52edc 100644 +--- a/libstdc++-v3/src/nonshared17/cow-fs_dir.cc ++++ b/libstdc++-v3/src/nonshared17/cow-fs_dir.cc +@@ -110,4 +110,6 @@ asm (".hidden _ZNKSt10filesystem28recursive_directory_iterator10_Dir_stack12curr + //asm (".hidden _ZNSt10filesystem4pathC1ISsS0_EERKT_NS0_6formatE"); + asm (".hidden _ZNSt10filesystem28recursive_directory_iterator7__eraseEPSt10error_code"); + //asm (".hidden _ZNKSt10filesystem4_Dir16dir_and_pathnameEv"); ++#ifndef __loongarch64 + asm (".hidden _ZNKSt10filesystem4_Dir7currentEv"); ++#endif +diff --git a/libstdc++-v3/src/nonshared17/cow-fs_ops.cc b/libstdc++-v3/src/nonshared17/cow-fs_ops.cc +index 100565f43..0f9bec9be 100644 +--- 
a/libstdc++-v3/src/nonshared17/cow-fs_ops.cc ++++ b/libstdc++-v3/src/nonshared17/cow-fs_ops.cc +@@ -88,4 +88,6 @@ asm (".hidden _ZNKSt10filesystem4path8filenameEv"); + //asm (".hidden _ZSt16__do_uninit_copyINSt10filesystem4path8iteratorESt15_Deque_iteratorIS1_RS1_PS1_EET0_T_S8_S7_"); + //asm (".hidden _ZSt4copyINSt10filesystem4path8iteratorESt15_Deque_iteratorIS1_RS1_PS1_EET0_T_S8_S7_"); + #endif ++#ifndef __loongarch64 + asm (".hidden _ZNSs4swapERSs"); ++#endif +diff --git a/libstdc++-v3/src/nonshared17/cow-fs_path.cc b/libstdc++-v3/src/nonshared17/cow-fs_path.cc +index 630646d8e..954a09d3e 100644 +--- a/libstdc++-v3/src/nonshared17/cow-fs_path.cc ++++ b/libstdc++-v3/src/nonshared17/cow-fs_path.cc +@@ -93,28 +93,36 @@ asm (".hidden _ZNKSt10filesystem4path5_List5_Impl4copyEv"); + #ifndef __riscv + asm (".hidden _ZNSt16_Sp_counted_baseILN9__gnu_cxx12_Lock_policyE2EE24_M_release_last_use_coldEv"); + #endif ++#ifndef __loongarch64 + asm (".hidden _ZNSbIwSt11char_traitsIwESaIwEE6resizeEmw"); + asm (".hidden _ZNSbIwSt11char_traitsIwESaIwEE7reserveEm"); + asm (".hidden _ZNSbIwSt11char_traitsIwESaIwEE9_M_mutateEmmm"); + asm (".hidden _ZNSs6insertEmPKcm"); + asm (".hidden _ZNSs6resizeEmc"); +-asm (".hidden _ZNSs7reserveEm"); + asm (".hidden _ZNSs9_M_mutateEmmm"); + asm (".hidden _ZNSsC1ERKSsmm"); +-#ifndef __riscv ++asm (".hidden _ZNSs7reserveEm"); ++#endif ++#if !defined __riscv && !defined __loongarch64 + asm (".hidden _ZNSsC2ERKSsmm"); + asm (".hidden _ZNSt10filesystem4pathD2Ev"); + #endif ++#ifndef __loongarch64 + asm (".hidden _ZSt16__do_str_codecvtISbIwSt11char_traitsIwESaIwEEcSt7codecvtIwc11__mbstate_tES5_MS6_KFNSt12codecvt_base6resultERS5_PKcSB_RSB_PwSD_RSD_EEbPKT0_SJ_RT_RKT1_RT2_RmT3_"); ++#endif + //asm (".hidden _ZSt16__do_str_codecvtISswSt7codecvtIwc11__mbstate_tES1_MS2_KFNSt12codecvt_base6resultERS1_PKwS7_RS7_PcS9_RS9_EEbPKT0_SF_RT_RKT1_RT2_RmT3_"); + #endif ++#ifndef __loongarch64 + asm (".hidden _ZNSbIwSt11char_traitsIwESaIwEE12_M_leak_hardEv"); + asm 
(".hidden _ZNSt10filesystem4path5_List5beginEv"); +-#ifndef __s390x__ ++#endif ++#if !defined(__s390x__) && !defined(__loongarch64) + asm (".hidden _ZNSt10filesystem4path7_Parser4nextEv"); + #endif ++#ifndef __loongarch64 + asm (".hidden _ZNSt10filesystem4pathD1Ev"); + asm (".hidden _ZNSs12_M_leak_hardEv"); ++#endif + #ifdef __i386__ + asm (".hidden _ZNSbIwSt11char_traitsIwESaIwEE6resizeEjw"); + asm (".hidden _ZNSbIwSt11char_traitsIwESaIwEE7reserveEj"); +diff --git a/libstdc++-v3/src/nonshared17/floating_from_chars.cc b/libstdc++-v3/src/nonshared17/floating_from_chars.cc +index 4ef0871a4..0895e120a 100644 +--- a/libstdc++-v3/src/nonshared17/floating_from_chars.cc ++++ b/libstdc++-v3/src/nonshared17/floating_from_chars.cc +@@ -25,9 +25,11 @@ + //asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphic_allocatorIcEEE9_M_createERjj"); + asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphic_allocatorIcEEE9_M_mutateEjjPKcj"); + #else ++#ifndef __loongarch64 + //asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphic_allocatorIcEEE9_M_createERmm"); + asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphic_allocatorIcEEE9_M_mutateEmmPKcm"); + #endif ++#endif + #if defined(__s390x__) || defined(__powerpc64__) + //asm (".hidden _ZSt10from_charsPKcS0_RgSt12chars_format"); + #endif +@@ -43,8 +45,10 @@ asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphi + #if defined(__i386__) || (defined(__powerpc__) && !defined(__powerpc64__)) + asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphic_allocatorIcEEE15_M_replace_coldEPcjPKcjj"); + #else ++#ifndef __loongarch64 + asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphic_allocatorIcEEE15_M_replace_coldEPcmPKcmm"); + #endif ++#endif + #ifndef __riscv + asm (".hidden _ZNSt8__detail31__from_chars_alnum_to_val_tableILb0EE5valueE"); + #endif +diff --git 
a/libstdc++-v3/src/nonshared17/floating_from_chars110.cc b/libstdc++-v3/src/nonshared17/floating_from_chars110.cc +index 3c7cd9610..7a46c19fd 100644 +--- a/libstdc++-v3/src/nonshared17/floating_from_chars110.cc ++++ b/libstdc++-v3/src/nonshared17/floating_from_chars110.cc +@@ -29,9 +29,11 @@ + asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphic_allocatorIcEEE15_M_replace_coldEPcjPKcjj"); + asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphic_allocatorIcEEE9_M_mutateEjjPKcj"); + #else ++#ifndef __loongarch64 + asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphic_allocatorIcEEE15_M_replace_coldEPcmPKcmm"); + asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcENSt3pmr21polymorphic_allocatorIcEEE9_M_mutateEmmPKcm"); + #endif ++#endif + #ifndef __riscv + asm (".hidden _ZNSt8__detail31__from_chars_alnum_to_val_tableILb0EE5valueE"); + #endif +diff --git a/libstdc++-v3/src/nonshared17/floating_to_chars110.cc b/libstdc++-v3/src/nonshared17/floating_to_chars110.cc +index de9465513..b2ef2edce 100644 +--- a/libstdc++-v3/src/nonshared17/floating_to_chars110.cc ++++ b/libstdc++-v3/src/nonshared17/floating_to_chars110.cc +@@ -24,7 +24,7 @@ + #include "../c++17/floating_to_chars.cc" + //asm (".hidden _ZSt12__to_chars_iIoENSt9enable_ifIXsrSt5__or_IJS1_IJSt7is_sameINSt9remove_cvIT_E4typeEaES2_IS6_sES2_IS6_iES2_IS6_lES2_IS6_xES2_IS6_nEEES1_IJS2_IS6_hES2_IS6_tES2_IS6_jES2_IS6_mES2_IS6_yES2_IS6_oEEES2_IcS6_EEE5valueESt15to_chars_resultE4typeEPcSQ_S4_i"); + //asm (".hidden _ZSt12__to_chars_iIoENSt9enable_ifIXsrSt5__or_IIS1_IISt7is_sameINSt9remove_cvIT_E4typeEaES2_IS6_sES2_IS6_iES2_IS6_lES2_IS6_xES2_IS6_nEEES1_IIS2_IS6_hES2_IS6_tES2_IS6_jES2_IS6_mES2_IS6_yES2_IS6_oEEES2_IcS6_EEE5valueESt15to_chars_resultE4typeEPcSQ_S4_i"); +-#ifndef __riscv ++#if !defined(__riscv) && !defined(__loongarch64) + asm (".hidden _ZNSt8__detail18__to_chars_10_implIjEEvPcjT_"); + #endif + #if !defined(__i386__) +diff 
--git a/libstdc++-v3/src/nonshared17/fs_dir.cc b/libstdc++-v3/src/nonshared17/fs_dir.cc +index 655b04794..12d96ae35 100644 +--- a/libstdc++-v3/src/nonshared17/fs_dir.cc ++++ b/libstdc++-v3/src/nonshared17/fs_dir.cc +@@ -43,8 +43,10 @@ asm (".hidden _ZNSt10filesystem9_Dir_base7advanceEbRSt10error_code"); + //asm (".hidden _ZNSt10filesystem9_Dir_baseC1EPKcbRSt10error_code"); + //asm (".hidden _ZNSt10filesystem9_Dir_baseC2EPKcbRSt10error_code"); + #endif ++#ifndef __loongarch64 + asm (".hidden _ZNSt10filesystem7__cxx114pathC1INSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES1_EERKT_NS1_6formatE"); +-#ifndef __riscv ++#endif ++#if !defined(__riscv) && !defined(__loongarch64) + asm (".hidden _ZNSt10filesystem7__cxx114pathC2INSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES1_EERKT_NS1_6formatE"); + asm (".hidden _ZNSt12__shared_ptrINSt10filesystem7__cxx1128recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE2EE4swapERS6_"); + asm (".hidden _ZNSt12__shared_ptrINSt10filesystem7__cxx1128recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE2EE5resetEv"); +@@ -99,7 +101,7 @@ asm (".hidden _ZNSt23_Sp_counted_ptr_inplaceINSt10filesystem7__cxx1128recursive_ + asm (".hidden _ZZNSt19_Sp_make_shared_tag5_S_tiEvE5__tag"); + asm (".hidden _ZNSt23_Sp_counted_ptr_inplaceINSt10filesystem7__cxx114_DirESaIS2_ELN9__gnu_cxx12_Lock_policyE2EED2Ev"); + #endif +-#if !defined__i386__ && !defined __riscv ++#if !defined__i386__ && !defined __riscv && !defined __loongarch64 + asm (".hidden _ZNSt16_Sp_counted_baseILN9__gnu_cxx12_Lock_policyE2EE24_M_release_last_use_coldEv"); + //asm (".hidden _ZNSt5dequeINSt10filesystem7__cxx114_DirESaIS2_EE17_M_reallocate_mapEmb"); + #endif +@@ -114,6 +116,8 @@ asm (".hidden _ZNSt10filesystem9_Dir_base7advanceEbRSt10error_code"); + asm (".hidden _ZNKSt10filesystem7__cxx1128recursive_directory_iterator10_Dir_stack12current_pathEv"); + asm (".hidden 
_ZNSt10filesystem7__cxx1128recursive_directory_iterator7__eraseEPSt10error_code"); + //asm (".hidden _ZNKSt10filesystem7__cxx114_Dir16dir_and_pathnameEv"); ++#ifndef __loongarch64 + asm (".hidden _ZNKSt10filesystem7__cxx114_Dir7currentEv"); ++#endif + //asm (".hidden _ZNSt10filesystem7__cxx114_DirC1ERKNS0_4pathEbbbRSt10error_code"); + //asm (".hidden _ZNSt10filesystem7__cxx114_DirC2ERKNS0_4pathEbbbRSt10error_code"); +diff --git a/libstdc++-v3/src/nonshared17/fs_path80.cc b/libstdc++-v3/src/nonshared17/fs_path80.cc +index 16576bbd3..b5e9f1a79 100644 +--- a/libstdc++-v3/src/nonshared17/fs_path80.cc ++++ b/libstdc++-v3/src/nonshared17/fs_path80.cc +@@ -128,12 +128,16 @@ asm (".hidden _ZNSt12system_errorC1ESt10error_codeRKNSt7__cxx1112basic_stringIcS + asm (".hidden _ZNSt12system_errorC2ESt10error_codeRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE"); + #endif + //asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_assignERKS4_"); ++#ifndef __loongarch64 + asm (".hidden _ZNSt10filesystem7__cxx114path5_List5beginEv"); ++#endif + #ifndef __i386__ + //asm (".hidden _ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE6resizeEmw"); + //asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm"); + //asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm"); ++#ifndef __loongarch64 + asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE6resizeEmc"); ++#endif + //asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7reserveEm"); + //asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_mutateEmmPKcm"); + #ifndef __riscv +diff --git a/libstdc++-v3/src/nonshared17/memory_resource.cc b/libstdc++-v3/src/nonshared17/memory_resource.cc +index 36bfac568..cb1eb14de 100644 +--- a/libstdc++-v3/src/nonshared17/memory_resource.cc ++++ b/libstdc++-v3/src/nonshared17/memory_resource.cc +@@ -73,7 +73,9 @@ asm (".hidden 
_ZNSt3pmr28unsynchronized_pool_resource12_M_find_poolEj"); + #ifdef __powerpc64__ + //asm (".hidden _ZNSt3pmr15__pool_resource5_Pool10deallocateEPNS_15memory_resourceEPv"); + #endif ++#ifndef __loongarch64 + asm (".hidden _ZNSt22__shared_mutex_pthread6unlockEv"); ++#endif + #if defined(__i386__) || (defined(__powerpc__) && !defined(__powerpc64__)) + asm (".hidden _ZNSt6vectorINSt3pmr15__pool_resource9_BigBlockENS0_21polymorphic_allocatorIS2_EEE17_M_realloc_appendIIRjS7_EEEvDpOT_"); + asm (".hidden _ZNSt6vectorINSt3pmr15__pool_resource9_BigBlockENS0_21polymorphic_allocatorIS2_EEE17_M_realloc_appendIJRjS7_EEEvDpOT_"); +diff --git a/libstdc++-v3/src/nonshared20/tzdb80.cc b/libstdc++-v3/src/nonshared20/tzdb80.cc +index 1e88dba3b..1a3f769ac 100644 +--- a/libstdc++-v3/src/nonshared20/tzdb80.cc ++++ b/libstdc++-v3/src/nonshared20/tzdb80.cc +@@ -149,7 +149,9 @@ asm (".hidden _ZTSSt19_Sp_make_shared_tag"); + asm (".hidden _ZNSt16_Sp_counted_baseILN9__gnu_cxx12_Lock_policyE2EE24_M_release_last_use_coldEv"); + #endif + #endif ++#ifndef __loongarch64 + asm (".hidden _ZSt23__atomic_wait_address_vIiZNKSt13__atomic_baseIiE4waitEiSt12memory_orderEUlvE_EvPKT_S4_T0_"); ++#endif + #ifndef __riscv + asm (".hidden _ZNSt16_Sp_counted_baseILN9__gnu_cxx12_Lock_policyE2EE10_M_releaseEv"); + asm (".hidden _ZNSt10unique_ptrINSt10filesystem7__cxx114path5_List5_ImplENS3_13_Impl_deleterEED2Ev"); diff --git a/gcc-14.spec b/gcc-14.spec index db1e941e9a88d0798938e59911e13eebda55a9c1..9cb2d327bea60233b2b5a61b2b0ec8ceeb3d9f8b 100644 --- a/gcc-14.spec +++ b/gcc-14.spec @@ -36,7 +36,7 @@ %else %global build_libquadmath 0 %endif -%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 riscv64 +%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 riscv64 loongarch64 %global build_libasan 1 %else %global build_libasan 0 @@ -46,33 +46,33 @@ %else %global build_libhwasan 0 %endif -%ifarch x86_64 ppc64 ppc64le aarch64 s390x riscv64 +%ifarch x86_64 ppc64 ppc64le 
aarch64 s390x riscv64 loongarch64 %global build_libtsan 1 %else %global build_libtsan 0 %endif -%ifarch x86_64 ppc64 ppc64le aarch64 s390x riscv64 +%ifarch x86_64 ppc64 ppc64le aarch64 s390x riscv64 loongarch64 %global build_liblsan 1 %else %global build_liblsan 0 %endif -%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 riscv64 +%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 riscv64 loongarch64 %global build_libubsan 1 %else %global build_libubsan 0 %endif -%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 %{mips} riscv64 +%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 %{mips} riscv64 loongarch64 %global build_libatomic 1 %else %global build_libatomic 0 %endif -%ifarch %{ix86} x86_64 %{arm} alpha ppc ppc64 ppc64le ppc64p7 s390 s390x aarch64 riscv64 +%ifarch %{ix86} x86_64 %{arm} alpha ppc ppc64 ppc64le ppc64p7 s390 s390x aarch64 riscv64 loongarch64 %global build_libitm 1 %else %global build_libitm 0 %endif %global build_libstdcxx_docs 0 -%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 %{mips} riscv64 +%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 %{mips} riscv64 loongarch64 %global attr_ifunc 1 %else %global attr_ifunc 0 @@ -90,11 +90,10 @@ Summary: Various compilers (C, C++, Objective-C, ...) Name: %{?_scl_prefix}gcc%{gcc_ver} Version: 14.2.1 -Release: 7 +Release: 8 # libgcc, libgfortran, libgomp, libstdc++ and crtstuff have # GCC Runtime Exception. 
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD -ExcludeArch: loongarch64 Source0: https://ftp.gnu.org/gnu/gcc/gcc-14.2.0/gcc-14.2.0.tar.xz URL: http://gcc.gnu.org @@ -196,6 +195,30 @@ Patch1024: GCC14-1024-i386-Fix-AVX512BW-intrin-header-with-__OPTIMIZE__-PR.patch Patch1025: GCC14-1025-i386-Do-not-check-vector-size-conflict-when-AVX512-i.patch Patch1026: GCC14-1026-i386-Deprecate-m-no-avx10.1-and-make-mno-avx10.1-512.patch +Patch3000: 0001-LoongArch-Remove-the-definition-of-the-macro-LOGICAL.patch +Patch3001: 0002-LoongArch-Fix-mode-size-comparision-in-loongarch_exp.patch +Patch3002: 0003-LoongArch-Use-bstrins-for-value-1u-const.patch +Patch3003: 0004-LoongArch-Tweak-IOR-rtx_cost-for-bstrins.patch +Patch3004: 0005-LoongArch-NFC-Dedup-and-sort-the-comment-in-loongarc.patch +Patch3005: 0006-LoongArch-Fix-explicit-relocs-extreme-tls-desc.c-tes.patch +Patch3006: 0007-LoongArch-Define-loongarch_insn_cost-and-set-the-cos.patch +Patch3007: 0008-LoongArch-Remove-unreachable-codes.patch +Patch3008: 0009-LoongArch-Organize-the-code-related-to-split-move-an.patch +Patch3009: 0010-LoongArch-Expand-some-SImode-operations-through-si3_.patch +Patch3010: 0011-LoongArch-Relax-ins_zero_bitmask_operand-and-remove-.patch +Patch3011: 0012-LoongArch-Rework-bswap-hi-si-di-2-definition.patch +Patch3012: 0013-testsuite-fix-dg-do-preprocess-typo.patch +Patch3013: 0014-LoongArch-Remove-gawk-extension-from-a-generator-scr.patch +Patch3014: 0015-LoongArch-Use-iorn-and-andn-standard-pattern-names.patch +Patch3015: 0016-LoongArch-Drop-vcond-u-expanders.patch +Patch3016: 0017-LoongArch-Provide-ashr-lshr-and-ashl-RTL-pattern-for.patch +Patch3017: 0018-LoongArch-Implement-scalar-isinf-isnormal-and-isfini.patch +Patch3018: 0019-LoongArch-Add-support-to-annotate-tablejump.patch +Patch3019: 0020-LoongArch-Fix-up-r15-4130.patch +Patch3020: 0021-LoongArch-Change-OSDIR-for-distribution.patch +Patch3021: 0022-LoongArch-support-nonshared-extfloat.diff +Patch3022:
0023-LoongArch-libstdcxx-nonshared.diff + # On ARM EABI systems, we do want -gnueabi to be part of the # target triple. %global nonsharedver 80 @@ -591,6 +614,9 @@ CONFIGURE_OPTS="\ %ifnarch sparc sparcv9 ppc --build=%{gcc_target_platform} \ %endif +%ifarch loongarch64 + --disable-libquadmath --enable-tls --enable-default-pie \ +%endif %if 0%{?scl:1} --program-suffix=%{binsuffix} %endif @@ -1719,6 +1745,17 @@ end %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/riscv_bitmanip.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/riscv_th_vector.h %endif +%ifarch loongarch64 +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/larchintrin.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-protos.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-opts.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-str.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-def.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-tune.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/loongarch/loongarch-driver.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/lsxintrin.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/lasxintrin.h +%endif %if %{build_libasan} %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/sanitizer %endif @@ -2225,6 +2262,9 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Mon Mar 3 2025 Peng Fan - 14.2.1-8 +- LoongArch: sync from upstream. + * Mon Feb 17 2025 Hu, Lin - 14.2.1-7 - [Sync] Sync patches from gcc.gnu.org's releases/gcc-14