From b2c2811c8745764d0fc93e4f674efabdab64c761 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 4 Aug 2025 15:46:03 +0800 Subject: [PATCH] LoongArch: common sync from upstream Signed-off-by: Peng Fan --- ...Arch-Fix-soft-float-builds-of-libffi.patch | 34 + ...ON_RELRO-for-or-.data.rel.ro-.local-.patch | 89 ++ ...uite-Add-O-for-jump-table-annotate.c.patch | 32 + ..._builtin_lsx_vorn_v-and-__builtin_la.patch | 127 +++ ...erical-errors-in-lasx_xvreplgr2vr_-a.patch | 47 + 0006-LoongArch-Remove-redundant-code.patch | 171 ++++ ...ite-Fix-loongarch-vect-frint-scalar..patch | 41 + ...rch-testsuite-Fix-l-a-sx-andn-iorn.c.patch | 45 + ...hift-offset-when-emit-xv-v-srl-sll-s.patch | 448 ++++++++ ...rch-Remove-QHSD-and-use-QHWD-instead.patch | 49 + ...LoongArch-Add-bit-reverse-operations.patch | 90 ++ ...-CRC-expander-to-generate-faster-CRC.patch | 88 ++ ...rch-Combine-xor-and-crc-instructions.patch | 83 ++ 0014-LoongArch-Add-crc-tests.patch | 164 +++ ...ent-TARGET_IRA_CHANGE_PSEUDO_ALLOCNO.patch | 78 ++ ...upport-immediate_operand-for-vec_cmp.patch | 317 ++++++ ...ent-vector-cbranch-optab-for-LSX-and.patch | 120 +++ ...-useless-UNSPECs-and-define_mode_att.patch | 235 +++++ ...lector-error-in-lasx_xvexth_h-w-d-pa.patch | 64 ++ ...gs-in-insn-patterns-lasx_xvrepl128ve.patch | 155 +++ ...Add-some-vector-pack-unpack-patterns.patch | 966 ++++++++++++++++++ ...-Add-standard-patterns-uabd-and-sabd.patch | 411 ++++++++ ...mize-for-conditional-move-operations.patch | 285 ++++++ ...ze-initializing-fp-resgister-to-zero.patch | 86 ++ ...h-Opitmize-the-cost-of-vec_construct.patch | 94 ++ ...te-the-final-immediate-for-lu12i.w-l.patch | 237 +++++ 0027-LoongArch-Add-alsl.wu.patch | 63 ++ 0028-LoongArch-Fix-cost-model-for-alsl.patch | 97 ++ ...fy-using-bstr-ins-pick-instructions-.patch | 220 ++++ ...e-reassociation-for-bitwise-operatio.patch | 396 +++++++ ...LoongArch-Implement-target-attribute.patch | 948 +++++++++++++++++ 0032-LoongArch-Implement-target-pragma.patch | 498 +++++++++ 
...ong-code-with-optab-_alsl_reversesi_.patch | 89 ++ ...-invalid-subregs-in-xorsign-PR118501.patch | 35 + ...E-caused-by-illegal-calls-to-builtin.patch | 67 ++ ...ch-Correct-the-mode-for-mask-eq-ne-z.patch | 58 ++ ...he-function-loongarch_register_pragm.patch | 160 +++ ...the-function-loongarch_cpu_cpp_built.patch | 195 ++++ ...setting-the-compilation-options-upda.patch | 218 ++++ ...fpu-none-__loongarch_frecipe-shouldn.patch | 88 ++ ...h-Adjust-the-cost-of-ADDRESS_REG_REG.patch | 180 ++++ ...e-issue-of-function-jump-out-of-rang.patch | 45 + ...-ADD-IOR-or-XOR-when-combining-objec.patch | 147 +++ ...rder-using-vrepli-instructions-to-ma.patch | 168 +++ ...oongArch-Allow-moving-TImode-vectors.patch | 222 ++++ ...ent-vec_widen_mult_-even-odd-_-for-L.patch | 108 ++ ...ent-su-dot_prod-for-LSX-and-LASX-mod.patch | 90 ++ ...rmal-RTL-pattern-instead-of-UNSPEC-f.patch | 190 ++++ ...correct-reorder-of-__lsx_vldx-and-__.patch | 260 +++++ ...E-when-trying-to-recognize-bitwise-a.patch | 112 ++ ...Arch-Don-t-use-C-17-feature-PR119238.patch | 49 + 0052-LoongArch-Add-ABI-names-for-FPR.patch | 90 ++ ...Arch-Support-Q-suffix-for-__float128.patch | 76 ++ ...fault-alignment-for-functions-jumps-.patch | 91 ++ ...en-evolution.awk-compatible-with-Fre.patch | 43 + ...-Fix-awk-sed-usage-for-compatibility.patch | 130 +++ ...-dg-do-what-default-save-and-restore.patch | 57 ++ ...Arch-Prevent-subreg-of-subreg-in-CRC.patch | 68 ++ ...ix-errors-due-to-version-differences.patch | 538 ++++++++++ gcc.spec | 124 ++- 60 files changed, 10475 insertions(+), 1 deletion(-) create mode 100644 0001-libffi-LoongArch-Fix-soft-float-builds-of-libffi.patch create mode 100644 0002-Always-set-SECTION_RELRO-for-or-.data.rel.ro-.local-.patch create mode 100644 0003-LoongArch-testsuite-Add-O-for-jump-table-annotate.c.patch create mode 100644 0004-LoongArch-Make-__builtin_lsx_vorn_v-and-__builtin_la.patch create mode 100644 0005-LoongArch-Fix-clerical-errors-in-lasx_xvreplgr2vr_-a.patch create mode 100644 
0006-LoongArch-Remove-redundant-code.patch create mode 100644 0007-LoongArch-testsuite-Fix-loongarch-vect-frint-scalar..patch create mode 100644 0008-LoongArch-testsuite-Fix-l-a-sx-andn-iorn.c.patch create mode 100644 0009-LoongArch-Mask-shift-offset-when-emit-xv-v-srl-sll-s.patch create mode 100644 0010-LoongArch-Remove-QHSD-and-use-QHWD-instead.patch create mode 100644 0011-LoongArch-Add-bit-reverse-operations.patch create mode 100644 0012-LoongArch-Add-CRC-expander-to-generate-faster-CRC.patch create mode 100644 0013-LoongArch-Combine-xor-and-crc-instructions.patch create mode 100644 0014-LoongArch-Add-crc-tests.patch create mode 100644 0015-LoongArch-Implement-TARGET_IRA_CHANGE_PSEUDO_ALLOCNO.patch create mode 100644 0016-LoongArch-Support-immediate_operand-for-vec_cmp.patch create mode 100644 0017-LoongArch-Implement-vector-cbranch-optab-for-LSX-and.patch create mode 100644 0018-LoongArch-Remove-useless-UNSPECs-and-define_mode_att.patch create mode 100644 0019-LoongArch-Fix-selector-error-in-lasx_xvexth_h-w-d-pa.patch create mode 100644 0020-LoongArch-Fix-bugs-in-insn-patterns-lasx_xvrepl128ve.patch create mode 100644 0021-LoongArch-Add-some-vector-pack-unpack-patterns.patch create mode 100644 0022-LoongArch-Add-standard-patterns-uabd-and-sabd.patch create mode 100644 0023-LoongArch-Optimize-for-conditional-move-operations.patch create mode 100644 0024-LoongArch-Optimize-initializing-fp-resgister-to-zero.patch create mode 100644 0025-LoongArch-Opitmize-the-cost-of-vec_construct.patch create mode 100644 0026-LoongArch-Generate-the-final-immediate-for-lu12i.w-l.patch create mode 100644 0027-LoongArch-Add-alsl.wu.patch create mode 100644 0028-LoongArch-Fix-cost-model-for-alsl.patch create mode 100644 0029-LoongArch-Simplify-using-bstr-ins-pick-instructions-.patch create mode 100644 0030-LoongArch-Improve-reassociation-for-bitwise-operatio.patch create mode 100644 0031-LoongArch-Implement-target-attribute.patch create mode 100644 
0032-LoongArch-Implement-target-pragma.patch create mode 100644 0033-LoongArch-Fix-wrong-code-with-optab-_alsl_reversesi_.patch create mode 100644 0034-LoongArch-Fix-invalid-subregs-in-xorsign-PR118501.patch create mode 100644 0035-LoongArch-Fix-ICE-caused-by-illegal-calls-to-builtin.patch create mode 100644 0036-LoongArch-Correct-the-mode-for-mask-eq-ne-z.patch create mode 100644 0037-LoongArch-Move-the-function-loongarch_register_pragm.patch create mode 100644 0038-LoongArch-Split-the-function-loongarch_cpu_cpp_built.patch create mode 100644 0039-LoongArch-After-setting-the-compilation-options-upda.patch create mode 100644 0040-LoongArch-When-mfpu-none-__loongarch_frecipe-shouldn.patch create mode 100644 0041-LoongArch-Adjust-the-cost-of-ADDRESS_REG_REG.patch create mode 100644 0042-LoongArch-Fix-the-issue-of-function-jump-out-of-rang.patch create mode 100644 0043-LoongArch-Accept-ADD-IOR-or-XOR-when-combining-objec.patch create mode 100644 0044-LoongArch-Try-harder-using-vrepli-instructions-to-ma.patch create mode 100644 0045-LoongArch-Allow-moving-TImode-vectors.patch create mode 100644 0046-LoongArch-Implement-vec_widen_mult_-even-odd-_-for-L.patch create mode 100644 0047-LoongArch-Implement-su-dot_prod-for-LSX-and-LASX-mod.patch create mode 100644 0048-LoongArch-Use-normal-RTL-pattern-instead-of-UNSPEC-f.patch create mode 100644 0049-LoongArch-Fix-incorrect-reorder-of-__lsx_vldx-and-__.patch create mode 100644 0050-LoongArch-Fix-ICE-when-trying-to-recognize-bitwise-a.patch create mode 100644 0051-LoongArch-Don-t-use-C-17-feature-PR119238.patch create mode 100644 0052-LoongArch-Add-ABI-names-for-FPR.patch create mode 100644 0053-LoongArch-Support-Q-suffix-for-__float128.patch create mode 100644 0054-LoongArch-Set-default-alignment-for-functions-jumps-.patch create mode 100644 0055-LoongArch-Make-gen-evolution.awk-compatible-with-Fre.patch create mode 100644 0056-LoongArch-Fix-awk-sed-usage-for-compatibility.patch create mode 100644 
0057-LoongArch-Change-dg-do-what-default-save-and-restore.patch create mode 100644 0058-LoongArch-Prevent-subreg-of-subreg-in-CRC.patch create mode 100644 0059-LoongArch-Fix-errors-due-to-version-differences.patch diff --git a/0001-libffi-LoongArch-Fix-soft-float-builds-of-libffi.patch b/0001-libffi-LoongArch-Fix-soft-float-builds-of-libffi.patch new file mode 100644 index 0000000..8ed841f --- /dev/null +++ b/0001-libffi-LoongArch-Fix-soft-float-builds-of-libffi.patch @@ -0,0 +1,34 @@ +From b38679cb1f713a9cba9e6f64d86ba913602ea354 Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Sat, 27 Jan 2024 15:09:46 +0800 +Subject: [PATCH 01/59] libffi: LoongArch: Fix soft-float builds of libffi + +This patch corresponds to the upstream PR: +https://github.com/libffi/libffi/pull/817 +And has been merged. + +libffi/ChangeLog: + + * src/loongarch64/ffi.c: Avoid defining floats + in struct call_context if the ABI is soft-float. +--- + libffi/src/loongarch64/ffi.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/libffi/src/loongarch64/ffi.c b/libffi/src/loongarch64/ffi.c +index 140be3bc3..01c2e18a3 100644 +--- a/libffi/src/loongarch64/ffi.c ++++ b/libffi/src/loongarch64/ffi.c +@@ -58,7 +58,9 @@ + */ + typedef struct call_context + { ++#if !defined(__loongarch_soft_float) + ABI_FLOAT fa[8]; ++#endif + size_t a[10]; + } call_context; + +-- +2.47.3 + diff --git a/0002-Always-set-SECTION_RELRO-for-or-.data.rel.ro-.local-.patch b/0002-Always-set-SECTION_RELRO-for-or-.data.rel.ro-.local-.patch new file mode 100644 index 0000000..12e9207 --- /dev/null +++ b/0002-Always-set-SECTION_RELRO-for-or-.data.rel.ro-.local-.patch @@ -0,0 +1,89 @@ +From ab436c2a9c5dd5c55b9c4d5a6f19bd82ed8d9716 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 11 Oct 2024 02:44:27 +0800 +Subject: [PATCH 02/59] Always set SECTION_RELRO for or .data.rel.ro{,.local} + [PR116887] + +At least two ports (hppa and loongarch) need to set SECTION_RELRO for +.data.rel.ro{,.local} in section_type_flags (PR52999 and
PR116887), and +I cannot see a reason not to just set it in the generic code. + +With this applied we can also remove the hppa-specific +pa_section_type_flags in a future patch. + +gcc/ChangeLog: + + PR target/116887 + * varasm.cc (default_section_type_flags): Always set + SECTION_RELRO if name is .data.rel.ro{,.local}. + +gcc/testsuite/ChangeLog: + + PR target/116887 + * gcc.dg/pr116887.c: New test. +--- + gcc/testsuite/gcc.dg/pr116887.c | 23 +++++++++++++++++++++++ + gcc/varasm.cc | 10 ++++------ + 2 files changed, 27 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/pr116887.c + +diff --git a/gcc/testsuite/gcc.dg/pr116887.c b/gcc/testsuite/gcc.dg/pr116887.c +new file mode 100644 +index 000000000..b7255e09a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr116887.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-fpic" } */ ++ ++struct link_map ++{ ++ struct link_map *l_next; ++}; ++struct rtld_global ++{ ++ struct link_map *_ns_loaded; ++ char buf[4096]; ++ struct link_map _dl_rtld_map; ++}; ++extern struct rtld_global _rtld_global; ++static int _dlfo_main __attribute__ ((section (".data.rel.ro"), used)); ++void ++_dlfo_process_initial (int ns) ++{ ++ for (struct link_map *l = _rtld_global._ns_loaded; l != ((void *)0); ++ l = l->l_next) ++ if (l == &_rtld_global._dl_rtld_map) ++ asm (""); ++} +diff --git a/gcc/varasm.cc b/gcc/varasm.cc +index a4b1cc686..19e2194a9 100644 +--- a/gcc/varasm.cc ++++ b/gcc/varasm.cc +@@ -6720,6 +6720,9 @@ default_section_type_flags (tree decl, const char *name, int reloc) + + if (decl && TREE_CODE (decl) == FUNCTION_DECL) + flags = SECTION_CODE; ++ else if (strcmp (name, ".data.rel.ro") == 0 ++ || strcmp (name, ".data.rel.ro.local") == 0) ++ flags = SECTION_WRITE | SECTION_RELRO; + else if (decl) + { + enum section_category category +@@ -6733,12 +6736,7 @@ default_section_type_flags (tree decl, const char *name, int reloc) + flags = SECTION_WRITE; + } + else +- { +- flags = SECTION_WRITE; +- if (strcmp 
(name, ".data.rel.ro") == 0 +- || strcmp (name, ".data.rel.ro.local") == 0) +- flags |= SECTION_RELRO; +- } ++ flags = SECTION_WRITE; + + if (decl && DECL_P (decl) && DECL_COMDAT_GROUP (decl)) + flags |= SECTION_LINKONCE; +-- +2.47.3 + diff --git a/0003-LoongArch-testsuite-Add-O-for-jump-table-annotate.c.patch b/0003-LoongArch-testsuite-Add-O-for-jump-table-annotate.c.patch new file mode 100644 index 0000000..bdd6dea --- /dev/null +++ b/0003-LoongArch-testsuite-Add-O-for-jump-table-annotate.c.patch @@ -0,0 +1,32 @@ +From b2f518b3eadf844a09efe1125bcc8c2fbaf733bf Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sat, 2 Nov 2024 01:26:47 +0800 +Subject: [PATCH 03/59] LoongArch: testsuite: Add -O for jump-table-annotate.c + +Without optimization, GCC does not emit a jump table for the test case. + +I'm not sure if the test case has been wrong in the first place or +something has changed in these months... + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/jump-table-annotate.c + (dg-additional-options): Add -O. 
+--- + gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c b/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c +index 9d58e60e3..1895f6f03 100644 +--- a/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c ++++ b/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-additional-options "-mannotate-tablejump" } */ ++/* { dg-additional-options "-O -mannotate-tablejump" } */ + + extern void asdf(int); + void foo(int x) { +-- +2.47.3 + diff --git a/0004-LoongArch-Make-__builtin_lsx_vorn_v-and-__builtin_la.patch b/0004-LoongArch-Make-__builtin_lsx_vorn_v-and-__builtin_la.patch new file mode 100644 index 0000000..ef34a85 --- /dev/null +++ b/0004-LoongArch-Make-__builtin_lsx_vorn_v-and-__builtin_la.patch @@ -0,0 +1,127 @@ +From 2fdf6a85eb0d1be857b1f9b73d60f8b9558ca496 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Thu, 31 Oct 2024 23:58:23 +0800 +Subject: [PATCH 04/59] LoongArch: Make __builtin_lsx_vorn_v and + __builtin_lasx_xvorn_v arguments and return values unsigned + +Align them with other vector bitwise builtins. + +This may break programs directly invoking __builtin_lsx_vorn_v or +__builtin_lasx_xvorn_v, but doing so is not supported (as builtins are +not documented, only intrinsics are documented and users should use them +instead). + +gcc/ChangeLog: + + * config/loongarch/loongarch-builtins.cc (vorn_v, xvorn_v): Use + unsigned vector modes. + * config/loongarch/lsxintrin.h (__lsx_vorn_v): Cast arguments to + v16u8. + * config/loongarch/lasxintrin.h (__lasx_xvorn_v): Cast arguments + to v32u8. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lsx/lsx-builtin.c (__lsx_vorn_v): + Change arguments and return value to v16u8. + * gcc.target/loongarch/vector/lasx/lasx-builtin.c + (__lasx_xvorn_v): Change arguments and return value to v32u8. 
+--- + gcc/config/loongarch/lasxintrin.h | 4 ++-- + gcc/config/loongarch/loongarch-builtins.cc | 4 ++-- + gcc/config/loongarch/lsxintrin.h | 4 ++-- + gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-builtin.c | 4 ++-- + gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-builtin.c | 4 ++-- + 5 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h +index 5e65e76e7..ed5eaf438 100644 +--- a/gcc/config/loongarch/lasxintrin.h ++++ b/gcc/config/loongarch/lasxintrin.h +@@ -3564,11 +3564,11 @@ __m256i __lasx_xvssrln_w_d (__m256i _1, __m256i _2) + } + + /* Assembly instruction format: xd, xj, xk. */ +-/* Data types in instruction templates: V32QI, V32QI, V32QI. */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256i __lasx_xvorn_v (__m256i _1, __m256i _2) + { +- return (__m256i)__builtin_lasx_xvorn_v ((v32i8)_1, (v32i8)_2); ++ return (__m256i)__builtin_lasx_xvorn_v ((v32u8)_1, (v32u8)_2); + } + + /* Assembly instruction format: xd, i13. 
*/ +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index f9ff85d2e..263942196 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -1568,7 +1568,7 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { + LSX_BUILTIN (vssrln_b_h, LARCH_V16QI_FTYPE_V8HI_V8HI), + LSX_BUILTIN (vssrln_h_w, LARCH_V8HI_FTYPE_V4SI_V4SI), + LSX_BUILTIN (vssrln_w_d, LARCH_V4SI_FTYPE_V2DI_V2DI), +- LSX_BUILTIN (vorn_v, LARCH_V16QI_FTYPE_V16QI_V16QI), ++ LSX_BUILTIN (vorn_v, LARCH_UV16QI_FTYPE_UV16QI_UV16QI), + LSX_BUILTIN (vldi, LARCH_V2DI_FTYPE_HI), + LSX_BUILTIN (vshuf_b, LARCH_V16QI_FTYPE_V16QI_V16QI_V16QI), + LSX_BUILTIN (vldx, LARCH_V16QI_FTYPE_CVPOINTER_DI), +@@ -2118,7 +2118,7 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { + LASX_BUILTIN (xvssrln_b_h, LARCH_V32QI_FTYPE_V16HI_V16HI), + LASX_BUILTIN (xvssrln_h_w, LARCH_V16HI_FTYPE_V8SI_V8SI), + LASX_BUILTIN (xvssrln_w_d, LARCH_V8SI_FTYPE_V4DI_V4DI), +- LASX_BUILTIN (xvorn_v, LARCH_V32QI_FTYPE_V32QI_V32QI), ++ LASX_BUILTIN (xvorn_v, LARCH_UV32QI_FTYPE_UV32QI_UV32QI), + LASX_BUILTIN (xvldi, LARCH_V4DI_FTYPE_HI), + LASX_BUILTIN (xvldx, LARCH_V32QI_FTYPE_CVPOINTER_DI), + LASX_NO_TARGET_BUILTIN (xvstx, LARCH_VOID_FTYPE_V32QI_CVPOINTER_DI), +diff --git a/gcc/config/loongarch/lsxintrin.h b/gcc/config/loongarch/lsxintrin.h +index 57a6fc40a..70b0b1576 100644 +--- a/gcc/config/loongarch/lsxintrin.h ++++ b/gcc/config/loongarch/lsxintrin.h +@@ -4745,11 +4745,11 @@ __m128i __lsx_vssrln_w_d (__m128i _1, __m128i _2) + } + + /* Assembly instruction format: vd, vj, vk. */ +-/* Data types in instruction templates: V16QI, V16QI, V16QI. */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. 
*/ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128i __lsx_vorn_v (__m128i _1, __m128i _2) + { +- return (__m128i)__builtin_lsx_vorn_v ((v16i8)_1, (v16i8)_2); ++ return (__m128i)__builtin_lsx_vorn_v ((v16u8)_1, (v16u8)_2); + } + + /* Assembly instruction format: vd, i13. */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-builtin.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-builtin.c +index b1a903b4a..64ff870a4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-builtin.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-builtin.c +@@ -3178,8 +3178,8 @@ __lasx_xvssrln_w_d (v4i64 _1, v4i64 _2) + { + return __builtin_lasx_xvssrln_w_d (_1, _2); + } +-v32i8 +-__lasx_xvorn_v (v32i8 _1, v32i8 _2) ++v32u8 ++__lasx_xvorn_v (v32u8 _1, v32u8 _2) + { + return __builtin_lasx_xvorn_v (_1, _2); + } +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-builtin.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-builtin.c +index 13013114d..1c9f384e3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-builtin.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-builtin.c +@@ -4006,8 +4006,8 @@ __lsx_vssrln_w_d (v2i64 _1, v2i64 _2) + { + return __builtin_lsx_vssrln_w_d (_1, _2); + } +-v16i8 +-__lsx_vorn_v (v16i8 _1, v16i8 _2) ++v16u8 ++__lsx_vorn_v (v16u8 _1, v16u8 _2) + { + return __builtin_lsx_vorn_v (_1, _2); + } +-- +2.47.3 + diff --git a/0005-LoongArch-Fix-clerical-errors-in-lasx_xvreplgr2vr_-a.patch b/0005-LoongArch-Fix-clerical-errors-in-lasx_xvreplgr2vr_-a.patch new file mode 100644 index 0000000..e20b329 --- /dev/null +++ b/0005-LoongArch-Fix-clerical-errors-in-lasx_xvreplgr2vr_-a.patch @@ -0,0 +1,47 @@ +From fc2f3bcb449f61895f5f59a9ba615fb7f7e9fb09 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Sat, 2 Nov 2024 15:30:40 +0800 +Subject: [PATCH 05/59] LoongArch: Fix clerical errors in lasx_xvreplgr2vr_* + and lsx_vreplgr2vr_*. 
+ +[x]vldi.{b/h/w/d} is not implemented in LoongArch. +Use the macro [x]vrepli.{b/h/w/d} to replace. + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Fixed. + * config/loongarch/lsx.md: Fixed. +--- + gcc/config/loongarch/lasx.md | 2 +- + gcc/config/loongarch/lsx.md | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index be2f6ca8e..c0795a04b 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1402,7 +1402,7 @@ + "ISA_HAS_LASX" + { + if (which_alternative == 1) +- return "xvldi.b\t%u0,0" ; ++ return "xvrepli.b\t%u0,0"; + + return "xvreplgr2vr.\t%u0,%z1"; + } +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 5cb5bc61f..e6307d447 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -1275,7 +1275,7 @@ + "ISA_HAS_LSX" + { + if (which_alternative == 1) +- return "vldi.\t%w0,0"; ++ return "vrepli.b\t%w0,0"; + + return "vreplgr2vr.\t%w0,%z1"; + } +-- +2.47.3 + diff --git a/0006-LoongArch-Remove-redundant-code.patch b/0006-LoongArch-Remove-redundant-code.patch new file mode 100644 index 0000000..3e5e13d --- /dev/null +++ b/0006-LoongArch-Remove-redundant-code.patch @@ -0,0 +1,171 @@ +From d5f30aede51ddf8ea573b6bba3b7df170953d1a9 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Sat, 2 Nov 2024 10:01:31 +0800 +Subject: [PATCH 06/59] LoongArch: Remove redundant code. + +TARGET_ASM_ALIGNED_{HI,SI,DI}_OP are defined repeatedly and deleted. + +gcc/ChangeLog: + + * config/loongarch/loongarch-builtins.cc + (loongarch_builtin_vectorized_function): Delete. + (LARCH_GET_BUILTIN): Delete. + * config/loongarch/loongarch-protos.h + (loongarch_builtin_vectorized_function): Delete. + * config/loongarch/loongarch.cc + (TARGET_ASM_ALIGNED_HI_OP): Delete. + (TARGET_ASM_ALIGNED_SI_OP): Delete. + (TARGET_ASM_ALIGNED_DI_OP): Delete.
+--- + gcc/config/loongarch/loongarch-builtins.cc | 102 --------------------- + gcc/config/loongarch/loongarch-protos.h | 1 - + gcc/config/loongarch/loongarch.cc | 8 -- + 3 files changed, 111 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index 263942196..867852a91 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -2525,108 +2525,6 @@ loongarch_builtin_decl (unsigned int code, bool initialize_p ATTRIBUTE_UNUSED) + return loongarch_builtin_decls[code]; + } + +-/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION. */ +- +-tree +-loongarch_builtin_vectorized_function (unsigned int fn, tree type_out, +- tree type_in) +-{ +- machine_mode in_mode, out_mode; +- int in_n, out_n; +- +- if (TREE_CODE (type_out) != VECTOR_TYPE +- || TREE_CODE (type_in) != VECTOR_TYPE +- || !ISA_HAS_LSX) +- return NULL_TREE; +- +- out_mode = TYPE_MODE (TREE_TYPE (type_out)); +- out_n = TYPE_VECTOR_SUBPARTS (type_out); +- in_mode = TYPE_MODE (TREE_TYPE (type_in)); +- in_n = TYPE_VECTOR_SUBPARTS (type_in); +- +- /* INSN is the name of the associated instruction pattern, without +- the leading CODE_FOR_. 
*/ +-#define LARCH_GET_BUILTIN(INSN) \ +- loongarch_builtin_decls[loongarch_get_builtin_decl_index[CODE_FOR_##INSN]] +- +- switch (fn) +- { +- CASE_CFN_CEIL: +- if (out_mode == DFmode && in_mode == DFmode) +- { +- if (out_n == 2 && in_n == 2) +- return LARCH_GET_BUILTIN (lsx_vfrintrp_d); +- if (out_n == 4 && in_n == 4) +- return LARCH_GET_BUILTIN (lasx_xvfrintrp_d); +- } +- if (out_mode == SFmode && in_mode == SFmode) +- { +- if (out_n == 4 && in_n == 4) +- return LARCH_GET_BUILTIN (lsx_vfrintrp_s); +- if (out_n == 8 && in_n == 8) +- return LARCH_GET_BUILTIN (lasx_xvfrintrp_s); +- } +- break; +- +- CASE_CFN_TRUNC: +- if (out_mode == DFmode && in_mode == DFmode) +- { +- if (out_n == 2 && in_n == 2) +- return LARCH_GET_BUILTIN (lsx_vfrintrz_d); +- if (out_n == 4 && in_n == 4) +- return LARCH_GET_BUILTIN (lasx_xvfrintrz_d); +- } +- if (out_mode == SFmode && in_mode == SFmode) +- { +- if (out_n == 4 && in_n == 4) +- return LARCH_GET_BUILTIN (lsx_vfrintrz_s); +- if (out_n == 8 && in_n == 8) +- return LARCH_GET_BUILTIN (lasx_xvfrintrz_s); +- } +- break; +- +- CASE_CFN_RINT: +- CASE_CFN_ROUND: +- if (out_mode == DFmode && in_mode == DFmode) +- { +- if (out_n == 2 && in_n == 2) +- return LARCH_GET_BUILTIN (lsx_vfrint_d); +- if (out_n == 4 && in_n == 4) +- return LARCH_GET_BUILTIN (lasx_xvfrint_d); +- } +- if (out_mode == SFmode && in_mode == SFmode) +- { +- if (out_n == 4 && in_n == 4) +- return LARCH_GET_BUILTIN (lsx_vfrint_s); +- if (out_n == 8 && in_n == 8) +- return LARCH_GET_BUILTIN (lasx_xvfrint_s); +- } +- break; +- +- CASE_CFN_FLOOR: +- if (out_mode == DFmode && in_mode == DFmode) +- { +- if (out_n == 2 && in_n == 2) +- return LARCH_GET_BUILTIN (lsx_vfrintrm_d); +- if (out_n == 4 && in_n == 4) +- return LARCH_GET_BUILTIN (lasx_xvfrintrm_d); +- } +- if (out_mode == SFmode && in_mode == SFmode) +- { +- if (out_n == 4 && in_n == 4) +- return LARCH_GET_BUILTIN (lsx_vfrintrm_s); +- if (out_n == 8 && in_n == 8) +- return LARCH_GET_BUILTIN (lasx_xvfrintrm_s); +- } +- 
break; +- +- default: +- break; +- } +- +- return NULL_TREE; +-} +- + /* Take argument ARGNO from EXP's argument list and convert it into + an expand operand. Store the operand in *OP. */ + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index abf1a0893..d6c514f58 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -203,7 +203,6 @@ extern void loongarch_atomic_assign_expand_fenv (tree *, tree *, tree *); + extern tree loongarch_builtin_decl (unsigned int, bool); + extern rtx loongarch_expand_builtin (tree, rtx, rtx subtarget ATTRIBUTE_UNUSED, + machine_mode, int); +-extern tree loongarch_builtin_vectorized_function (unsigned int, tree, tree); + extern rtx loongarch_gen_const_int_vector_shuffle (machine_mode, int); + extern tree loongarch_build_builtin_va_list (void); + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 6be0d80b3..5cce912bc 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -8094,14 +8094,6 @@ loongarch_set_handled_components (sbitmap components) + cfun->machine->reg_is_wrapped_separately[regno] = true; + } + +-/* Initialize the GCC target structure. */ +-#undef TARGET_ASM_ALIGNED_HI_OP +-#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" +-#undef TARGET_ASM_ALIGNED_SI_OP +-#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" +-#undef TARGET_ASM_ALIGNED_DI_OP +-#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t" +- + /* Use the vshuf instruction to implement all 128-bit constant vector + permuatation. 
*/ + +-- +2.47.3 + diff --git a/0007-LoongArch-testsuite-Fix-loongarch-vect-frint-scalar..patch b/0007-LoongArch-testsuite-Fix-loongarch-vect-frint-scalar..patch new file mode 100644 index 0000000..5a6def5 --- /dev/null +++ b/0007-LoongArch-testsuite-Fix-loongarch-vect-frint-scalar..patch @@ -0,0 +1,41 @@ +From 4215a3a2c7990bbee970962b0573d4e87da5ac8c Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Tue, 26 Nov 2024 14:51:30 +0800 +Subject: [PATCH 07/59] LoongArch: testsuite: Fix + loongarch/vect-frint-scalar.c. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +In r15-5327, change the default language version for C compilation from +-std=gnu17 to -std=gnu23. + +ISO C99 and C11 allow ceil, floor, round and trunc, and their float and +long double variants, to raise the “inexact” exception, +but ISO/IEC TS 18661-1:2014, the C bindings to IEEE 754-2008, as +integrated into ISO C23, does not allow these functions to do so. + +So add '-ffp-int-builtin-inexact' to this test case. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-frint-scalar.c: Add + '-ffp-int-builtin-inexact'. 
+--- + gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c +index c7cb40be7..dbcb9065a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c ++++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mlsx" } */ ++/* { dg-options "-O2 -mlsx -ffp-int-builtin-inexact" } */ + + #define test(func, suffix) \ + __typeof__ (1.##suffix) \ +-- +2.47.3 + diff --git a/0008-LoongArch-testsuite-Fix-l-a-sx-andn-iorn.c.patch b/0008-LoongArch-testsuite-Fix-l-a-sx-andn-iorn.c.patch new file mode 100644 index 0000000..5b1ed6d --- /dev/null +++ b/0008-LoongArch-testsuite-Fix-l-a-sx-andn-iorn.c.patch @@ -0,0 +1,45 @@ +From fb34795fe85d86d3c97d3a8f8177b93d65435eff Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Tue, 26 Nov 2024 16:04:24 +0800 +Subject: [PATCH 08/59] LoongArch: testsuite: Fix l{a}sx-andn-iorn.c. + +Add '-fdump-tree-optimized' to this testcases. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/lasx-andn-iorn.c: + Add '-fdump-tree-optimized'. + * gcc.target/loongarch/lsx-andn-iorn.c: + Likewise. +--- + gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c | 2 +- + gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c b/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c +index 4aa5f19a6..86b04dbbb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c ++++ b/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c +@@ -3,7 +3,7 @@ + #include "./lsx-andn-iorn.c" + + /* { dg-do compile } */ +-/* { dg-options "-O2 -mlasx -ftree-vectorize" } */ ++/* { dg-options "-O2 -mlasx -ftree-vectorize -fdump-tree-optimized" } */ + + /* We should produce a BIT_ANDC and BIT_IORC here. 
*/ + +diff --git a/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c b/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c +index 7bceccd37..14838ab8b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c ++++ b/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mlsx -ftree-vectorize" } */ ++/* { dg-options "-O2 -mlsx -ftree-vectorize -fdump-tree-optimized" } */ + + #ifndef N + #define N 4 +-- +2.47.3 + diff --git a/0009-LoongArch-Mask-shift-offset-when-emit-xv-v-srl-sll-s.patch b/0009-LoongArch-Mask-shift-offset-when-emit-xv-v-srl-sll-s.patch new file mode 100644 index 0000000..1cd7db1 --- /dev/null +++ b/0009-LoongArch-Mask-shift-offset-when-emit-xv-v-srl-sll-s.patch @@ -0,0 +1,448 @@ +From b72db38b82c2564eb53d9e27f49129ef558661aa Mon Sep 17 00:00:00 2001 +From: Jinyang He +Date: Thu, 28 Nov 2024 09:26:25 +0800 +Subject: [PATCH 09/59] LoongArch: Mask shift offset when emit {xv, v}{srl, + sll, sra} with sameimm vector + +For {xv,v}{srl,sll,sra}, the constraint `vector_same_uimm6` causes overflow +when emitting {w,h,b}. Since the number of bits shifted is the remainder of +the register value, it is actually unnecessary to constrain the range. +Simply mask the shift number with the unit-bit-width, without any +constraint on the shift range. + +gcc/ChangeLog: + + * config/loongarch/constraints.md (Uuv6, Uuvx): Remove Uuv6, + add Uuvx as replicated vector const with unsigned range [0,umax]. + * config/loongarch/lasx.md (xvsrl, xvsra, xvsll): Mask shift + offset by its unit bits. + * config/loongarch/lsx.md (vsrl, vsra, vsll): Likewise. + * config/loongarch/loongarch-protos.h + (loongarch_const_vector_same_int_p): Set default for low and high. + * config/loongarch/predicates.md: Replace reg_or_vector_same_uimm6 + _operand to reg_or_vector_same_uimm_operand. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-shift-sameimm-vec.c: New test.
+ * gcc.target/loongarch/vector/lsx/lsx-shift-sameimm-vec.c: New test. +--- + gcc/config/loongarch/constraints.md | 14 ++-- + gcc/config/loongarch/lasx.md | 60 ++++++++++++---- + gcc/config/loongarch/loongarch-protos.h | 5 +- + gcc/config/loongarch/lsx.md | 60 ++++++++++++---- + gcc/config/loongarch/predicates.md | 8 +-- + .../vector/lasx/lasx-shift-sameimm-vec.c | 72 +++++++++++++++++++ + .../vector/lsx/lsx-shift-sameimm-vec.c | 72 +++++++++++++++++++ + 7 files changed, 254 insertions(+), 37 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-shift-sameimm-vec.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-shift-sameimm-vec.c + +diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md +index d653ea82a..40ac8d7f2 100644 +--- a/gcc/config/loongarch/constraints.md ++++ b/gcc/config/loongarch/constraints.md +@@ -334,19 +334,19 @@ + (and (match_code "const_vector") + (match_test "loongarch_const_vector_same_int_p (op, mode, -16, 15)"))) + +-(define_constraint "Uuv6" +- "@internal +- A replicated vector const in which the replicated value is in the range +- [0,63]." +- (and (match_code "const_vector") +- (match_test "loongarch_const_vector_same_int_p (op, mode, 0, 63)"))) +- + (define_constraint "Urv8" + "@internal + A replicated vector const with replicated byte values as well as elements" + (and (match_code "const_vector") + (match_test "loongarch_const_vector_same_bytes_p (op, mode)"))) + ++(define_constraint "Uuvx" ++ "@internal ++ A replicated vector const in which the replicated value is in the unsigned ++ range [0,umax]." 
++ (and (match_code "const_vector") ++ (match_test "loongarch_const_vector_same_int_p (op, mode)"))) ++ + (define_memory_constraint "ZC" + "A memory operand whose address is formed by a base register and offset + that is suitable for use in instructions with the same addressing mode +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index c0795a04b..e4ea2e4fa 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1013,11 +1013,23 @@ + [(set (match_operand:ILASX 0 "register_operand" "=f,f") + (lshiftrt:ILASX + (match_operand:ILASX 1 "register_operand" "f,f") +- (match_operand:ILASX 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))] ++ (match_operand:ILASX 2 "reg_or_vector_same_uimm_operand" "f,Uuvx")))] + "ISA_HAS_LASX" +- "@ +- xvsrl.\t%u0,%u1,%u2 +- xvsrli.\t%u0,%u1,%E2" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "xvsrl.\t%u0,%u1,%u2"; ++ case 1: ++ { ++ unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (operands[2], 0)); ++ operands[2] = GEN_INT (val & (GET_MODE_UNIT_BITSIZE (mode) - 1)); ++ return "xvsrli.\t%u0,%u1,%d2"; ++ } ++ default: ++ gcc_unreachable (); ++ } ++} + [(set_attr "type" "simd_shift") + (set_attr "mode" "")]) + +@@ -1026,11 +1038,23 @@ + [(set (match_operand:ILASX 0 "register_operand" "=f,f") + (ashiftrt:ILASX + (match_operand:ILASX 1 "register_operand" "f,f") +- (match_operand:ILASX 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))] ++ (match_operand:ILASX 2 "reg_or_vector_same_uimm_operand" "f,Uuvx")))] + "ISA_HAS_LASX" +- "@ +- xvsra.\t%u0,%u1,%u2 +- xvsrai.\t%u0,%u1,%E2" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "xvsra.\t%u0,%u1,%u2"; ++ case 1: ++ { ++ unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (operands[2], 0)); ++ operands[2] = GEN_INT (val & (GET_MODE_UNIT_BITSIZE (mode) - 1)); ++ return "xvsrai.\t%u0,%u1,%d2"; ++ } ++ default: ++ gcc_unreachable (); ++ } ++} + [(set_attr "type" "simd_shift") + (set_attr "mode" "")]) + +@@ -1039,11 
+1063,23 @@ + [(set (match_operand:ILASX 0 "register_operand" "=f,f") + (ashift:ILASX + (match_operand:ILASX 1 "register_operand" "f,f") +- (match_operand:ILASX 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))] ++ (match_operand:ILASX 2 "reg_or_vector_same_uimm_operand" "f,Uuvx")))] + "ISA_HAS_LASX" +- "@ +- xvsll.\t%u0,%u1,%u2 +- xvslli.\t%u0,%u1,%E2" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "xvsll.\t%u0,%u1,%u2"; ++ case 1: ++ { ++ unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (operands[2], 0)); ++ operands[2] = GEN_INT (val & (GET_MODE_UNIT_BITSIZE (mode) - 1)); ++ return "xvslli.\t%u0,%u1,%d2"; ++ } ++ default: ++ gcc_unreachable (); ++ } ++} + [(set_attr "type" "simd_shift") + (set_attr "mode" "")]) + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index d6c514f58..deea5e675 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -113,8 +113,9 @@ extern rtx loongarch_return_addr (int, rtx); + + extern bool loongarch_const_vector_same_val_p (rtx, machine_mode); + extern bool loongarch_const_vector_same_bytes_p (rtx, machine_mode); +-extern bool loongarch_const_vector_same_int_p (rtx, machine_mode, HOST_WIDE_INT, +- HOST_WIDE_INT); ++extern bool loongarch_const_vector_same_int_p (rtx, machine_mode, ++ HOST_WIDE_INT low = HOST_WIDE_INT_MIN, ++ HOST_WIDE_INT high = HOST_WIDE_INT_MAX); + extern bool loongarch_const_vector_shuffle_set_p (rtx, machine_mode); + extern bool loongarch_const_vector_bitimm_set_p (rtx, machine_mode); + extern bool loongarch_const_vector_bitimm_clr_p (rtx, machine_mode); +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index e6307d447..d05fd8549 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -879,11 +879,23 @@ + [(set (match_operand:ILSX 0 "register_operand" "=f,f") + (lshiftrt:ILSX + (match_operand:ILSX 1 "register_operand" "f,f") +- (match_operand:ILSX 2 
"reg_or_vector_same_uimm6_operand" "f,Uuv6")))] ++ (match_operand:ILSX 2 "reg_or_vector_same_uimm_operand" "f,Uuvx")))] + "ISA_HAS_LSX" +- "@ +- vsrl.\t%w0,%w1,%w2 +- vsrli.\t%w0,%w1,%E2" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "vsrl.\t%w0,%w1,%w2"; ++ case 1: ++ { ++ unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (operands[2], 0)); ++ operands[2] = GEN_INT (val & (GET_MODE_UNIT_BITSIZE (mode) - 1)); ++ return "vsrli.\t%w0,%w1,%d2"; ++ } ++ default: ++ gcc_unreachable (); ++ } ++} + [(set_attr "type" "simd_shift") + (set_attr "mode" "")]) + +@@ -891,11 +903,23 @@ + [(set (match_operand:ILSX 0 "register_operand" "=f,f") + (ashiftrt:ILSX + (match_operand:ILSX 1 "register_operand" "f,f") +- (match_operand:ILSX 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))] ++ (match_operand:ILSX 2 "reg_or_vector_same_uimm_operand" "f,Uuvx")))] + "ISA_HAS_LSX" +- "@ +- vsra.\t%w0,%w1,%w2 +- vsrai.\t%w0,%w1,%E2" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "vsra.\t%w0,%w1,%w2"; ++ case 1: ++ { ++ unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (operands[2], 0)); ++ operands[2] = GEN_INT (val & (GET_MODE_UNIT_BITSIZE (mode) - 1)); ++ return "vsrai.\t%w0,%w1,%d2"; ++ } ++ default: ++ gcc_unreachable (); ++ } ++} + [(set_attr "type" "simd_shift") + (set_attr "mode" "")]) + +@@ -903,11 +927,23 @@ + [(set (match_operand:ILSX 0 "register_operand" "=f,f") + (ashift:ILSX + (match_operand:ILSX 1 "register_operand" "f,f") +- (match_operand:ILSX 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))] ++ (match_operand:ILSX 2 "reg_or_vector_same_uimm_operand" "f,Uuvx")))] + "ISA_HAS_LSX" +- "@ +- vsll.\t%w0,%w1,%w2 +- vslli.\t%w0,%w1,%E2" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "vsll.\t%w0,%w1,%w2"; ++ case 1: ++ { ++ unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (operands[2], 0)); ++ operands[2] = GEN_INT (val & (GET_MODE_UNIT_BITSIZE (mode) - 1)); ++ return "vslli.\t%w0,%w1,%d2"; ++ } ++ default: ++ gcc_unreachable (); ++ 
} ++} + [(set_attr "type" "simd_shift") + (set_attr "mode" "")]) + +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 2b7f7ed47..b66e0e18d 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -635,10 +635,10 @@ + return loongarch_const_vector_same_int_p (op, mode, -31, 31); + }) + +-(define_predicate "const_vector_same_uimm6_operand" ++(define_predicate "const_vector_same_uimm_operand" + (match_code "const_vector") + { +- return loongarch_const_vector_same_int_p (op, mode, 0, 63); ++ return loongarch_const_vector_same_int_p (op, mode); + }) + + (define_predicate "par_const_vector_shf_set_operand" +@@ -663,6 +663,6 @@ + (ior (match_operand 0 "register_operand") + (match_operand 0 "const_vector_same_ximm5_operand"))) + +-(define_predicate "reg_or_vector_same_uimm6_operand" ++(define_predicate "reg_or_vector_same_uimm_operand" + (ior (match_operand 0 "register_operand") +- (match_operand 0 "const_vector_same_uimm6_operand"))) ++ (match_operand 0 "const_vector_same_uimm_operand"))) +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-shift-sameimm-vec.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-shift-sameimm-vec.c +new file mode 100644 +index 000000000..5808b066d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-shift-sameimm-vec.c +@@ -0,0 +1,72 @@ ++/* Test shift bits overflow in vector */ ++/* { dg-do assemble } */ ++/* { dg-options "-mlasx -O2 -save-temps" } */ ++/* { dg-final { scan-assembler "xvslli.b.*,1" } } */ ++/* { dg-final { scan-assembler "xvslli.b.*,7" } } */ ++/* { dg-final { scan-assembler "xvslli.h.*,1" } } */ ++/* { dg-final { scan-assembler "xvslli.h.*,15" } } */ ++/* { dg-final { scan-assembler "xvslli.w.*,1" } } */ ++/* { dg-final { scan-assembler "xvslli.w.*,31" } } */ ++/* { dg-final { scan-assembler "xvslli.d.*,1" } } */ ++/* { dg-final { scan-assembler "xvslli.d.*,63" } } */ ++/* { dg-final { scan-assembler 
"xvsrli.b.*,1" } } */ ++/* { dg-final { scan-assembler "xvsrli.b.*,7" } } */ ++/* { dg-final { scan-assembler "xvsrli.h.*,1" } } */ ++/* { dg-final { scan-assembler "xvsrli.h.*,15" } } */ ++/* { dg-final { scan-assembler "xvsrli.w.*,1" } } */ ++/* { dg-final { scan-assembler "xvsrli.w.*,31" } } */ ++/* { dg-final { scan-assembler "xvsrli.d.*,1" } } */ ++/* { dg-final { scan-assembler "xvsrli.d.*,63" } } */ ++/* { dg-final { scan-assembler "xvsrai.b.*,1" } } */ ++/* { dg-final { scan-assembler "xvsrai.b.*,7" } } */ ++/* { dg-final { scan-assembler "xvsrai.h.*,1" } } */ ++/* { dg-final { scan-assembler "xvsrai.h.*,15" } } */ ++/* { dg-final { scan-assembler "xvsrai.w.*,1" } } */ ++/* { dg-final { scan-assembler "xvsrai.w.*,31" } } */ ++/* { dg-final { scan-assembler "xvsrai.d.*,1" } } */ ++/* { dg-final { scan-assembler "xvsrai.d.*,63" } } */ ++ ++typedef signed char v32i8 __attribute__ ((vector_size (32), aligned (32))); ++typedef short v16i16 __attribute__ ((vector_size (32), aligned (32))); ++typedef int v8i32 __attribute__ ((vector_size (32), aligned (32))); ++typedef long long v4i64 __attribute__ ((vector_size (32), aligned (32))); ++ ++#define TWICE(_) _, _ ++#define V32I8_RVAL(_) (v32i8) {TWICE(TWICE(TWICE(TWICE(TWICE(_)))))} ++#define V16I16_RVAL(_) (v16i16) {TWICE(TWICE(TWICE(TWICE(_))))} ++#define V8I32_RVAL(_) (v8i32) {TWICE(TWICE(TWICE(_)))} ++#define V4I64_RVAL(_) (v4i64) {TWICE(TWICE(_))} ++ ++#define TEST_FUNC(typ, key, inst, rept, val) \ ++typ \ ++_##key##inst (typ _) \ ++{ \ ++ return __builtin_lasx_##inst(_, rept(val)); \ ++} ++ ++TEST_FUNC(v32i8, pos, xvsll_b, V32I8_RVAL, 65) ++TEST_FUNC(v32i8, neg, xvsll_b, V32I8_RVAL, -65) ++TEST_FUNC(v16i16, pos, xvsll_h, V16I16_RVAL, 65) ++TEST_FUNC(v16i16, neg, xvsll_h, V16I16_RVAL, -65) ++TEST_FUNC(v8i32, pos, xvsll_w, V8I32_RVAL, 65) ++TEST_FUNC(v8i32, neg, xvsll_w, V8I32_RVAL, -65) ++TEST_FUNC(v4i64, pos, xvsll_d, V4I64_RVAL, 65) ++TEST_FUNC(v4i64, neg, xvsll_d, V4I64_RVAL, -65) ++ ++TEST_FUNC(v32i8, pos, 
xvsrl_b, V32I8_RVAL, 65) ++TEST_FUNC(v32i8, neg, xvsrl_b, V32I8_RVAL, -65) ++TEST_FUNC(v16i16, pos, xvsrl_h, V16I16_RVAL, 65) ++TEST_FUNC(v16i16, neg, xvsrl_h, V16I16_RVAL, -65) ++TEST_FUNC(v8i32, pos, xvsrl_w, V8I32_RVAL, 65) ++TEST_FUNC(v8i32, neg, xvsrl_w, V8I32_RVAL, -65) ++TEST_FUNC(v4i64, pos, xvsrl_d, V4I64_RVAL, 65) ++TEST_FUNC(v4i64, neg, xvsrl_d, V4I64_RVAL, -65) ++ ++TEST_FUNC(v32i8, pos, xvsra_b, V32I8_RVAL, 65) ++TEST_FUNC(v32i8, neg, xvsra_b, V32I8_RVAL, -65) ++TEST_FUNC(v16i16, pos, xvsra_h, V16I16_RVAL, 65) ++TEST_FUNC(v16i16, neg, xvsra_h, V16I16_RVAL, -65) ++TEST_FUNC(v8i32, pos, xvsra_w, V8I32_RVAL, 65) ++TEST_FUNC(v8i32, neg, xvsra_w, V8I32_RVAL, -65) ++TEST_FUNC(v4i64, pos, xvsra_d, V4I64_RVAL, 65) ++TEST_FUNC(v4i64, neg, xvsra_d, V4I64_RVAL, -65) +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-shift-sameimm-vec.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-shift-sameimm-vec.c +new file mode 100644 +index 000000000..c87deee1e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-shift-sameimm-vec.c +@@ -0,0 +1,72 @@ ++/* Test shift bits overflow in vector */ ++/* { dg-do assemble } */ ++/* { dg-options "-mlsx -O2 -save-temps" } */ ++/* { dg-final { scan-assembler "vslli.b.*,1" } } */ ++/* { dg-final { scan-assembler "vslli.b.*,7" } } */ ++/* { dg-final { scan-assembler "vslli.h.*,1" } } */ ++/* { dg-final { scan-assembler "vslli.h.*,15" } } */ ++/* { dg-final { scan-assembler "vslli.w.*,1" } } */ ++/* { dg-final { scan-assembler "vslli.w.*,31" } } */ ++/* { dg-final { scan-assembler "vslli.d.*,1" } } */ ++/* { dg-final { scan-assembler "vslli.d.*,63" } } */ ++/* { dg-final { scan-assembler "vsrli.b.*,1" } } */ ++/* { dg-final { scan-assembler "vsrli.b.*,7" } } */ ++/* { dg-final { scan-assembler "vsrli.h.*,1" } } */ ++/* { dg-final { scan-assembler "vsrli.h.*,15" } } */ ++/* { dg-final { scan-assembler "vsrli.w.*,1" } } */ ++/* { dg-final { scan-assembler "vsrli.w.*,31" } } */ ++/* { dg-final { 
scan-assembler "vsrli.d.*,1" } } */ ++/* { dg-final { scan-assembler "vsrli.d.*,63" } } */ ++/* { dg-final { scan-assembler "vsrai.b.*,1" } } */ ++/* { dg-final { scan-assembler "vsrai.b.*,7" } } */ ++/* { dg-final { scan-assembler "vsrai.h.*,1" } } */ ++/* { dg-final { scan-assembler "vsrai.h.*,15" } } */ ++/* { dg-final { scan-assembler "vsrai.w.*,1" } } */ ++/* { dg-final { scan-assembler "vsrai.w.*,31" } } */ ++/* { dg-final { scan-assembler "vsrai.d.*,1" } } */ ++/* { dg-final { scan-assembler "vsrai.d.*,63" } } */ ++ ++typedef signed char v16i8 __attribute__ ((vector_size (16), aligned (16))); ++typedef short v8i16 __attribute__ ((vector_size (16), aligned (16))); ++typedef int v4i32 __attribute__ ((vector_size (16), aligned (16))); ++typedef long long v2i64 __attribute__ ((vector_size (16), aligned (16))); ++ ++#define TWICE(_) _, _ ++#define V16I8_RVAL(_) (v16i8) {TWICE(TWICE(TWICE(TWICE(_))))} ++#define V8I16_RVAL(_) (v8i16) {TWICE(TWICE(TWICE(_)))} ++#define V4I32_RVAL(_) (v4i32) {TWICE(TWICE(_))} ++#define V2I64_RVAL(_) (v2i64) {TWICE(_)} ++ ++#define TEST_FUNC(typ, key, inst, rept, val) \ ++typ \ ++_##key##inst (typ _) \ ++{ \ ++ return __builtin_lsx_##inst(_, rept(val)); \ ++} ++ ++TEST_FUNC(v16i8, pos, vsll_b, V16I8_RVAL, 65) ++TEST_FUNC(v16i8, neg, vsll_b, V16I8_RVAL, -65) ++TEST_FUNC(v8i16, pos, vsll_h, V8I16_RVAL, 65) ++TEST_FUNC(v8i16, neg, vsll_h, V8I16_RVAL, -65) ++TEST_FUNC(v4i32, pos, vsll_w, V4I32_RVAL, 65) ++TEST_FUNC(v4i32, neg, vsll_w, V4I32_RVAL, -65) ++TEST_FUNC(v2i64, pos, vsll_d, V2I64_RVAL, 65) ++TEST_FUNC(v2i64, neg, vsll_d, V2I64_RVAL, -65) ++ ++TEST_FUNC(v16i8, pos, vsrl_b, V16I8_RVAL, 65) ++TEST_FUNC(v16i8, neg, vsrl_b, V16I8_RVAL, -65) ++TEST_FUNC(v8i16, pos, vsrl_h, V8I16_RVAL, 65) ++TEST_FUNC(v8i16, neg, vsrl_h, V8I16_RVAL, -65) ++TEST_FUNC(v4i32, pos, vsrl_w, V4I32_RVAL, 65) ++TEST_FUNC(v4i32, neg, vsrl_w, V4I32_RVAL, -65) ++TEST_FUNC(v2i64, pos, vsrl_d, V2I64_RVAL, 65) ++TEST_FUNC(v2i64, neg, vsrl_d, V2I64_RVAL, -65) ++ 
++TEST_FUNC(v16i8, pos, vsra_b, V16I8_RVAL, 65) ++TEST_FUNC(v16i8, neg, vsra_b, V16I8_RVAL, -65) ++TEST_FUNC(v8i16, pos, vsra_h, V8I16_RVAL, 65) ++TEST_FUNC(v8i16, neg, vsra_h, V8I16_RVAL, -65) ++TEST_FUNC(v4i32, pos, vsra_w, V4I32_RVAL, 65) ++TEST_FUNC(v4i32, neg, vsra_w, V4I32_RVAL, -65) ++TEST_FUNC(v2i64, pos, vsra_d, V2I64_RVAL, 65) ++TEST_FUNC(v2i64, neg, vsra_d, V2I64_RVAL, -65) +-- +2.47.3 + diff --git a/0010-LoongArch-Remove-QHSD-and-use-QHWD-instead.patch b/0010-LoongArch-Remove-QHSD-and-use-QHWD-instead.patch new file mode 100644 index 0000000..cf8deae --- /dev/null +++ b/0010-LoongArch-Remove-QHSD-and-use-QHWD-instead.patch @@ -0,0 +1,49 @@ +From 0bb898e2f6a1f5e653697a77db1ac83ab7c14847 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Mon, 2 Dec 2024 14:48:24 +0800 +Subject: [PATCH 10/59] LoongArch: Remove QHSD and use QHWD instead + +QHSD and QHWD are basically the same thing, but QHSD will be incorrect +when we start to add LA32 support. So it's just better to always use +QHWD. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (QHSD): Remove. + (loongarch__w__w): Use QHWD instead of QHSD. + (loongarch__w__w_extended): Likewise.
+--- + gcc/config/loongarch/loongarch.md | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index d8d444c7a..8329e9fea 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -4366,13 +4366,12 @@ + + + +-(define_mode_iterator QHSD [QI HI SI DI]) + (define_int_iterator CRC [UNSPEC_CRC UNSPEC_CRCC]) + (define_int_attr crc [(UNSPEC_CRC "crc") (UNSPEC_CRCC "crcc")]) + + (define_insn "loongarch__w__w" + [(set (match_operand:SI 0 "register_operand" "=r") +- (unspec:SI [(match_operand:QHSD 1 "register_operand" "r") ++ (unspec:SI [(match_operand:QHWD 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] + CRC))] + "" +@@ -4383,7 +4382,7 @@ + (define_insn "loongarch__w__w_extended" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI +- (unspec:SI [(match_operand:QHSD 1 "register_operand" "r") ++ (unspec:SI [(match_operand:QHWD 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] + CRC)))] + "TARGET_64BIT" +-- +2.47.3 + diff --git a/0011-LoongArch-Add-bit-reverse-operations.patch b/0011-LoongArch-Add-bit-reverse-operations.patch new file mode 100644 index 0000000..9a87d02 --- /dev/null +++ b/0011-LoongArch-Add-bit-reverse-operations.patch @@ -0,0 +1,90 @@ +From eeb8a1f76dbe23734ab735568572c9a79c8dae1c Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Mon, 2 Dec 2024 10:53:27 +0800 +Subject: [PATCH 11/59] LoongArch: Add bit reverse operations + +LoongArch supports native bit reverse operation for QI, SI, DI, and for +HI we can expand it into a shift and a bit reverse in word_mode. + +I was reluctant to add them because until PR50481 is fixed these +operations will be just useless. But now it turns out we can use them +to optimize the bit reversing CRC calculation if recognized by the +generic CRC pass. So add them in preparation for the next patch adding CRC +expanders.
+ +gcc/ChangeLog: + + * config/loongarch/loongarch.md (@rbit): New + define_insn template. + (rbitsi_extended): New define_insn. + (rbitqi): New define_insn. + (rbithi): New define_expand. +--- + gcc/config/loongarch/loongarch.md | 51 +++++++++++++++++++++++++++++++ + 1 file changed, 51 insertions(+) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 8329e9fea..ebf633776 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -4260,6 +4260,57 @@ + [(set_attr "type" "unknown") + (set_attr "mode" "DI")]) + ++(define_insn "@rbit" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (bitreverse:GPR (match_operand:GPR 1 "register_operand" "r")))] ++ "" ++ "bitrev.\t%0,%1" ++ [(set_attr "type" "unknown") ++ (set_attr "mode" "")]) ++ ++(define_insn "rbitsi_extended" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI ++ (bitreverse:SI (match_operand:SI 1 "register_operand" "r"))))] ++ "TARGET_64BIT" ++ "bitrev.w\t%0,%1" ++ [(set_attr "type" "unknown") ++ (set_attr "mode" "SI")]) ++ ++;; If we don't care high bits, bitrev.4b can reverse bits of values in ++;; QImode. ++(define_insn "rbitqi" ++ [(set (match_operand:QI 0 "register_operand" "=r") ++ (bitreverse:QI (match_operand:QI 1 "register_operand" "r")))] ++ "" ++ "bitrev.4b\t%0,%1" ++ [(set_attr "type" "unknown") ++ (set_attr "mode" "SI")]) ++ ++;; For HImode it's a little complicated... ++(define_expand "rbithi" ++ [(match_operand:HI 0 "register_operand") ++ (match_operand:HI 1 "register_operand")] ++ "" ++ { ++ rtx t = gen_reg_rtx (word_mode); ++ ++ /* Oh, using paradoxical subreg. I learnt the trick from RISC-V, ++ hoping we won't be blown up altogether one day. 
*/ ++ emit_insn (gen_rbit(word_mode, t, ++ gen_lowpart (word_mode, operands[1]))); ++ t = expand_simple_binop (word_mode, LSHIFTRT, t, ++ GEN_INT (GET_MODE_BITSIZE (word_mode) - 16), ++ NULL_RTX, false, OPTAB_DIRECT); ++ ++ t = gen_lowpart (HImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_UNSIGNED); ++ emit_move_insn (operands[0], t); ++ ++ DONE; ++ }) ++ + (define_insn "@stack_tie" + [(set (mem:BLK (scratch)) + (unspec:BLK [(match_operand:X 0 "register_operand" "r") +-- +2.47.3 + diff --git a/0012-LoongArch-Add-CRC-expander-to-generate-faster-CRC.patch b/0012-LoongArch-Add-CRC-expander-to-generate-faster-CRC.patch new file mode 100644 index 0000000..a6a7cd1 --- /dev/null +++ b/0012-LoongArch-Add-CRC-expander-to-generate-faster-CRC.patch @@ -0,0 +1,88 @@ +From b670d415102966b223bd2ed7bcb96ff6f4f769fd Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Mon, 16 Dec 2024 20:43:03 +0800 +Subject: [PATCH 12/59] LoongArch: Add CRC expander to generate faster CRC + +64-bit LoongArch has native CRC instructions for two specific +polynomials. For other polynomials or 32-bit, use the generic +table-based approach but optimize bit reversing. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (crc_revsi4): New + define_expand. 
+--- + gcc/config/loongarch/loongarch.md | 57 +++++++++++++++++++++++++++++++ + 1 file changed, 57 insertions(+) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index ebf633776..e2d6a1813 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -4441,6 +4441,63 @@ + [(set_attr "type" "unknown") + (set_attr "mode" "")]) + ++(define_expand "crc_revsi4" ++ [(match_operand:SI 0 "register_operand") ; new_chksum ++ (match_operand:SI 1 "register_operand") ; old_chksum ++ (match_operand:SUBDI 2 "reg_or_0_operand") ; msg ++ (match_operand 3 "const_int_operand")] ; poly ++ "" ++ { ++ unsigned HOST_WIDE_INT poly = UINTVAL (operands[3]); ++ rtx msg = operands[2]; ++ rtx (*crc_insn)(rtx, rtx, rtx) = nullptr; ++ ++ /* TODO: Review this when adding LA32 support. If we're going to ++ support CRC instructions on LA32 we'll need a "-mcrc" switch as ++ they are optional on LA32. */ ++ ++ if (TARGET_64BIT) ++ { ++ if (poly == reflect_hwi (0xedb88320u, 32)) ++ crc_insn = gen_loongarch_crc_w__w; ++ else if (poly == reflect_hwi (0x82f63b78u, 32)) ++ crc_insn = gen_loongarch_crcc_w__w; ++ } ++ ++ if (crc_insn) ++ { ++ /* We cannot make crc_insn to accept const0_rtx easily: ++ it's not possible to figure out the mode of const0_rtx so we'd ++ have to separate both UNSPEC_CRC and UNSPEC_CRCC to 4 different ++ UNSPECs. Instead just hack it around here. */ ++ if (msg == const0_rtx) ++ msg = gen_rtx_REG (mode, 0); ++ ++ emit_insn (crc_insn (operands[0], msg, operands[1])); ++ } ++ else ++ { ++ /* No CRC instruction is suitable, use the generic table-based ++ implementation but optimize bit reversion. */ ++ auto rbit = [](rtx *r) ++ { ++ /* Well, this is ugly. The problem is ++ expand_reversed_crc_table_based only accepts one helper ++ for reversing data elements and CRC states. */ ++ auto mode = GET_MODE (*r); ++ auto rbit = (mode == mode ? 
gen_rbit : gen_rbitsi); ++ rtx out = gen_reg_rtx (mode); ++ ++ emit_insn (rbit (out, *r)); ++ *r = out; ++ }; ++ expand_reversed_crc_table_based (operands[0], operands[1], ++ msg, operands[3], mode, ++ rbit); ++ } ++ DONE; ++ }) ++ + ;; With normal or medium code models, if the only use of a pc-relative + ;; address is for loading or storing a value, then relying on linker + ;; relaxation is not better than emitting the machine instruction directly. +-- +2.47.3 + diff --git a/0013-LoongArch-Combine-xor-and-crc-instructions.patch b/0013-LoongArch-Combine-xor-and-crc-instructions.patch new file mode 100644 index 0000000..5e83ac0 --- /dev/null +++ b/0013-LoongArch-Combine-xor-and-crc-instructions.patch @@ -0,0 +1,83 @@ +From 3f2941cee6aac8857708317961771d1b6e538d0f Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Thu, 5 Dec 2024 14:19:02 +0800 +Subject: [PATCH 13/59] LoongArch: Combine xor and crc instructions + +For a textbook-style CRC implementation: + + uint32_t crc = 0xffffffffu; + for (size_t k = 0; k < len; k++) + { + crc ^= data[k]; + for (int i = 0; i < 8 * sizeof (T); i++) + if (crc & 1) + crc = (crc >> 1) ^ poly; + else + crc >>= 1; + } + return crc; + +The generic code reports: + + Data and CRC are xor-ed before for loop. Initializing data with 0. + +resulting in: + + ld.bu $t1, $a0, 0 + xor $t0, $t0, $t1 + crc.w.b.w $t0, $zero, $t0 + +But it's just better to use + + ld.bu $t1, $a0, 0 + crc.w.b.w $t0, $t1, $t0 + +instead. Implement this optimization now. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (*crc_combine): New + define_insn_and_split. 
+--- + gcc/config/loongarch/loongarch.md | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index e2d6a1813..4b8caeda7 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -4498,6 +4498,31 @@ + DONE; + }) + ++(define_insn_and_split "*crc_combine" ++ [(set (match_operand:SI 0 "register_operand" "=r,r") ++ (unspec:SI ++ [(reg:SUBDI 0) ++ (subreg:SI ++ (xor:DI ++ (match_operand:DI 1 "register_operand" "r,r") ++ ; Our LOAD_EXTEND_OP makes this same as sign_extend ++ ; if SUBDI is SI, or zero_extend if SUBDI is QI or HI. ++ ; For the former the high bits in rk are ignored by ++ ; crc.w.w.w anyway, for the latter the zero extension is ++ ; necessary for the correctness of this transformation. ++ (subreg:DI ++ (match_operand:SUBDI 2 "memory_operand" "m,k") 0)) 0)] ++ CRC))] ++ "TARGET_64BIT && loongarch_pre_reload_split ()" ++ "#" ++ "&& true" ++ [(set (match_dup 3) (match_dup 2)) ++ (set (match_dup 0) ++ (unspec:SI [(match_dup 3) (subreg:SI (match_dup 1) 0)] CRC))] ++ { ++ operands[3] = gen_reg_rtx (mode); ++ }) ++ + ;; With normal or medium code models, if the only use of a pc-relative + ;; address is for loading or storing a value, then relying on linker + ;; relaxation is not better than emitting the machine instruction directly. +-- +2.47.3 + diff --git a/0014-LoongArch-Add-crc-tests.patch b/0014-LoongArch-Add-crc-tests.patch new file mode 100644 index 0000000..1a3dfe0 --- /dev/null +++ b/0014-LoongArch-Add-crc-tests.patch @@ -0,0 +1,164 @@ +From e3e45a386a9ffbf5a75548a459840199bdcacf6c Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 13 Dec 2024 15:46:00 +0800 +Subject: [PATCH 14/59] LoongArch: Add crc tests + +gcc/testsuite/ChangeLog: + + * g++.target/loongarch/crc.C: New test. + * g++.target/loongarch/crc-scan.C: New test. 
+--- + gcc/testsuite/g++.target/loongarch/crc-scan.C | 13 ++ + gcc/testsuite/g++.target/loongarch/crc.C | 120 ++++++++++++++++++ + 2 files changed, 133 insertions(+) + create mode 100644 gcc/testsuite/g++.target/loongarch/crc-scan.C + create mode 100644 gcc/testsuite/g++.target/loongarch/crc.C + +diff --git a/gcc/testsuite/g++.target/loongarch/crc-scan.C b/gcc/testsuite/g++.target/loongarch/crc-scan.C +new file mode 100644 +index 000000000..971580f0d +--- /dev/null ++++ b/gcc/testsuite/g++.target/loongarch/crc-scan.C +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64" } */ ++ ++#include "crc.C" ++ ++/* { dg-final { scan-assembler-times "crc\\.w\\.b\\.w" 2 } } */ ++/* { dg-final { scan-assembler-times "crc\\.w\\.h\\.w" 2 } } */ ++/* { dg-final { scan-assembler-times "crc\\.w\\.w\\.w" 2 } } */ ++/* { dg-final { scan-assembler-times "crcc\\.w\\.b\\.w" 2 } } */ ++/* { dg-final { scan-assembler-times "crcc\\.w\\.h\\.w" 2 } } */ ++/* { dg-final { scan-assembler-times "crcc\\.w\\.w\\.w" 2 } } */ ++/* { dg-final { scan-assembler-not "crc\\.w\\.\[bhw\]\\.w\t\\\$r\[0-9\]+,\\\$r0" } } */ ++/* { dg-final { scan-assembler-not "crcc\\.w\\.\[bhw\]\\.w\t\\\$r\[0-9\]+,\\\$r0" } } */ +diff --git a/gcc/testsuite/g++.target/loongarch/crc.C b/gcc/testsuite/g++.target/loongarch/crc.C +new file mode 100644 +index 000000000..16df4a1ae +--- /dev/null ++++ b/gcc/testsuite/g++.target/loongarch/crc.C +@@ -0,0 +1,120 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++ ++typedef __UINT8_TYPE__ uint8_t; ++typedef __UINT16_TYPE__ uint16_t; ++typedef __UINT32_TYPE__ uint32_t; ++typedef __UINT64_TYPE__ uint64_t; ++typedef __SIZE_TYPE__ size_t; ++ ++template ++__attribute__ ((always_inline)) inline uint32_t ++crc32_impl (const T *data, size_t len) ++{ ++ uint32_t ret = 0xffffffffu; ++ for (size_t k = 0; k < len; k++) ++ { ++ ret ^= data[k]; ++ for (int i = 0; i < 8 * sizeof (T); i++) ++ if (ret & 1) ++ ret = (ret >> 1) ^ poly; ++ else ++ ret >>= 1; ++ } ++ 
return ret; ++} ++ ++template ++__attribute__ ((noipa, optimize (0))) uint32_t ++crc32_ref (const T *data, size_t len) ++{ ++ return crc32_impl (data, len); ++} ++ ++template ++__attribute__ ((noipa)) uint32_t ++crc32_opt (const T *data, size_t len) ++{ ++ return crc32_impl (data, len); ++} ++ ++template ++__attribute__ ((noipa)) uint32_t ++crc32_alt (const T *data, size_t len) ++{ ++ uint32_t ret = 0xffffffffu; ++ for (size_t k = 0; k < len; k++) ++ { ++ T x = data[k]; ++ for (int i = 0; i < 8 * sizeof (T); i++) ++ { ++ if ((ret & 1) ^ (x & 1)) ++ ret = (ret >> 1) ^ poly; ++ else ++ ret >>= 1; ++ x >>= 1; ++ } ++ } ++ return ret; ++} ++ ++union test_data_t ++{ ++ uint8_t u8[1024]; ++ uint16_t u16[512]; ++ uint32_t u32[256]; ++ ++ operator const uint8_t * () const { return u8; } ++ operator const uint16_t * () const { return u16; } ++ operator const uint32_t * () const { return u32; } ++ ++ constexpr ++ test_data_t () ++ : u8{} ++ { ++ } ++}; ++ ++/* Generate test data at compile time with minstd_rand0 algorithm. 
*/ ++constexpr test_data_t ++gen (uint64_t seed) ++{ ++ uint64_t state = seed; ++ test_data_t ret; ++ for (int i = 0; i < sizeof (ret); i++) ++ { ++ state = state * 16807 % 2147483647; ++ ret.u8[i] = (uint8_t)state; ++ } ++ return ret; ++} ++ ++constexpr union test_data_t test_data = gen (0xdeadbeef); ++ ++void ++assert_eq (uint32_t x, uint32_t y) ++{ ++ if (x != y) ++ __builtin_trap (); ++} ++ ++template ++void ++test_crc32 () ++{ ++ constexpr size_t len = sizeof (test_data) / sizeof (T); ++ uint32_t ref = crc32_ref (test_data, len); ++ assert_eq (ref, crc32_opt (test_data, len)); ++ assert_eq (ref, crc32_alt (test_data, len)); ++} ++ ++int ++main (void) ++{ ++ test_crc32 (); ++ test_crc32 (); ++ test_crc32 (); ++ test_crc32 (); ++ test_crc32 (); ++ test_crc32 (); ++} +-- +2.47.3 + diff --git a/0015-LoongArch-Implement-TARGET_IRA_CHANGE_PSEUDO_ALLOCNO.patch b/0015-LoongArch-Implement-TARGET_IRA_CHANGE_PSEUDO_ALLOCNO.patch new file mode 100644 index 0000000..1602067 --- /dev/null +++ b/0015-LoongArch-Implement-TARGET_IRA_CHANGE_PSEUDO_ALLOCNO.patch @@ -0,0 +1,78 @@ +From cd6b7a2407efee1ca8d57d31eb4f20dac5a059a9 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Tue, 17 Dec 2024 10:41:48 +0800 +Subject: [PATCH 15/59] LoongArch: Implement + TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS hook + +The hook changes the allocno class to either FP_REGS or GR_REGS depending on +the mode of the register. This results in better register allocation overall, +fewer spills and reduced codesize - particularly in SPEC2017 lbm. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_ira_change_pseudo_allocno_class): New function. + (TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS): Define macro. 
+--- + gcc/config/loongarch/loongarch.cc | 38 +++++++++++++++++++++++++++++++ + 1 file changed, 38 insertions(+) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 5cce912bc..4bfd6ec8c 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6979,6 +6979,40 @@ loongarch_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, + return NO_REGS; + } + ++/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS. ++ ++ The register allocator chooses ALL_REGS if FP_REGS and GR_REGS have the ++ same cost - even if ALL_REGS has a much higher cost. ALL_REGS is also used ++ if the cost of both FP_REGS and GR_REGS is lower than the memory cost (in ++ this case the best class is the lowest cost one). Using ALL_REGS ++ irrespectively of itself cost results in bad allocations with many redundant ++ int<->FP moves which are expensive on various cores. ++ ++ To avoid this we don't allow ALL_REGS as the allocno class, but force a ++ decision between FP_REGS and GR_REGS. We use the allocno class if it isn't ++ ALL_REGS. Similarly, use the best class if it isn't ALL_REGS. Otherwise Set ++ the allocno class depending on the mode. ++ ++ This change has a similar effect to increasing the cost of FPR->GPR register ++ moves for integer modes so that they are higher than the cost of memory but ++ changing the allocno class is more reliable. */ ++ ++static reg_class_t ++loongarch_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class, ++ reg_class_t best_class) ++{ ++ enum machine_mode mode; ++ ++ if (allocno_class != ALL_REGS) ++ return allocno_class; ++ ++ if (best_class != ALL_REGS) ++ return best_class; ++ ++ mode = PSEUDO_REGNO_MODE (regno); ++ return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GR_REGS; ++} ++ + /* Implement TARGET_VALID_POINTER_MODE. 
*/ + + static bool +@@ -11125,6 +11159,10 @@ loongarch_asm_code_end (void) + #undef TARGET_SECONDARY_RELOAD + #define TARGET_SECONDARY_RELOAD loongarch_secondary_reload + ++#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS ++#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \ ++ loongarch_ira_change_pseudo_allocno_class ++ + #undef TARGET_HAVE_SPECULATION_SAFE_VALUE + #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed + +-- +2.47.3 + diff --git a/0016-LoongArch-Support-immediate_operand-for-vec_cmp.patch b/0016-LoongArch-Support-immediate_operand-for-vec_cmp.patch new file mode 100644 index 0000000..d21b14a --- /dev/null +++ b/0016-LoongArch-Support-immediate_operand-for-vec_cmp.patch @@ -0,0 +1,317 @@ +From 919a2323eaed231f46c681d720a22236f09e3dee Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Wed, 18 Dec 2024 15:45:17 +0800 +Subject: [PATCH 16/59] LoongArch: Support immediate_operand for vec_cmp + +We can't vectorize the code into instructions like vslti.w that compare +with immediate_operand, because we miss immediate_operand support for +integer comparisons. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (vec_cmp): Remove. + (vec_cmpu): Remove. + * config/loongarch/loongarch.cc (loongarch_expand_lsx_cmp): + Ensure vector comparison instructions support CMP_OP1. + * config/loongarch/lsx.md (vec_cmp): Remove. + (vec_cmpu): Remove. + * config/loongarch/simd.md (ALLVEC, allmode_i): New mode iterators. + (vec_cmp): New define_expand. + (vec_cmpu): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-vcond-3.c: New test. 
+--- + gcc/config/loongarch/lasx.md | 25 ------ + gcc/config/loongarch/loongarch.cc | 12 +++ + gcc/config/loongarch/lsx.md | 25 ------ + gcc/config/loongarch/simd.md | 40 +++++++++ + .../loongarch/vector/lasx/lasx-vcond-3.c | 81 +++++++++++++++++++ + 5 files changed, 133 insertions(+), 50 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-3.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index e4ea2e4fa..a4505dcbd 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -162,9 +162,6 @@ + UNSPEC_LASX_XVILVL_INTERNAL + ]) + +-;; All vector modes with 256 bits. +-(define_mode_iterator LASX [V4DF V8SF V4DI V8SI V16HI V32QI]) +- + ;; Only used for splitting insert_d and copy_{u,s}.d. + (define_mode_iterator LASX_D [V4DI V4DF]) + +@@ -1365,28 +1362,6 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +-(define_expand "vec_cmp" +- [(set (match_operand: 0 "register_operand") +- (match_operator 1 "" +- [(match_operand:LASX 2 "register_operand") +- (match_operand:LASX 3 "register_operand")]))] +- "ISA_HAS_LASX" +-{ +- loongarch_expand_vec_cmp (operands); +- DONE; +-}) +- +-(define_expand "vec_cmpu" +- [(set (match_operand: 0 "register_operand") +- (match_operator 1 "" +- [(match_operand:ILASX 2 "register_operand") +- (match_operand:ILASX 3 "register_operand")]))] +- "ISA_HAS_LASX" +-{ +- loongarch_expand_vec_cmp (operands); +- DONE; +-}) +- + (define_insn "lasx_xvfclass_" + [(set (match_operand: 0 "register_operand" "=f") + (unspec: [(match_operand:FLASX 1 "register_operand" "f")] +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 4bfd6ec8c..069e9cc33 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -10388,19 +10388,29 @@ loongarch_expand_lsx_cmp (rtx dest, enum rtx_code cond, rtx op0, rtx op1) + switch (cond) + { + case NE: ++ if (!loongarch_const_vector_same_int_p (op1, 
cmp_mode, -16, 15)) ++ op1 = force_reg (cmp_mode, op1); + cond = reverse_condition (cond); + negate = true; + break; + case EQ: + case LT: + case LE: ++ if (!loongarch_const_vector_same_int_p (op1, cmp_mode, -16, 15)) ++ op1 = force_reg (cmp_mode, op1); ++ break; + case LTU: + case LEU: ++ if (!loongarch_const_vector_same_int_p (op1, cmp_mode, 0, 31)) ++ op1 = force_reg (cmp_mode, op1); + break; + case GE: + case GT: + case GEU: + case GTU: ++ /* Only supports reg-reg comparison. */ ++ if (!register_operand (op1, cmp_mode)) ++ op1 = force_reg (cmp_mode, op1); + std::swap (op0, op1); + cond = swap_condition (cond); + break; +@@ -10416,6 +10426,8 @@ loongarch_expand_lsx_cmp (rtx dest, enum rtx_code cond, rtx op0, rtx op1) + case E_V2DFmode: + case E_V8SFmode: + case E_V4DFmode: ++ if (!register_operand (op1, cmp_mode)) ++ op1 = force_reg (cmp_mode, op1); + loongarch_emit_binary (cond, dest, op0, op1); + break; + +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index d05fd8549..0d422f9e1 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -183,9 +183,6 @@ + (V8HI "V2DI") + (V16QI "V4SI")]) + +-;; All vector modes with 128 bits. +-(define_mode_iterator LSX [V2DF V4SF V2DI V4SI V8HI V16QI]) +- + ;; Only used for vilvh and splitting insert_d and copy_{u,s}.d. 
+ (define_mode_iterator LSX_D [V2DI V2DF]) + +@@ -508,28 +505,6 @@ + DONE; + }) + +-(define_expand "vec_cmp" +- [(set (match_operand: 0 "register_operand") +- (match_operator 1 "" +- [(match_operand:LSX 2 "register_operand") +- (match_operand:LSX 3 "register_operand")]))] +- "ISA_HAS_LSX" +-{ +- loongarch_expand_vec_cmp (operands); +- DONE; +-}) +- +-(define_expand "vec_cmpu" +- [(set (match_operand: 0 "register_operand") +- (match_operator 1 "" +- [(match_operand:ILSX 2 "register_operand") +- (match_operand:ILSX 3 "register_operand")]))] +- "ISA_HAS_LSX" +-{ +- loongarch_expand_vec_cmp (operands); +- DONE; +-}) +- + (define_expand "vcond_mask_" + [(match_operand:LSX 0 "register_operand") + (match_operand:LSX 1 "reg_or_m1_operand") +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 9e4c08196..28efcea51 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -29,6 +29,15 @@ + ;; FP modes supported by LASX + (define_mode_iterator FLASX [V4DF V8SF]) + ++;; All modes supported by LSX ++(define_mode_iterator LSX [ILSX FLSX]) ++ ++;; ALL modes supported by LASX ++(define_mode_iterator LASX [ILASX FLASX]) ++ ++;; All vector modes available ++(define_mode_iterator ALLVEC [(LSX "ISA_HAS_LSX") (LASX "ISA_HAS_LASX")]) ++ + ;; All integer modes available + ;; (define_mode_iterator IVEC [(ILSX "ISA_HAS_LSX") (ILASX "ISA_HAS_LASX")]) + (define_mode_iterator IVEC [(V2DI "ISA_HAS_LSX") (V4SI "ISA_HAS_LSX") (V8HI "ISA_HAS_LSX") (V16QI "ISA_HAS_LSX") +@@ -75,6 +84,14 @@ + (define_mode_attr vimode [(V2DF "v2di") (V4SF "v4si") + (V4DF "v4di") (V8SF "v8si")]) + ++;; Integer vector modes with the same size, in lower-case. ++(define_mode_attr allmode_i [(V2DI "v2di") (V4SI "v4si") ++ (V8HI "v8hi") (V16QI "v16qi") ++ (V2DF "v2di") (V4SF "v4si") ++ (V4DI "v4di") (V8SI "v8si") ++ (V16HI "v16hi") (V32QI "v32qi") ++ (V4DF "v4di") (V8SF "v8si")]) ++ + ;; Suffix for LSX or LASX instructions. 
+ (define_mode_attr simdfmt [(V2DF "d") (V4DF "d") + (V4SF "s") (V8SF "s") +@@ -479,6 +496,29 @@ + [(set_attr "type" "simd_logic") + (set_attr "mode" "")]) + ++;; vector compare ++(define_expand "vec_cmp" ++ [(set (match_operand: 0 "register_operand") ++ (match_operator 1 "" ++ [(match_operand:ALLVEC 2 "register_operand") ++ (match_operand:ALLVEC 3 "nonmemory_operand")]))] ++ "" ++{ ++ loongarch_expand_vec_cmp (operands); ++ DONE; ++}) ++ ++(define_expand "vec_cmpu" ++ [(set (match_operand: 0 "register_operand") ++ (match_operator 1 "" ++ [(match_operand:IVEC 2 "register_operand") ++ (match_operand:IVEC 3 "nonmemory_operand")]))] ++ "" ++{ ++ loongarch_expand_vec_cmp (operands); ++ DONE; ++}) ++ + ; The LoongArch SX Instructions. + (include "lsx.md") + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-3.c +new file mode 100644 +index 000000000..17545f445 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-3.c +@@ -0,0 +1,81 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-unroll-loops -fno-vect-cost-model -mlasx" } */ ++ ++#include ++ ++#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX, IMM) \ ++ void __attribute__ ((noinline, noclone)) \ ++ vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r, \ ++ DATA_TYPE *__restrict__ x, \ ++ DATA_TYPE *__restrict__ y, \ ++ CMP_TYPE *__restrict__ a, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; i++) \ ++ { \ ++ DATA_TYPE xval = x[i], yval = y[i]; \ ++ CMP_TYPE aval = a[i], bval = IMM; \ ++ r[i] = aval COND bval ? 
xval : yval; \ ++ } \ ++ } ++ ++#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX) \ ++ T (int8_t, int8_t, COND, SUFFIX, 0) \ ++ T (int16_t, int16_t, COND, SUFFIX, 0) \ ++ T (int32_t, int32_t, COND, SUFFIX, 0) \ ++ T (int64_t, int64_t, COND, SUFFIX, 0) \ ++ T (float, int32_t, COND, SUFFIX##_float, 0) \ ++ T (double, int64_t, COND, SUFFIX##_double, 0) ++ ++#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX) \ ++ T (uint8_t, uint8_t, COND, SUFFIX, 2) \ ++ T (uint16_t, uint16_t, COND, SUFFIX, 2) \ ++ T (uint32_t, uint32_t, COND, SUFFIX, 2) \ ++ T (uint64_t, uint64_t, COND, SUFFIX, 2) \ ++ T (float, uint32_t, COND, SUFFIX##_float, 2) \ ++ T (double, uint64_t, COND, SUFFIX##_double, 2) ++ ++#define TEST_COND_VAR_ALL(T, COND, SUFFIX) \ ++ TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX) \ ++ TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX) ++ ++#define TEST_VAR_ALL(T) \ ++ TEST_COND_VAR_ALL (T, <, _lt) \ ++ TEST_COND_VAR_ALL (T, <=, _le) \ ++ TEST_COND_VAR_ALL (T, ==, _eq) \ ++ TEST_COND_VAR_ALL (T, !=, _ne) ++ ++TEST_VAR_ALL (DEF_VCOND_VAR) ++ ++/* { dg-final { scan-assembler-times {\txvslti\.b\t} 1 } } */ ++/* { dg-final { scan-assembler-times {\txvslti\.h\t} 1 } } */ ++/* { dg-final { scan-assembler-times {\txvslti\.w\t} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvslti\.d\t} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvslti\.b\t} 1 } } */ ++/* { dg-final { scan-assembler-times {\tvslti\.h\t} 1 } } */ ++/* { dg-final { scan-assembler-times {\tvslti\.w\t} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvslti\.d\t} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvslei\.b\t} 1 } } */ ++/* { dg-final { scan-assembler-times {\txvslei\.h\t} 1 } } */ ++/* { dg-final { scan-assembler-times {\txvslei\.w\t} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvslei\.d\t} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvslei\.b\t} 1 } } */ ++/* { dg-final { scan-assembler-times {\tvslei\.h\t} 1 } } */ ++/* { dg-final { scan-assembler-times {\tvslei\.w\t} 2 } } */ 
++/* { dg-final { scan-assembler-times {\tvslei\.d\t} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvslei\.bu\t} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvslei\.hu\t} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvslei\.wu\t} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvslei\.du\t} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvslei\.bu\t} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvslei\.hu\t} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvslei\.wu\t} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvslei\.du\t} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvseqi\.b\t} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvseqi\.h\t} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvseqi\.w\t} 8 } } */ ++/* { dg-final { scan-assembler-times {\txvseqi\.d\t} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvseqi\.b\t} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvseqi\.h\t} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvseqi\.w\t} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvseqi\.d\t} 8 } } */ +-- +2.47.3 + diff --git a/0017-LoongArch-Implement-vector-cbranch-optab-for-LSX-and.patch b/0017-LoongArch-Implement-vector-cbranch-optab-for-LSX-and.patch new file mode 100644 index 0000000..b207be2 --- /dev/null +++ b/0017-LoongArch-Implement-vector-cbranch-optab-for-LSX-and.patch @@ -0,0 +1,120 @@ +From 008ea59916467ad85a6dc889e6ff71a907e7875f Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Wed, 25 Dec 2024 17:59:36 +0800 +Subject: [PATCH 17/59] LoongArch: Implement vector cbranch optab for LSX and + LASX + +In order to support vectorization of loops with multiple exits, this +patch adds the implementation of the conditional branch optab for +LoongArch LSX/LASX instructions. + +This patch causes the gen-vect-{2,25}.c tests to fail. This is because +the support for vectorizing loops with multiple exits has vectorized +the loop checking the results. 
The failure is due to an issue in the +test case's own implementation. + +gcc/ChangeLog: + + * config/loongarch/simd.md (cbranch4): New expander. + +gcc/testsuite/ChangeLog: + + * lib/target-supports.exp (check_effective_target_vect_early_break_hw, + check_effective_target_vect_early_break): Support LoongArch LSX. + * gcc.target/loongarch/vector/lasx/lasx-vseteqz.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-vseteqz.c: New test. + +Co-authored-by: Deng Jianbo +--- + gcc/config/loongarch/simd.md | 30 +++++++++++++++++++ + .../loongarch/vector/lasx/lasx-vseteqz.c | 14 +++++++++ + .../loongarch/vector/lsx/lsx-vseteqz.c | 15 ++++++++++ + 3 files changed, 59 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vseteqz.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseteqz.c + +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 28efcea51..d7f1e6ea2 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -519,6 +519,36 @@ + DONE; + }) + ++;; cbranch ++(define_expand "cbranch4" ++ [(set (pc) ++ (if_then_else ++ (match_operator 0 "equality_operator" ++ [(match_operand:IVEC 1 "register_operand") ++ (match_operand:IVEC 2 "reg_or_vector_same_val_operand")]) ++ (label_ref (match_operand 3 "")) ++ (pc)))] ++ "" ++{ ++ RTX_CODE code = GET_CODE (operands[0]); ++ rtx tmp = operands[1]; ++ rtx const0 = CONST0_RTX (SImode); ++ ++ /* If comparing against a non-zero vector we have to do a comparison first ++ so we can have a != 0 comparison with the result. */ ++ if (operands[2] != CONST0_RTX (mode)) ++ { ++ tmp = gen_reg_rtx (mode); ++ emit_insn (gen_xor3 (tmp, operands[1], operands[2])); ++ } ++ ++ if (code == NE) ++ emit_jump_insn (gen__bnz_v_b (operands[3], tmp, const0)); ++ else ++ emit_jump_insn (gen__bz_v_b (operands[3], tmp, const0)); ++ DONE; ++}) ++ + ; The LoongArch SX Instructions. 
+ (include "lsx.md") + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vseteqz.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vseteqz.c +new file mode 100644 +index 000000000..1f69a80a7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vseteqz.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mlasx" } */ ++/* { dg-final { scan-assembler "\txvset.*.v\t" } } */ ++/* { dg-final { scan-assembler "bcnez" } } */ ++ ++int ++foo (int N) ++{ ++ for (int i = 0; i <= N; i++) ++ if (i * i == N) ++ return i; ++ return -1; ++} ++ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseteqz.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseteqz.c +new file mode 100644 +index 000000000..2536bb794 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseteqz.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mlsx" } */ ++/* { dg-final { scan-assembler "\tvset.*.v\t" } } */ ++/* { dg-final { scan-assembler "bcnez" } } */ ++ ++int ++foo (int N) ++{ ++ for (int i = 0; i <= N; i++) ++ if (i * i == N) ++ return i; ++ ++ return -1; ++} ++ +-- +2.47.3 + diff --git a/0018-LoongArch-Remove-useless-UNSPECs-and-define_mode_att.patch b/0018-LoongArch-Remove-useless-UNSPECs-and-define_mode_att.patch new file mode 100644 index 0000000..d93ca8c --- /dev/null +++ b/0018-LoongArch-Remove-useless-UNSPECs-and-define_mode_att.patch @@ -0,0 +1,235 @@ +From af102a0b243dd6f42cca27266d6ee159b54d6f52 Mon Sep 17 00:00:00 2001 +From: Guo Jie +Date: Mon, 30 Dec 2024 10:34:56 +0800 +Subject: [PATCH 18/59] LoongArch: Remove useless UNSPECs and define_mode_attrs + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Remove useless code. + * config/loongarch/lsx.md: Ditto. 
+--- + gcc/config/loongarch/lasx.md | 66 ------------------------------------ + gcc/config/loongarch/lsx.md | 35 ------------------- + 2 files changed, 101 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index a4505dcbd..d80791067 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -37,16 +37,12 @@ + UNSPEC_LASX_XVFCVTH + UNSPEC_LASX_XVFCVTL + UNSPEC_LASX_XVFLOGB +- UNSPEC_LASX_XVFRECIP + UNSPEC_LASX_XVFRECIPE +- UNSPEC_LASX_XVFRINT + UNSPEC_LASX_XVFRSQRT + UNSPEC_LASX_XVFRSQRTE + UNSPEC_LASX_XVFTINT_U +- UNSPEC_LASX_XVCLO + UNSPEC_LASX_XVSAT_S + UNSPEC_LASX_XVSAT_U +- UNSPEC_LASX_XVREPLVE0 + UNSPEC_LASX_XVREPL128VEI + UNSPEC_LASX_XVSRAR + UNSPEC_LASX_XVSRARI +@@ -57,7 +53,6 @@ + UNSPEC_LASX_BRANCH + UNSPEC_LASX_BRANCH_V + +- UNSPEC_LASX_MXVEXTW_U + UNSPEC_LASX_XVSLLWIL_S + UNSPEC_LASX_XVSLLWIL_U + UNSPEC_LASX_XVSRAN +@@ -130,7 +125,6 @@ + UNSPEC_LASX_XVADD_Q + UNSPEC_LASX_XVSUB_Q + UNSPEC_LASX_XVREPLVE +- UNSPEC_LASX_XVSHUF4 + UNSPEC_LASX_XVMSKGEZ + UNSPEC_LASX_XVMSKNZ + UNSPEC_LASX_XVEXTH_Q_D +@@ -212,11 +206,6 @@ + (V8SI "V4SI") + (V4DI "V2DI")]) + +-;;attribute gives half float modes for vector modes. +-(define_mode_attr VFHMODE256 +- [(V8SF "V4SF") +- (V4DF "V2DF")]) +- + ;; The attribute gives half int/float modes for vector modes. + (define_mode_attr VHMODE256_ALL + [(V32QI "V16QI") +@@ -252,20 +241,6 @@ + (V4DF "V8DF") + (V4DI "V8DI")]) + +-;; This attribute gives the mode of the result for "copy_s_b, copy_u_b" etc. +-(define_mode_attr VRES256 +- [(V4DF "DF") +- (V8SF "SF") +- (V4DI "DI") +- (V8SI "SI") +- (V16HI "SI") +- (V32QI "SI")]) +- +-;; Only used with LASX_D iterator. +-(define_mode_attr lasx_d +- [(V4DI "reg_or_0") +- (V4DF "register")]) +- + ;; This attribute gives the 256 bit integer vector mode with same size. 
+ (define_mode_attr mode256_i + [(V4DF "v4di") +@@ -275,14 +250,6 @@ + (V16HI "v16hi") + (V32QI "v32qi")]) + +- +-;; This attribute gives the 256 bit float vector mode with same size. +-(define_mode_attr mode256_f +- [(V4DF "v4df") +- (V8SF "v8sf") +- (V4DI "v4df") +- (V8SI "v8sf")]) +- + ;; This attribute gives V32QI mode and V16HI mode with half size. + (define_mode_attr mode256_i_half + [(V32QI "v16qi") +@@ -344,14 +311,6 @@ + (V16HI "h") + (V32QI "b")]) + +-(define_mode_attr flasxfmt_f +- [(V4DF "d_f") +- (V8SF "s_f") +- (V4DI "d") +- (V8SI "w") +- (V16HI "h") +- (V32QI "b")]) +- + ;; This attribute gives define_insn suffix for LASX instructions that need + ;; distinction between integer and floating point. + (define_mode_attr lasxfmt_f_wd +@@ -438,27 +397,6 @@ + (V4DI "uimm6")]) + + +-(define_mode_attr d2lasxfmt +- [(V8SI "q") +- (V16HI "d") +- (V32QI "w")]) +- +-(define_mode_attr d2lasxfmt_u +- [(V8SI "qu") +- (V16HI "du") +- (V32QI "wu")]) +- +-(define_mode_attr VD2MODE256 +- [(V8SI "V4DI") +- (V16HI "V4DI") +- (V32QI "V8SI")]) +- +-(define_mode_attr lasxfmt_wd +- [(V4DI "d") +- (V8SI "w") +- (V16HI "w") +- (V32QI "w")]) +- + ;; Half modes of all LASX vector modes, in lower-case. 
+ (define_mode_attr lasxhalf [(V32QI "v16qi") (V16HI "v8hi") + (V8SI "v4si") (V4DI "v2di") +@@ -1402,10 +1340,6 @@ + (set_attr "cnv_mode" "") + (set_attr "mode" "")]) + +-(define_mode_attr FFQ256 +- [(V4SF "V16HI") +- (V2DF "V8SI")]) +- + (define_insn "lasx_xvreplgr2vr_" + [(set (match_operand:ILASX 0 "register_operand" "=f,f") + (vec_duplicate:ILASX +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 0d422f9e1..a6140b8e5 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -39,15 +39,12 @@ + UNSPEC_LSX_VFCVTH + UNSPEC_LSX_VFCVTL + UNSPEC_LSX_VFLOGB +- UNSPEC_LSX_VFRECIP + UNSPEC_LSX_VFRECIPE +- UNSPEC_LSX_VFRINT + UNSPEC_LSX_VFRSQRT + UNSPEC_LSX_VFRSQRTE + UNSPEC_LSX_VFTINT_U + UNSPEC_LSX_VSAT_S + UNSPEC_LSX_VSAT_U +- UNSPEC_LSX_VREPLVEI + UNSPEC_LSX_VSRAR + UNSPEC_LSX_VSRARI + UNSPEC_LSX_VSRLR +@@ -167,22 +164,6 @@ + (V8HI "wu") + (V16QI "hu")]) + +-(define_mode_attr d2lsxfmt +- [(V4SI "q") +- (V8HI "d") +- (V16QI "w")]) +- +-(define_mode_attr d2lsxfmt_u +- [(V4SI "qu") +- (V8HI "du") +- (V16QI "wu")]) +- +-;; The attribute gives two double modes for vector modes. +-(define_mode_attr VD2MODE +- [(V4SI "V2DI") +- (V8HI "V2DI") +- (V16QI "V4SI")]) +- + ;; Only used for vilvh and splitting insert_d and copy_{u,s}.d. 
+ (define_mode_iterator LSX_D [V2DI V2DF]) + +@@ -299,24 +280,12 @@ + (V8HI "h") + (V16QI "b")]) + +-(define_mode_attr flsxfmt_f +- [(V2DF "d_f") +- (V4SF "s_f") +- (V2DI "d") +- (V4SI "w") +- (V8HI "h") +- (V16QI "b")]) +- + (define_mode_attr flsxfmt + [(V2DF "d") + (V4SF "s") + (V2DI "d") + (V4SI "s")]) + +-(define_mode_attr flsxfrint +- [(V2DF "d") +- (V4SF "s")]) +- + (define_mode_attr ilsxfmt + [(V2DF "l") + (V4SF "w")]) +@@ -1275,10 +1244,6 @@ + (set_attr "cnv_mode" "") + (set_attr "mode" "")]) + +-(define_mode_attr FFQ +- [(V4SF "V8HI") +- (V2DF "V4SI")]) +- + (define_insn "lsx_vreplgr2vr_" + [(set (match_operand:ILSX 0 "register_operand" "=f,f") + (vec_duplicate:ILSX +-- +2.47.3 + diff --git a/0019-LoongArch-Fix-selector-error-in-lasx_xvexth_h-w-d-pa.patch b/0019-LoongArch-Fix-selector-error-in-lasx_xvexth_h-w-d-pa.patch new file mode 100644 index 0000000..a60b87e --- /dev/null +++ b/0019-LoongArch-Fix-selector-error-in-lasx_xvexth_h-w-d-pa.patch @@ -0,0 +1,64 @@ +From 6f235651ad0a3bee0aae1244f16dc5b59c8a38fe Mon Sep 17 00:00:00 2001 +From: Guo Jie +Date: Mon, 30 Dec 2024 10:37:18 +0800 +Subject: [PATCH 19/59] LoongArch: Fix selector error in lasx_xvexth_h/w/d* + patterns + +The xvexth related instructions operate SEPARATELY according to +the high and low 128 bits, and sign/zero extend the upper half +of every 128 bits in src to the corresponding 128 bits in dest. + +For xvexth.d.w, the rule for the first element of dest should be: + dest.D[0] = sign_extend (src.W[2] ,64); +instead of: + dest.D[0] = sign_extend (src.W[4] ,64); + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Fix selector index. 
+--- + gcc/config/loongarch/lasx.md | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index d80791067..8a3684038 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -4183,10 +4183,10 @@ + (any_extend:V16HI + (vec_select:V16QI + (match_operand:V32QI 1 "register_operand" "f") +- (parallel [(const_int 16) (const_int 17) +- (const_int 18) (const_int 19) +- (const_int 20) (const_int 21) +- (const_int 22) (const_int 23) ++ (parallel [(const_int 8) (const_int 9) ++ (const_int 10) (const_int 11) ++ (const_int 12) (const_int 13) ++ (const_int 14) (const_int 15) + (const_int 24) (const_int 25) + (const_int 26) (const_int 27) + (const_int 28) (const_int 29) +@@ -4201,8 +4201,8 @@ + (any_extend:V8SI + (vec_select:V8HI + (match_operand:V16HI 1 "register_operand" "f") +- (parallel [(const_int 8) (const_int 9) +- (const_int 10) (const_int 11) ++ (parallel [(const_int 4) (const_int 5) ++ (const_int 6) (const_int 7) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15)]))))] + "ISA_HAS_LASX" +@@ -4215,7 +4215,7 @@ + (any_extend:V4DI + (vec_select:V4SI + (match_operand:V8SI 1 "register_operand" "f") +- (parallel [(const_int 4) (const_int 5) ++ (parallel [(const_int 2) (const_int 3) + (const_int 6) (const_int 7)]))))] + "ISA_HAS_LASX" + "xvexth.d.w\t%u0,%u1" +-- +2.47.3 + diff --git a/0020-LoongArch-Fix-bugs-in-insn-patterns-lasx_xvrepl128ve.patch b/0020-LoongArch-Fix-bugs-in-insn-patterns-lasx_xvrepl128ve.patch new file mode 100644 index 0000000..c090487 --- /dev/null +++ b/0020-LoongArch-Fix-bugs-in-insn-patterns-lasx_xvrepl128ve.patch @@ -0,0 +1,155 @@ +From e0346aa5c2da07ee3776f54b5dc5ad25e96ca378 Mon Sep 17 00:00:00 2001 +From: Guo Jie +Date: Mon, 30 Dec 2024 10:37:57 +0800 +Subject: [PATCH 20/59] LoongArch: Fix bugs in insn patterns + lasx_xvrepl128vei_b/h/w/d_internal + +There are two aspects that affect the matching of instruction 
templates: + +1. vec_duplicate is redundant in the following operations. + set (match_operand:V4DI ...) + (vec_duplicate:V4DI (vec_select:V4DI ...)) + +2. The range of values for testing predicate const_8_to_15_operand and +const_16_to_31_operand should be [8, 15] and [16, 31] respectively. + +However, there is currently no suitable testcase to verify. + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Remove useless vec_duplicate. + * config/loongarch/predicates.md: Correct error predicate. +--- + gcc/config/loongarch/lasx.md | 76 ++++++++++++++---------------- + gcc/config/loongarch/predicates.md | 4 +- + 2 files changed, 38 insertions(+), 42 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 8a3684038..6b6bd3bf4 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -2281,21 +2281,20 @@ + + (define_insn "lasx_xvrepl128vei_b_internal" + [(set (match_operand:V32QI 0 "register_operand" "=f") +- (vec_duplicate:V32QI +- (vec_select:V32QI +- (match_operand:V32QI 1 "register_operand" "f") +- (parallel [(match_operand 2 "const_uimm4_operand" "") +- (match_dup 2) (match_dup 2) (match_dup 2) +- (match_dup 2) (match_dup 2) (match_dup 2) +- (match_dup 2) (match_dup 2) (match_dup 2) +- (match_dup 2) (match_dup 2) (match_dup 2) +- (match_dup 2) (match_dup 2) (match_dup 2) +- (match_operand 3 "const_16_to_31_operand" "") +- (match_dup 3) (match_dup 3) (match_dup 3) +- (match_dup 3) (match_dup 3) (match_dup 3) +- (match_dup 3) (match_dup 3) (match_dup 3) +- (match_dup 3) (match_dup 3) (match_dup 3) +- (match_dup 3) (match_dup 3) (match_dup 3)]))))] ++ (vec_select:V32QI ++ (match_operand:V32QI 1 "register_operand" "f") ++ (parallel [(match_operand 2 "const_uimm4_operand" "") ++ (match_dup 2) (match_dup 2) (match_dup 2) ++ (match_dup 2) (match_dup 2) (match_dup 2) ++ (match_dup 2) (match_dup 2) (match_dup 2) ++ (match_dup 2) (match_dup 2) (match_dup 2) ++ (match_dup 2) (match_dup 2) (match_dup 2) ++ 
(match_operand 3 "const_16_to_31_operand" "") ++ (match_dup 3) (match_dup 3) (match_dup 3) ++ (match_dup 3) (match_dup 3) (match_dup 3) ++ (match_dup 3) (match_dup 3) (match_dup 3) ++ (match_dup 3) (match_dup 3) (match_dup 3) ++ (match_dup 3) (match_dup 3) (match_dup 3)])))] + "ISA_HAS_LASX && ((INTVAL (operands[3]) - INTVAL (operands[2])) == 16)" + "xvrepl128vei.b\t%u0,%u1,%2" + [(set_attr "type" "simd_splat") +@@ -2303,17 +2302,16 @@ + + (define_insn "lasx_xvrepl128vei_h_internal" + [(set (match_operand:V16HI 0 "register_operand" "=f") +- (vec_duplicate:V16HI +- (vec_select:V16HI +- (match_operand:V16HI 1 "register_operand" "f") +- (parallel [(match_operand 2 "const_uimm3_operand" "") +- (match_dup 2) (match_dup 2) (match_dup 2) +- (match_dup 2) (match_dup 2) (match_dup 2) +- (match_dup 2) +- (match_operand 3 "const_8_to_15_operand" "") +- (match_dup 3) (match_dup 3) (match_dup 3) +- (match_dup 3) (match_dup 3) (match_dup 3) +- (match_dup 3)]))))] ++ (vec_select:V16HI ++ (match_operand:V16HI 1 "register_operand" "f") ++ (parallel [(match_operand 2 "const_uimm3_operand" "") ++ (match_dup 2) (match_dup 2) (match_dup 2) ++ (match_dup 2) (match_dup 2) (match_dup 2) ++ (match_dup 2) ++ (match_operand 3 "const_8_to_15_operand" "") ++ (match_dup 3) (match_dup 3) (match_dup 3) ++ (match_dup 3) (match_dup 3) (match_dup 3) ++ (match_dup 3)])))] + "ISA_HAS_LASX && ((INTVAL (operands[3]) - INTVAL (operands[2])) == 8)" + "xvrepl128vei.h\t%u0,%u1,%2" + [(set_attr "type" "simd_splat") +@@ -2321,13 +2319,12 @@ + + (define_insn "lasx_xvrepl128vei_w_internal" + [(set (match_operand:V8SI 0 "register_operand" "=f") +- (vec_duplicate:V8SI +- (vec_select:V8SI +- (match_operand:V8SI 1 "register_operand" "f") +- (parallel [(match_operand 2 "const_0_to_3_operand" "") +- (match_dup 2) (match_dup 2) (match_dup 2) +- (match_operand 3 "const_4_to_7_operand" "") +- (match_dup 3) (match_dup 3) (match_dup 3)]))))] ++ (vec_select:V8SI ++ (match_operand:V8SI 1 "register_operand" "f") ++ (parallel 
[(match_operand 2 "const_0_to_3_operand" "") ++ (match_dup 2) (match_dup 2) (match_dup 2) ++ (match_operand 3 "const_4_to_7_operand" "") ++ (match_dup 3) (match_dup 3) (match_dup 3)])))] + "ISA_HAS_LASX && ((INTVAL (operands[3]) - INTVAL (operands[2])) == 4)" + "xvrepl128vei.w\t%u0,%u1,%2" + [(set_attr "type" "simd_splat") +@@ -2335,13 +2332,12 @@ + + (define_insn "lasx_xvrepl128vei_d_internal" + [(set (match_operand:V4DI 0 "register_operand" "=f") +- (vec_duplicate:V4DI +- (vec_select:V4DI +- (match_operand:V4DI 1 "register_operand" "f") +- (parallel [(match_operand 2 "const_0_or_1_operand" "") +- (match_dup 2) +- (match_operand 3 "const_2_or_3_operand" "") +- (match_dup 3)]))))] ++ (vec_select:V4DI ++ (match_operand:V4DI 1 "register_operand" "f") ++ (parallel [(match_operand 2 "const_0_or_1_operand" "") ++ (match_dup 2) ++ (match_operand 3 "const_2_or_3_operand" "") ++ (match_dup 3)])))] + "ISA_HAS_LASX && ((INTVAL (operands[3]) - INTVAL (operands[2])) == 2)" + "xvrepl128vei.d\t%u0,%u1,%2" + [(set_attr "type" "simd_splat") +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index b66e0e18d..28f8a0fba 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -267,11 +267,11 @@ + + (define_predicate "const_8_to_15_operand" + (and (match_code "const_int") +- (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) ++ (match_test "IN_RANGE (INTVAL (op), 8, 15)"))) + + (define_predicate "const_16_to_31_operand" + (and (match_code "const_int") +- (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) ++ (match_test "IN_RANGE (INTVAL (op), 16, 31)"))) + + (define_predicate "qi_mask_operand" + (and (match_code "const_int") +-- +2.47.3 + diff --git a/0021-LoongArch-Add-some-vector-pack-unpack-patterns.patch b/0021-LoongArch-Add-some-vector-pack-unpack-patterns.patch new file mode 100644 index 0000000..b8feabc --- /dev/null +++ b/0021-LoongArch-Add-some-vector-pack-unpack-patterns.patch @@ -0,0 +1,966 @@ +From 
7f16efe375dfcd7eceeb7c1e94b4f7e247f2dda6 Mon Sep 17 00:00:00 2001 +From: Guo Jie +Date: Mon, 30 Dec 2024 10:38:51 +0800 +Subject: [PATCH 21/59] LoongArch: Add some vector pack/unpack patterns + +gcc/ChangeLog: + + * config/loongarch/lasx.md (vec_unpacks_lo_): Redefine. + (vec_unpacku_lo_): Ditto. + (lasx_vext2xv_h_b): Replaced by vec_unpack_lo_v32qi. + (vec_unpack_lo_v32qi): New insn. + (lasx_vext2xv_w_h): Replaced by vec_unpack_lo_v16hi. + (vec_unpack_lo_v16qi_internal): New insn, for 128 bits. + (vec_unpack_lo_v16hi): New insn. + (lasx_vext2xv_d_w): Replaced by vec_unpack_lo_v8si. + (vec_unpack_lo_v8hi_internal): New insn, for 128 bits. + (vec_unpack_lo_v8si): New insn. + (vec_unpack_lo_v4si_internal): New insn, for 128 bits. + (vec_packs_float_v4di): New expander. + (vec_pack_sfix_trunc_v4df): Ditto. + (vec_unpacks_float_hi_v8si): Ditto. + (vec_unpacks_float_lo_v8si): Ditto. + (vec_unpack_sfix_trunc_hi_v8sf): Ditto. + (vec_unpack_sfix_trunc_lo_v8sf): Ditto. + * config/loongarch/loongarch-builtins.cc + (CODE_FOR_lsx_vftintrz_w_d): Rename. + (CODE_FOR_lsx_vftintrzh_l_s): Ditto. + (CODE_FOR_lsx_vftintrzl_l_s): Ditto. + (CODE_FOR_lsx_vffint_s_l): Ditto. + (CODE_FOR_lsx_vffinth_d_w): Ditto. + (CODE_FOR_lsx_vffintl_d_w): Ditto. + (CODE_FOR_lsx_vexth_h_b): Ditto. + (CODE_FOR_lsx_vexth_w_h): Ditto. + (CODE_FOR_lsx_vexth_d_w): Ditto. + (CODE_FOR_lsx_vexth_hu_bu): Ditto. + (CODE_FOR_lsx_vexth_wu_hu): Ditto. + (CODE_FOR_lsx_vexth_du_wu): Ditto. + (CODE_FOR_lsx_vfcvth_d_s): Ditto. + (CODE_FOR_lsx_vfcvtl_d_s): Ditto. + (CODE_FOR_lasx_vext2xv_h_b): Ditto. + (CODE_FOR_lasx_vext2xv_w_h): Ditto. + (CODE_FOR_lasx_vext2xv_d_w): Ditto. + (CODE_FOR_lasx_vext2xv_hu_bu): Ditto. + (CODE_FOR_lasx_vext2xv_wu_hu): Ditto. + (CODE_FOR_lasx_vext2xv_du_wu): Ditto. + (loongarch_expand_builtin_insn): Swap source operands in + CODE_FOR_lsx_vftintrz_w_d and CODE_FOR_lsx_vffint_s_l. + * config/loongarch/loongarch-protos.h + (loongarch_expand_vec_unpack): Remove useless parameter high_p. 
+ * config/loongarch/loongarch.cc (loongarch_expand_vec_unpack): + Rewrite. + * config/loongarch/lsx.md (vec_unpacks_hi_v4sf): Redefine. + (vec_unpacks_lo_v4sf): Ditto. + (vec_unpacks_hi_<mode>): Ditto. + (vec_unpacku_hi_<mode>): Ditto. + (lsx_vfcvth_d_s): Replaced by vec_unpacks_hi_v4sf. + (lsx_vfcvtl_d_s): Replaced by vec_unpacks_lo_v4sf. + (lsx_vffint_s_l): Replaced by vec_packs_float_v2di. + (vec_packs_float_v2di): New insn. + (lsx_vftintrz_w_d): Replaced by vec_pack_sfix_trunc_v2df. + (vec_pack_sfix_trunc_v2df): New insn. + (lsx_vffinth_d_w): Replaced by vec_unpacks_float_hi_v4si. + (vec_unpacks_float_hi_v4si): New insn. + (lsx_vffintl_d_w): Replaced by vec_unpacks_float_lo_v4si. + (vec_unpacks_float_lo_v4si): New insn. + (lsx_vftintrzh_l_s): Replaced by vec_unpack_sfix_trunc_hi_v4sf. + (vec_unpack_sfix_trunc_hi_v4sf): New insn. + (lsx_vftintrzl_l_s): Replaced by vec_unpack_sfix_trunc_lo_v4sf. + (vec_unpack_sfix_trunc_lo_v4sf): New insn. + (lsx_vexth_h<u>_b<u>): Replaced by vec_unpack<su>_hi_v16qi. + (vec_unpack<su>_hi_v16qi): New insn. + (lsx_vexth_w<u>_h<u>): Replaced by vec_unpack<su>_hi_v8hi. + (vec_unpack<su>_hi_v8hi): New insn. + (lsx_vexth_d<u>_w<u>): Replaced by vec_unpack<su>_hi_v4si. + (vec_unpack<su>_hi_v4si): New insn. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vec_pack_unpack_128.c: New test. + * gcc.target/loongarch/vec_pack_unpack_256.c: New test.
+--- + gcc/config/loongarch/lasx.md | 140 +++++++++++++++--- + gcc/config/loongarch/loongarch-builtins.cc | 22 +++ + gcc/config/loongarch/loongarch-protos.h | 2 +- + gcc/config/loongarch/loongarch.cc | 49 ++---- + gcc/config/loongarch/lsx.md | 120 ++++++--------- + .../loongarch/vec_pack_unpack_128.c | 120 +++++++++++++++ + .../loongarch/vec_pack_unpack_256.c | 118 +++++++++++++++ + 7 files changed, 436 insertions(+), 135 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vec_pack_unpack_128.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vec_pack_unpack_256.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 6b6bd3bf4..a04bf1e37 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -463,17 +463,7 @@ + (match_operand:ILASX_WHB 1 "register_operand")] + "ISA_HAS_LASX" + { +- loongarch_expand_vec_unpack (operands, false/*unsigned_p*/, +- true/*high_p*/); +- DONE; +-}) +- +-(define_expand "vec_unpacks_lo_" +- [(match_operand: 0 "register_operand") +- (match_operand:ILASX_WHB 1 "register_operand")] +- "ISA_HAS_LASX" +-{ +- loongarch_expand_vec_unpack (operands, false/*unsigned_p*/, false/*high_p*/); ++ loongarch_expand_vec_unpack (operands, false/*unsigned_p*/); + DONE; + }) + +@@ -482,16 +472,7 @@ + (match_operand:ILASX_WHB 1 "register_operand")] + "ISA_HAS_LASX" + { +- loongarch_expand_vec_unpack (operands, true/*unsigned_p*/, true/*high_p*/); +- DONE; +-}) +- +-(define_expand "vec_unpacku_lo_" +- [(match_operand: 0 "register_operand") +- (match_operand:ILASX_WHB 1 "register_operand")] +- "ISA_HAS_LASX" +-{ +- loongarch_expand_vec_unpack (operands, true/*unsigned_p*/, false/*high_p*/); ++ loongarch_expand_vec_unpack (operands, true/*unsigned_p*/); + DONE; + }) + +@@ -2537,7 +2518,7 @@ + (set_attr "mode" "")]) + + ;; loongson-asx. 
+-(define_insn "lasx_vext2xv_h_b" ++(define_insn "vec_unpack_lo_v32qi" + [(set (match_operand:V16HI 0 "register_operand" "=f") + (any_extend:V16HI + (vec_select:V16QI +@@ -2555,7 +2536,21 @@ + [(set_attr "type" "simd_shift") + (set_attr "mode" "V16HI")]) + +-(define_insn "lasx_vext2xv_w_h" ++(define_insn "vec_unpack_lo_v16qi_internal" ++ [(set (match_operand:V8HI 0 "register_operand" "=f") ++ (any_extend:V8HI ++ (vec_select:V8QI ++ (match_operand:V16QI 1 "register_operand" "f") ++ (parallel [(const_int 0) (const_int 1) ++ (const_int 2) (const_int 3) ++ (const_int 4) (const_int 5) ++ (const_int 6) (const_int 7)]))))] ++ "ISA_HAS_LASX" ++ "vext2xv.h.b\t%u0,%u1" ++ [(set_attr "type" "simd_shift") ++ (set_attr "mode" "V8HI")]) ++ ++(define_insn "vec_unpack_lo_v16hi" + [(set (match_operand:V8SI 0 "register_operand" "=f") + (any_extend:V8SI + (vec_select:V8HI +@@ -2569,7 +2564,19 @@ + [(set_attr "type" "simd_shift") + (set_attr "mode" "V8SI")]) + +-(define_insn "lasx_vext2xv_d_w" ++(define_insn "vec_unpack_lo_v8hi_internal" ++ [(set (match_operand:V4SI 0 "register_operand" "=f") ++ (any_extend:V4SI ++ (vec_select:V4HI ++ (match_operand:V8HI 1 "register_operand" "f") ++ (parallel [(const_int 0) (const_int 1) ++ (const_int 2) (const_int 3)]))))] ++ "ISA_HAS_LASX" ++ "vext2xv.w.h\t%u0,%u1" ++ [(set_attr "type" "simd_shift") ++ (set_attr "mode" "V4SI")]) ++ ++(define_insn "vec_unpack_lo_v8si" + [(set (match_operand:V4DI 0 "register_operand" "=f") + (any_extend:V4DI + (vec_select:V4SI +@@ -2581,6 +2588,17 @@ + [(set_attr "type" "simd_shift") + (set_attr "mode" "V4DI")]) + ++(define_insn "vec_unpack_lo_v4si_internal" ++ [(set (match_operand:V2DI 0 "register_operand" "=f") ++ (any_extend:V2DI ++ (vec_select:V2SI ++ (match_operand:V4SI 1 "register_operand" "f") ++ (parallel [(const_int 0) (const_int 1)]))))] ++ "ISA_HAS_LASX" ++ "vext2xv.d.w\t%u0,%u1" ++ [(set_attr "type" "simd_shift") ++ (set_attr "mode" "V2DI")]) ++ + (define_insn "lasx_vext2xv_w_b" + [(set (match_operand:V8SI 
0 "register_operand" "=f") + (any_extend:V8SI +@@ -2972,6 +2990,19 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "V4DF")]) + ++(define_expand "vec_packs_float_v4di" ++ [(match_operand:V8SF 0 "register_operand") ++ (match_operand:V4DI 1 "register_operand") ++ (match_operand:V4DI 2 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp; ++ tmp = gen_reg_rtx (V8SFmode); ++ emit_insn (gen_lasx_xvffint_s_l (tmp, operands[2], operands[1])); ++ emit_insn (gen_lasx_xvpermi_d_v8sf (operands[0], tmp, GEN_INT (0xd8))); ++ DONE; ++}) ++ + (define_insn "lasx_xvffint_s_l" + [(set (match_operand:V8SF 0 "register_operand" "=f") + (unspec:V8SF [(match_operand:V4DI 1 "register_operand" "f") +@@ -2982,6 +3013,19 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "V4DI")]) + ++(define_expand "vec_pack_sfix_trunc_v4df" ++ [(match_operand:V8SI 0 "register_operand") ++ (match_operand:V4DF 1 "register_operand") ++ (match_operand:V4DF 2 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp; ++ tmp = gen_reg_rtx (V8SImode); ++ emit_insn (gen_lasx_xvftintrz_w_d (tmp, operands[2], operands[1])); ++ emit_insn (gen_lasx_xvpermi_d_v8si (operands[0], tmp, GEN_INT (0xd8))); ++ DONE; ++}) ++ + (define_insn "lasx_xvftintrz_w_d" + [(set (match_operand:V8SI 0 "register_operand" "=f") + (unspec:V8SI [(match_operand:V4DF 1 "register_operand" "f") +@@ -3040,6 +3084,30 @@ + [(set_attr "type" "simd_shift") + (set_attr "mode" "V8SF")]) + ++(define_expand "vec_unpacks_float_hi_v8si" ++ [(match_operand:V4DF 0 "register_operand") ++ (match_operand:V8SI 1 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp; ++ tmp = gen_reg_rtx (V8SImode); ++ emit_insn (gen_lasx_xvpermi_d_v8si (tmp, operands[1], GEN_INT (0xe8))); ++ emit_insn (gen_lasx_xvffinth_d_w (operands[0], tmp)); ++ DONE; ++}) ++ ++(define_expand "vec_unpacks_float_lo_v8si" ++ [(match_operand:V4DF 0 "register_operand") ++ (match_operand:V8SI 1 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp; ++ tmp = gen_reg_rtx (V4DImode); 
++ emit_insn (gen_vec_unpacks_lo_v8si (tmp, operands[1])); ++ emit_insn (gen_floatv4div4df2 (operands[0], tmp)); ++ DONE; ++}) ++ + (define_insn "lasx_xvffinth_d_w" + [(set (match_operand:V4DF 0 "register_operand" "=f") + (unspec:V4DF [(match_operand:V8SI 1 "register_operand" "f")] +@@ -3058,6 +3126,18 @@ + [(set_attr "type" "simd_shift") + (set_attr "mode" "V8SI")]) + ++(define_expand "vec_unpack_sfix_trunc_hi_v8sf" ++ [(match_operand:V4DI 0 "register_operand") ++ (match_operand:V8SF 1 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp; ++ tmp = gen_reg_rtx (V8SFmode); ++ emit_insn (gen_lasx_xvpermi_d_v8sf (tmp, operands[1], GEN_INT (0xe8))); ++ emit_insn (gen_lasx_xvftintrzh_l_s (operands[0], tmp)); ++ DONE; ++}) ++ + (define_insn "lasx_xvftintrzh_l_s" + [(set (match_operand:V4DI 0 "register_operand" "=f") + (unspec:V4DI [(match_operand:V8SF 1 "register_operand" "f")] +@@ -3067,6 +3147,18 @@ + [(set_attr "type" "simd_shift") + (set_attr "mode" "V8SF")]) + ++(define_expand "vec_unpack_sfix_trunc_lo_v8sf" ++ [(match_operand:V4DI 0 "register_operand") ++ (match_operand:V8SF 1 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx tmp; ++ tmp = gen_reg_rtx (V8SFmode); ++ emit_insn (gen_lasx_xvpermi_d_v8sf (tmp, operands[1], GEN_INT (0xd4))); ++ emit_insn (gen_lasx_xvftintrzl_l_s (operands[0], tmp)); ++ DONE; ++}) ++ + (define_insn "lasx_xvftintrzl_l_s" + [(set (match_operand:V4DI 0 "register_operand" "=f") + (unspec:V4DI [(match_operand:V8SF 1 "register_operand" "f")] +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index 867852a91..d3fb99ad0 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -282,10 +282,24 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) + #define CODE_FOR_lsx_vftintrz_l_d CODE_FOR_fix_truncv2dfv2di2 + #define CODE_FOR_lsx_vftintrz_wu_s CODE_FOR_fixuns_truncv4sfv4si2 + #define CODE_FOR_lsx_vftintrz_lu_d 
CODE_FOR_fixuns_truncv2dfv2di2 ++#define CODE_FOR_lsx_vftintrz_w_d CODE_FOR_vec_pack_sfix_trunc_v2df ++#define CODE_FOR_lsx_vftintrzh_l_s CODE_FOR_vec_unpack_sfix_trunc_hi_v4sf ++#define CODE_FOR_lsx_vftintrzl_l_s CODE_FOR_vec_unpack_sfix_trunc_lo_v4sf + #define CODE_FOR_lsx_vffint_s_w CODE_FOR_floatv4siv4sf2 + #define CODE_FOR_lsx_vffint_d_l CODE_FOR_floatv2div2df2 + #define CODE_FOR_lsx_vffint_s_wu CODE_FOR_floatunsv4siv4sf2 + #define CODE_FOR_lsx_vffint_d_lu CODE_FOR_floatunsv2div2df2 ++#define CODE_FOR_lsx_vffint_s_l CODE_FOR_vec_packs_float_v2di ++#define CODE_FOR_lsx_vffinth_d_w CODE_FOR_vec_unpacks_float_hi_v4si ++#define CODE_FOR_lsx_vffintl_d_w CODE_FOR_vec_unpacks_float_lo_v4si ++#define CODE_FOR_lsx_vexth_h_b CODE_FOR_vec_unpacks_hi_v16qi ++#define CODE_FOR_lsx_vexth_w_h CODE_FOR_vec_unpacks_hi_v8hi ++#define CODE_FOR_lsx_vexth_d_w CODE_FOR_vec_unpacks_hi_v4si ++#define CODE_FOR_lsx_vexth_hu_bu CODE_FOR_vec_unpacku_hi_v16qi ++#define CODE_FOR_lsx_vexth_wu_hu CODE_FOR_vec_unpacku_hi_v8hi ++#define CODE_FOR_lsx_vexth_du_wu CODE_FOR_vec_unpacku_hi_v4si ++#define CODE_FOR_lsx_vfcvth_d_s CODE_FOR_vec_unpacks_hi_v4sf ++#define CODE_FOR_lsx_vfcvtl_d_s CODE_FOR_vec_unpacks_lo_v4sf + #define CODE_FOR_lsx_vfsub_s CODE_FOR_subv4sf3 + #define CODE_FOR_lsx_vfsub_d CODE_FOR_subv2df3 + #define CODE_FOR_lsx_vfmul_s CODE_FOR_mulv4sf3 +@@ -563,6 +577,12 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) + #define CODE_FOR_lasx_xvffint_d_l CODE_FOR_floatv4div4df2 + #define CODE_FOR_lasx_xvffint_s_wu CODE_FOR_floatunsv8siv8sf2 + #define CODE_FOR_lasx_xvffint_d_lu CODE_FOR_floatunsv4div4df2 ++#define CODE_FOR_lasx_vext2xv_h_b CODE_FOR_vec_unpacks_lo_v32qi ++#define CODE_FOR_lasx_vext2xv_w_h CODE_FOR_vec_unpacks_lo_v16hi ++#define CODE_FOR_lasx_vext2xv_d_w CODE_FOR_vec_unpacks_lo_v8si ++#define CODE_FOR_lasx_vext2xv_hu_bu CODE_FOR_vec_unpacku_lo_v32qi ++#define CODE_FOR_lasx_vext2xv_wu_hu CODE_FOR_vec_unpacku_lo_v16hi ++#define CODE_FOR_lasx_vext2xv_du_wu 
CODE_FOR_vec_unpacku_lo_v8si + #define CODE_FOR_lasx_xvfsub_s CODE_FOR_subv8sf3 + #define CODE_FOR_lasx_xvfsub_d CODE_FOR_subv4df3 + #define CODE_FOR_lasx_xvfmul_s CODE_FOR_mulv8sf3 +@@ -2752,6 +2772,8 @@ loongarch_expand_builtin_insn (enum insn_code icode, unsigned int nops, + case CODE_FOR_lsx_vpickod_h: + case CODE_FOR_lsx_vpickod_w: + case CODE_FOR_lsx_vandn_v: ++ case CODE_FOR_lsx_vftintrz_w_d: ++ case CODE_FOR_lsx_vffint_s_l: + case CODE_FOR_lasx_xvilvh_b: + case CODE_FOR_lasx_xvilvh_h: + case CODE_FOR_lasx_xvilvh_w: +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index deea5e675..008eb28a9 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -171,7 +171,7 @@ extern void loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs, + + extern void loongarch_expand_vector_group_init (rtx, rtx); + extern void loongarch_expand_vector_init (rtx, rtx); +-extern void loongarch_expand_vec_unpack (rtx op[2], bool, bool); ++extern void loongarch_expand_vec_unpack (rtx op[2], bool); + extern void loongarch_expand_vec_perm (rtx, rtx, rtx, rtx); + extern void loongarch_expand_vec_perm_1 (rtx[]); + extern void loongarch_expand_vector_extract (rtx, rtx, int); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 069e9cc33..8d29924f3 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -9748,7 +9748,7 @@ loongarch_expand_vector_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) + /* Expand an integral vector unpack operation. 
*/ + + void +-loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p) ++loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p) + { + machine_mode imode = GET_MODE (operands[1]); + rtx (*unpack) (rtx, rtx, rtx); +@@ -9757,31 +9757,32 @@ loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p) + rtx (*swap_hi_lo) (rtx, rtx, rtx, rtx); + rtx tmp, dest; + ++ /* In LASX, only vec_unpacks_hi_ requires expander. */ + if (ISA_HAS_LASX && GET_MODE_SIZE (imode) == 32) + { + switch (imode) + { + case E_V8SImode: + if (unsigned_p) +- extend = gen_lasx_vext2xv_du_wu; ++ extend = gen_vec_unpacku_lo_v8si; + else +- extend = gen_lasx_vext2xv_d_w; ++ extend = gen_vec_unpacks_lo_v8si; + swap_hi_lo = gen_lasx_xvpermi_q_v8si; + break; + + case E_V16HImode: + if (unsigned_p) +- extend = gen_lasx_vext2xv_wu_hu; ++ extend = gen_vec_unpacku_lo_v16hi; + else +- extend = gen_lasx_vext2xv_w_h; ++ extend = gen_vec_unpacks_lo_v16hi; + swap_hi_lo = gen_lasx_xvpermi_q_v16hi; + break; + + case E_V32QImode: + if (unsigned_p) +- extend = gen_lasx_vext2xv_hu_bu; ++ extend = gen_vec_unpacku_lo_v32qi; + else +- extend = gen_lasx_vext2xv_h_b; ++ extend = gen_vec_unpacks_lo_v32qi; + swap_hi_lo = gen_lasx_xvpermi_q_v32qi; + break; + +@@ -9790,46 +9791,28 @@ loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p) + break; + } + +- if (high_p) +- { +- tmp = gen_reg_rtx (imode); +- emit_insn (swap_hi_lo (tmp, tmp, operands[1], const1_rtx)); +- emit_insn (extend (operands[0], tmp)); +- return; +- } +- +- emit_insn (extend (operands[0], operands[1])); ++ tmp = gen_reg_rtx (imode); ++ emit_insn (swap_hi_lo (tmp, tmp, operands[1], const1_rtx)); ++ emit_insn (extend (operands[0], tmp)); + return; +- + } +- else if (ISA_HAS_LSX) ++ /* In LSX, only vec_unpacks_lo_ requires expander. 
*/ ++ else if (ISA_HAS_LSX && !ISA_HAS_LASX) + { + switch (imode) + { + case E_V4SImode: +- if (high_p != 0) +- unpack = gen_lsx_vilvh_w; +- else +- unpack = gen_lsx_vilvl_w; +- ++ unpack = gen_lsx_vilvl_w; + cmpFunc = gen_lsx_vslt_w; + break; + + case E_V8HImode: +- if (high_p != 0) +- unpack = gen_lsx_vilvh_h; +- else +- unpack = gen_lsx_vilvl_h; +- ++ unpack = gen_lsx_vilvl_h; + cmpFunc = gen_lsx_vslt_h; + break; + + case E_V16QImode: +- if (high_p != 0) +- unpack = gen_lsx_vilvh_b; +- else +- unpack = gen_lsx_vilvl_b; +- ++ unpack = gen_lsx_vilvl_b; + cmpFunc = gen_lsx_vslt_b; + break; + +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index a6140b8e5..22a3e3fee 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -73,16 +73,10 @@ + UNSPEC_LSX_VMSKLTZ + UNSPEC_LSX_VSIGNCOV + UNSPEC_LSX_VFTINT_W_D +- UNSPEC_LSX_VFFINT_S_L +- UNSPEC_LSX_VFTINTRZ_W_D + UNSPEC_LSX_VFTINTRP_W_D + UNSPEC_LSX_VFTINTRM_W_D + UNSPEC_LSX_VFTINTRNE_W_D + UNSPEC_LSX_VFTINTL_L_S +- UNSPEC_LSX_VFFINTH_D_W +- UNSPEC_LSX_VFFINTL_D_W +- UNSPEC_LSX_VFTINTRZL_L_S +- UNSPEC_LSX_VFTINTRZH_L_S + UNSPEC_LSX_VFTINTRPL_L_S + UNSPEC_LSX_VFTINTRPH_L_S + UNSPEC_LSX_VFTINTRMH_L_S +@@ -336,54 +330,15 @@ + [(set_attr "type" "simd_permute") + (set_attr "mode" "")]) + +-(define_expand "vec_unpacks_hi_v4sf" +- [(set (match_operand:V2DF 0 "register_operand" "=f") +- (float_extend:V2DF +- (vec_select:V2SF +- (match_operand:V4SF 1 "register_operand" "f") +- (match_dup 2))))] +- "ISA_HAS_LSX" +-{ +- operands[2] = loongarch_lsx_vec_parallel_const_half (V4SFmode, +- true/*high_p*/); +-}) +- +-(define_expand "vec_unpacks_lo_v4sf" +- [(set (match_operand:V2DF 0 "register_operand" "=f") +- (float_extend:V2DF +- (vec_select:V2SF +- (match_operand:V4SF 1 "register_operand" "f") +- (match_dup 2))))] +- "ISA_HAS_LSX" +-{ +- operands[2] = loongarch_lsx_vec_parallel_const_half (V4SFmode, +- false/*high_p*/); +-}) +- +-(define_expand "vec_unpacks_hi_" +- [(match_operand: 0 
"register_operand") +- (match_operand:ILSX_WHB 1 "register_operand")] +- "ISA_HAS_LSX" +-{ +- loongarch_expand_vec_unpack (operands, false/*unsigned_p*/, true/*high_p*/); +- DONE; +-}) +- + (define_expand "vec_unpacks_lo_" + [(match_operand: 0 "register_operand") + (match_operand:ILSX_WHB 1 "register_operand")] + "ISA_HAS_LSX" + { +- loongarch_expand_vec_unpack (operands, false/*unsigned_p*/, false/*high_p*/); +- DONE; +-}) +- +-(define_expand "vec_unpacku_hi_" +- [(match_operand: 0 "register_operand") +- (match_operand:ILSX_WHB 1 "register_operand")] +- "ISA_HAS_LSX" +-{ +- loongarch_expand_vec_unpack (operands, true/*unsigned_p*/, true/*high_p*/); ++ if (ISA_HAS_LASX) ++ emit_insn (gen_vec_unpacks_lo__internal (operands[0], operands[1])); ++ else ++ loongarch_expand_vec_unpack (operands, false/*unsigned_p*/); + DONE; + }) + +@@ -392,7 +347,10 @@ + (match_operand:ILSX_WHB 1 "register_operand")] + "ISA_HAS_LSX" + { +- loongarch_expand_vec_unpack (operands, true/*unsigned_p*/, false/*high_p*/); ++ if (ISA_HAS_LASX) ++ emit_insn (gen_vec_unpacku_lo__internal (operands[0], operands[1])); ++ else ++ loongarch_expand_vec_unpack (operands, true/*unsigned_p*/); + DONE; + }) + +@@ -2093,7 +2051,7 @@ + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "V4SF")]) + +-(define_insn "lsx_vfcvth_d_s" ++(define_insn "vec_unpacks_hi_v4sf" + [(set (match_operand:V2DF 0 "register_operand" "=f") + (float_extend:V2DF + (vec_select:V2SF +@@ -2113,7 +2071,7 @@ + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "V4SF")]) + +-(define_insn "lsx_vfcvtl_d_s" ++(define_insn "vec_unpacks_lo_v4sf" + [(set (match_operand:V2DF 0 "register_operand" "=f") + (float_extend:V2DF + (vec_select:V2SF +@@ -2691,23 +2649,23 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "V2DF")]) + +-(define_insn "lsx_vffint_s_l" ++(define_insn "vec_packs_float_v2di" + [(set (match_operand:V4SF 0 "register_operand" "=f") +- (unspec:V4SF [(match_operand:V2DI 1 "register_operand" "f") +- (match_operand:V2DI 2 
"register_operand" "f")] +- UNSPEC_LSX_VFFINT_S_L))] ++ (vec_concat:V4SF ++ (float:V2SF (match_operand:V2DI 1 "register_operand" "f")) ++ (float:V2SF (match_operand:V2DI 2 "register_operand" "f"))))] + "ISA_HAS_LSX" +- "vffint.s.l\t%w0,%w1,%w2" ++ "vffint.s.l\t%w0,%w2,%w1" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "V2DI")]) + +-(define_insn "lsx_vftintrz_w_d" ++(define_insn "vec_pack_sfix_trunc_v2df" + [(set (match_operand:V4SI 0 "register_operand" "=f") +- (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "f") +- (match_operand:V2DF 2 "register_operand" "f")] +- UNSPEC_LSX_VFTINTRZ_W_D))] ++ (vec_concat:V4SI ++ (fix:V2SI (match_operand:V2DF 1 "register_operand" "f")) ++ (fix:V2SI (match_operand:V2DF 2 "register_operand" "f"))))] + "ISA_HAS_LSX" +- "vftintrz.w.d\t%w0,%w1,%w2" ++ "vftintrz.w.d\t%w0,%w2,%w1" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "V2DF")]) + +@@ -2759,37 +2717,45 @@ + [(set_attr "type" "simd_shift") + (set_attr "mode" "V4SF")]) + +-(define_insn "lsx_vffinth_d_w" ++(define_insn "vec_unpacks_float_hi_v4si" + [(set (match_operand:V2DF 0 "register_operand" "=f") +- (unspec:V2DF [(match_operand:V4SI 1 "register_operand" "f")] +- UNSPEC_LSX_VFFINTH_D_W))] ++ (float:V2DF ++ (vec_select:V2SI ++ (match_operand:V4SI 1 "register_operand" "f") ++ (parallel [(const_int 2) (const_int 3)]))))] + "ISA_HAS_LSX" + "vffinth.d.w\t%w0,%w1" + [(set_attr "type" "simd_shift") + (set_attr "mode" "V4SI")]) + +-(define_insn "lsx_vffintl_d_w" ++(define_insn "vec_unpacks_float_lo_v4si" + [(set (match_operand:V2DF 0 "register_operand" "=f") +- (unspec:V2DF [(match_operand:V4SI 1 "register_operand" "f")] +- UNSPEC_LSX_VFFINTL_D_W))] ++ (float:V2DF ++ (vec_select:V2SI ++ (match_operand:V4SI 1 "register_operand" "f") ++ (parallel [(const_int 0) (const_int 1)]))))] + "ISA_HAS_LSX" + "vffintl.d.w\t%w0,%w1" + [(set_attr "type" "simd_shift") + (set_attr "mode" "V4SI")]) + +-(define_insn "lsx_vftintrzh_l_s" ++(define_insn 
"vec_unpack_sfix_trunc_hi_v4sf" + [(set (match_operand:V2DI 0 "register_operand" "=f") +- (unspec:V2DI [(match_operand:V4SF 1 "register_operand" "f")] +- UNSPEC_LSX_VFTINTRZH_L_S))] ++ (fix:V2DI ++ (vec_select:V2SF ++ (match_operand:V4SF 1 "register_operand" "f") ++ (parallel [(const_int 2) (const_int 3)]))))] + "ISA_HAS_LSX" + "vftintrzh.l.s\t%w0,%w1" + [(set_attr "type" "simd_shift") + (set_attr "mode" "V4SF")]) + +-(define_insn "lsx_vftintrzl_l_s" ++(define_insn "vec_unpack_sfix_trunc_lo_v4sf" + [(set (match_operand:V2DI 0 "register_operand" "=f") +- (unspec:V2DI [(match_operand:V4SF 1 "register_operand" "f")] +- UNSPEC_LSX_VFTINTRZL_L_S))] ++ (fix:V2DI ++ (vec_select:V2SF ++ (match_operand:V4SF 1 "register_operand" "f") ++ (parallel [(const_int 0) (const_int 1)]))))] + "ISA_HAS_LSX" + "vftintrzl.l.s\t%w0,%w1" + [(set_attr "type" "simd_shift") +@@ -4015,7 +3981,7 @@ + [(set_attr "type" "simd_bit") + (set_attr "mode" "V16QI")]) + +-(define_insn "lsx_vexth_h_b" ++(define_insn "vec_unpack_hi_v16qi" + [(set (match_operand:V8HI 0 "register_operand" "=f") + (any_extend:V8HI + (vec_select:V8QI +@@ -4029,7 +3995,7 @@ + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "V8HI")]) + +-(define_insn "lsx_vexth_w_h" ++(define_insn "vec_unpack_hi_v8hi" + [(set (match_operand:V4SI 0 "register_operand" "=f") + (any_extend:V4SI + (vec_select:V4HI +@@ -4041,7 +4007,7 @@ + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "V4SI")]) + +-(define_insn "lsx_vexth_d_w" ++(define_insn "vec_unpack_hi_v4si" + [(set (match_operand:V2DI 0 "register_operand" "=f") + (any_extend:V2DI + (vec_select:V2SI +diff --git a/gcc/testsuite/gcc.target/loongarch/vec_pack_unpack_128.c b/gcc/testsuite/gcc.target/loongarch/vec_pack_unpack_128.c +new file mode 100644 +index 000000000..164b01e24 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vec_pack_unpack_128.c +@@ -0,0 +1,120 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mlsx -O3" } */ ++ ++#define N 128 ++ ++char c[N]; ++short int h[N]; ++int 
s[N]; ++long l[N]; ++float f[N]; ++double d[N]; ++unsigned char uc[N]; ++unsigned short int uh[N]; ++unsigned int us[N]; ++unsigned long ul[N]; ++ ++/* { dg-final { scan-assembler-not "test_vec_pack_sfix_trunc_v2df:.*\tvftintrz\\.l\\.d.*-test_vec_pack_sfix_trunc_v2df\n" } } */ ++/* { dg-final { scan-assembler-not "test_vec_pack_sfix_trunc_v2df:.*\tvpickev\\.w.*-test_vec_pack_sfix_trunc_v2df\n" } } */ ++/* { dg-final { scan-assembler "test_vec_pack_sfix_trunc_v2df:.*\tvftintrz\\.w\\.d.*-test_vec_pack_sfix_trunc_v2df\n" } } */ ++void ++test_vec_pack_sfix_trunc_v2df (void) ++{ ++ for (int i = 0; i < N; i++) ++ s[i] = d[i]; ++} ++ ++/* { dg-final { scan-assembler-not "test_vec_packs_float_v2di:.*\tmovgr2fr\\.d.*-test_vec_packs_float_v2di" } } */ ++/* { dg-final { scan-assembler "test_vec_packs_float_v2di:.*\tvffint\\.s\\.l.*-test_vec_packs_float_v2di" } } */ ++void ++test_vec_packs_float_v2di (void) ++{ ++ for (int i = 0; i < N; i++) ++ f[i] = l[i]; ++} ++ ++/* { dg-final { scan-assembler-not "test_vec_unpack_sfix_trunc_hi_lo_v4sf:.*\tftintrz\\.l\\.s.*-test_vec_unpack_sfix_trunc_hi_lo_v4sf" } } */ ++/* { dg-final { scan-assembler "test_vec_unpack_sfix_trunc_hi_lo_v4sf:.*\tvftintrzh\\.l\\.s.*-test_vec_unpack_sfix_trunc_hi_lo_v4sf" } } */ ++/* { dg-final { scan-assembler "test_vec_unpack_sfix_trunc_hi_lo_v4sf:.*\tvftintrzl\\.l\\.s.*-test_vec_unpack_sfix_trunc_hi_lo_v4sf" } } */ ++void ++test_vec_unpack_sfix_trunc_hi_lo_v4sf (void) ++{ ++ for (int i = 0; i < N; i++) ++ l[i] = f[i]; ++} ++ ++/* { dg-final { scan-assembler-not "test_vec_unpacks_float_hi_lo_v4si:.*\tvslti\\.w.*-test_vec_unpacks_float_hi_lo_v4si" } } */ ++/* { dg-final { scan-assembler-not "test_vec_unpacks_float_hi_lo_v4si:.*\tvilvl\\.w.*-test_vec_unpacks_float_hi_lo_v4si" } } */ ++/* { dg-final { scan-assembler-not "test_vec_unpacks_float_hi_lo_v4si:.*\tvilvh\\.w.*-test_vec_unpacks_float_hi_lo_v4si" } } */ ++/* { dg-final { scan-assembler-not 
"test_vec_unpacks_float_hi_lo_v4si:.*\tvffint\\.d\\.l.*-test_vec_unpacks_float_hi_lo_v4si" } } */ ++/* { dg-final { scan-assembler "test_vec_unpacks_float_hi_lo_v4si:.*\tvffinth\\.d\\.w.*-test_vec_unpacks_float_hi_lo_v4si" } } */ ++/* { dg-final { scan-assembler "test_vec_unpacks_float_hi_lo_v4si:.*\tvffintl\\.d\\.w.*-test_vec_unpacks_float_hi_lo_v4si" } } */ ++void ++test_vec_unpacks_float_hi_lo_v4si (void) ++{ ++ for (int i = 0; i < N; i++) ++ d[i] = s[i]; ++} ++ ++/* { dg-final { scan-assembler-not "test_vec_unpacks_hi_lo_v4si:.*\tvilvh\\.w.*-test_vec_unpacks_hi_lo_v4si" } } */ ++/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v4si:.*\tvexth\\.d\\.w.*-test_vec_unpacks_hi_lo_v4si" } } */ ++void ++test_vec_unpacks_hi_lo_v4si (void) ++{ ++ for (int i = 0; i < N; i++) ++ l[i] = s[i]; ++} ++ ++/* { dg-final { scan-assembler-not "test_vec_unpacks_hi_lo_v8hi:.*\tvilvh\\.h.*-test_vec_unpacks_hi_lo_v8hi" } } */ ++/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8hi:.*\tvexth\\.w\\.h.*-test_vec_unpacks_hi_lo_v8hi" } } */ ++void ++test_vec_unpacks_hi_lo_v8hi (void) ++{ ++ for (int i = 0; i < N; i++) ++ s[i] = h[i]; ++} ++ ++/* { dg-final { scan-assembler-not "test_vec_unpacks_hi_lo_v16qi:.*\tvilvh\\.b.*-test_vec_unpacks_hi_lo_v16qi" } } */ ++/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v16qi:.*\tvexth\\.h\\.b.*-test_vec_unpacks_hi_lo_v16qi" } } */ ++void ++test_vec_unpacks_hi_lo_v16qi (void) ++{ ++ for (int i = 0; i < N; i++) ++ h[i] = c[i]; ++} ++ ++/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v4sf:.*\tvfcvtl\\.d\\.s.*-test_vec_unpacks_hi_lo_v4sf" } } */ ++/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v4sf:.*\tvfcvth\\.d\\.s.*-test_vec_unpacks_hi_lo_v4sf" } } */ ++void ++test_vec_unpacks_hi_lo_v4sf (void) ++{ ++ for (int i = 0; i < N; i++) ++ d[i] = f[i]; ++} ++ ++/* { dg-final { scan-assembler-not "test_vec_unpacku_hi_lo_v4si:.*\tvilvh\\.w.*-test_vec_unpacku_hi_lo_v4si" } } */ ++/* { dg-final { scan-assembler 
"test_vec_unpacku_hi_lo_v4si:.*\tvexth\\.du\\.wu.*-test_vec_unpacku_hi_lo_v4si" } } */ ++void ++test_vec_unpacku_hi_lo_v4si (void) ++{ ++ for (int i = 0; i < N; i++) ++ ul[i] = us[i]; ++} ++ ++/* { dg-final { scan-assembler-not "test_vec_unpacku_hi_lo_v8hi:.*\tvilvh\\.h.*-test_vec_unpacku_hi_lo_v8hi" } } */ ++/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v8hi:.*\tvexth\\.wu\\.hu.*-test_vec_unpacku_hi_lo_v8hi" } } */ ++void ++test_vec_unpacku_hi_lo_v8hi (void) ++{ ++ for (int i = 0; i < N; i++) ++ us[i] = uh[i]; ++} ++ ++/* { dg-final { scan-assembler-not "test_vec_unpacku_hi_lo_v16qi:.*\tvilvh\\.b.*-test_vec_unpacku_hi_lo_v16qi" } } */ ++/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v16qi:.*\tvexth\\.hu\\.bu.*-test_vec_unpacku_hi_lo_v16qi" } } */ ++void ++test_vec_unpacku_hi_lo_v16qi (void) ++{ ++ for (int i = 0; i < N; i++) ++ uh[i] = uc[i]; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vec_pack_unpack_256.c b/gcc/testsuite/gcc.target/loongarch/vec_pack_unpack_256.c +new file mode 100644 +index 000000000..506b7bdb0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vec_pack_unpack_256.c +@@ -0,0 +1,118 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mlasx -O3" } */ ++ ++#define N 128 ++ ++char c[N]; ++short int h[N]; ++int s[N]; ++long l[N]; ++float f[N]; ++double d[N]; ++unsigned char uc[N]; ++unsigned short int uh[N]; ++unsigned int us[N]; ++unsigned long ul[N]; ++ ++/* { dg-final { scan-assembler-not "test_vec_pack_sfix_trunc_v4df:.*\txvftintrz\\.l\\.d.*-test_vec_pack_sfix_trunc_v4df\n" } } */ ++/* { dg-final { scan-assembler-not "test_vec_pack_sfix_trunc_v4df:.*\txvpickev\\.w.*-test_vec_pack_sfix_trunc_v4df\n" } } */ ++/* { dg-final { scan-assembler "test_vec_pack_sfix_trunc_v4df:.*\txvftintrz\\.w\\.d.*-test_vec_pack_sfix_trunc_v4df\n" } } */ ++void ++test_vec_pack_sfix_trunc_v4df (void) ++{ ++ for (int i = 0; i < N; i++) ++ s[i] = d[i]; ++} ++ ++/* { dg-final { scan-assembler-not 
"test_vec_packs_float_v4di:.*\tmovgr2fr\\.d.*-test_vec_packs_float_v4di" } } */ ++/* { dg-final { scan-assembler "test_vec_packs_float_v4di:.*\txvffint\\.s\\.l.*-test_vec_packs_float_v4di" } } */ ++void ++test_vec_packs_float_v4di (void) ++{ ++ for (int i = 0; i < N; i++) ++ f[i] = l[i]; ++} ++ ++/* { dg-final { scan-assembler-not "test_vec_unpack_sfix_trunc_hi_lo_v8sf:.*\tftintrz\\.l\\.s.*-test_vec_unpack_sfix_trunc_hi_lo_v8sf" } } */ ++/* { dg-final { scan-assembler "test_vec_unpack_sfix_trunc_hi_lo_v8sf:.*\txvftintrzh\\.l\\.s.*-test_vec_unpack_sfix_trunc_hi_lo_v8sf" } } */ ++/* { dg-final { scan-assembler "test_vec_unpack_sfix_trunc_hi_lo_v8sf:.*\txvftintrzl\\.l\\.s.*-test_vec_unpack_sfix_trunc_hi_lo_v8sf" } } */ ++void ++test_vec_unpack_sfix_trunc_hi_lo_v8sf (void) ++{ ++ for (int i = 0; i < N; i++) ++ l[i] = f[i]; ++} ++ ++/* { dg-final { scan-assembler "test_vec_unpacks_float_hi_lo_v8si:.*\txvpermi\\.d.*-test_vec_unpacks_float_hi_lo_v8si" } } */ ++/* { dg-final { scan-assembler "test_vec_unpacks_float_hi_lo_v8si:.*\tvext2xv\\.d\\.w.*-test_vec_unpacks_float_hi_lo_v8si" } } */ ++/* { dg-final { scan-assembler "test_vec_unpacks_float_hi_lo_v8si:.*\txvffint\\.d\\.l.*-test_vec_unpacks_float_hi_lo_v8si" } } */ ++/* { dg-final { scan-assembler "test_vec_unpacks_float_hi_lo_v8si:.*\txvffinth\\.d\\.w.*-test_vec_unpacks_float_hi_lo_v8si" } } */ ++void ++test_vec_unpacks_float_hi_lo_v8si (void) ++{ ++ for (int i = 0; i < N; i++) ++ d[i] = s[i]; ++} ++ ++/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8si:.*\tvext2xv\\.d\\.w.*-test_vec_unpacks_hi_lo_v8si" } } */ ++/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v8si" } } */ ++void ++test_vec_unpacks_hi_lo_v8si (void) ++{ ++ for (int i = 0; i < N; i++) ++ l[i] = s[i]; ++} ++ ++/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v16hi:.*\tvext2xv\\.w\\.h.*-test_vec_unpacks_hi_lo_v16hi" } } */ ++/* { dg-final { scan-assembler 
"test_vec_unpacks_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v16hi" } } */ ++void ++test_vec_unpacks_hi_lo_v16hi (void) ++{ ++ for (int i = 0; i < N; i++) ++ s[i] = h[i]; ++} ++ ++/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v32qi:.*\tvext2xv\\.h\\.b.*-test_vec_unpacks_hi_lo_v32qi" } } */ ++/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v32qi" } } */ ++void ++test_vec_unpacks_hi_lo_v32qi (void) ++{ ++ for (int i = 0; i < N; i++) ++ h[i] = c[i]; ++} ++ ++/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8sf:.*\txvfcvtl\\.d\\.s.*-test_vec_unpacks_hi_lo_v8sf" } } */ ++/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8sf:.*\txvpermi\\.d.*-test_vec_unpacks_hi_lo_v8sf" } } */ ++void ++test_vec_unpacks_hi_lo_v8sf (void) ++{ ++ for (int i = 0; i < N; i++) ++ d[i] = f[i]; ++} ++ ++/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v8si:.*\tvext2xv\\.du\\.wu.*-test_vec_unpacku_hi_lo_v8si" } } */ ++/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v8si" } } */ ++void ++test_vec_unpacku_hi_lo_v8si (void) ++{ ++ for (int i = 0; i < N; i++) ++ ul[i] = us[i]; ++} ++ ++/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v16hi:.*\tvext2xv\\.wu\\.hu.*-test_vec_unpacku_hi_lo_v16hi" } } */ ++/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v16hi" } } */ ++void ++test_vec_unpacku_hi_lo_v16hi (void) ++{ ++ for (int i = 0; i < N; i++) ++ us[i] = uh[i]; ++} ++ ++/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v32qi:.*\tvext2xv\\.hu\\.bu.*-test_vec_unpacku_hi_lo_v32qi" } } */ ++/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v32qi" } } */ ++void ++test_vec_unpacku_hi_lo_v32qi (void) ++{ ++ for (int i = 0; i < N; i++) ++ uh[i] = uc[i]; ++} +-- +2.47.3 + diff --git a/0022-LoongArch-Add-standard-patterns-uabd-and-sabd.patch 
b/0022-LoongArch-Add-standard-patterns-uabd-and-sabd.patch new file mode 100644 index 0000000..92030d8 --- /dev/null +++ b/0022-LoongArch-Add-standard-patterns-uabd-and-sabd.patch @@ -0,0 +1,411 @@ +From f210a08fd6ffa3caefd36c254e97bbd9075f2667 Mon Sep 17 00:00:00 2001 +From: Guo Jie +Date: Mon, 30 Dec 2024 15:12:02 +0800 +Subject: [PATCH 22/59] LoongArch: Add standard patterns uabd and sabd + +gcc/ChangeLog: + + * config/loongarch/lasx.md (lasx_xvabsd_s_): Remove. + (abd3): New insn pattern. + (lasx_xvabsd_u_): Remove. + * config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vabsd_b): + Rename. + (CODE_FOR_lsx_vabsd_h): Ditto. + (CODE_FOR_lsx_vabsd_w): Ditto. + (CODE_FOR_lsx_vabsd_d): Ditto. + (CODE_FOR_lsx_vabsd_bu): Ditto. + (CODE_FOR_lsx_vabsd_hu): Ditto. + (CODE_FOR_lsx_vabsd_wu): Ditto. + (CODE_FOR_lsx_vabsd_du): Ditto. + (CODE_FOR_lasx_xvabsd_b): Ditto. + (CODE_FOR_lasx_xvabsd_h): Ditto. + (CODE_FOR_lasx_xvabsd_w): Ditto. + (CODE_FOR_lasx_xvabsd_d): Ditto. + (CODE_FOR_lasx_xvabsd_bu): Ditto. + (CODE_FOR_lasx_xvabsd_hu): Ditto. + (CODE_FOR_lasx_xvabsd_wu): Ditto. + (CODE_FOR_lasx_xvabsd_du): Ditto. + * config/loongarch/loongarch.md (u): Add smax/umax. + * config/loongarch/lsx.md (SU_MAX): New iterator. + (su_min): New attr. + (lsx_vabsd_s_): Remove. + (abd3): New insn pattern. + (lsx_vabsd_u_): Remove. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/abd-lasx.c: New test. + * gcc.target/loongarch/abd-lsx.c: New test. 
+--- + gcc/config/loongarch/lasx.md | 30 +++------ + gcc/config/loongarch/loongarch-builtins.cc | 32 ++++----- + gcc/config/loongarch/loongarch.md | 6 +- + gcc/config/loongarch/lsx.md | 37 +++++----- + gcc/testsuite/gcc.target/loongarch/abd-lasx.c | 67 +++++++++++++++++++ + gcc/testsuite/gcc.target/loongarch/abd-lsx.c | 67 +++++++++++++++++++ + 6 files changed, 182 insertions(+), 57 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/abd-lasx.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/abd-lsx.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index a04bf1e37..d6922adee 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -20,8 +20,6 @@ + ;; + + (define_c_enum "unspec" [ +- UNSPEC_LASX_XVABSD_S +- UNSPEC_LASX_XVABSD_U + UNSPEC_LASX_XVAVG_S + UNSPEC_LASX_XVAVG_U + UNSPEC_LASX_XVAVGR_S +@@ -1125,23 +1123,17 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +-(define_insn "lasx_xvabsd_s_" ++(define_insn "abd3" + [(set (match_operand:ILASX 0 "register_operand" "=f") +- (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f") +- (match_operand:ILASX 2 "register_operand" "f")] +- UNSPEC_LASX_XVABSD_S))] +- "ISA_HAS_LASX" +- "xvabsd.\t%u0,%u1,%u2" +- [(set_attr "type" "simd_int_arith") +- (set_attr "mode" "")]) +- +-(define_insn "lasx_xvabsd_u_" +- [(set (match_operand:ILASX 0 "register_operand" "=f") +- (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f") +- (match_operand:ILASX 2 "register_operand" "f")] +- UNSPEC_LASX_XVABSD_U))] ++ (minus:ILASX ++ (SU_MAX:ILASX ++ (match_operand:ILASX 1 "register_operand" "f") ++ (match_operand:ILASX 2 "register_operand" "f")) ++ (:ILASX ++ (match_dup 1) ++ (match_dup 2))))] + "ISA_HAS_LASX" +- "xvabsd.\t%u0,%u1,%u2" ++ "xvabsd.\t%u0,%u1,%u2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +@@ -4926,7 +4918,7 @@ + rtx t1 = gen_reg_rtx (V32QImode); + rtx t2 = gen_reg_rtx (V16HImode); + rtx t3 = 
gen_reg_rtx (V8SImode); +- emit_insn (gen_lasx_xvabsd_u_bu (t1, operands[1], operands[2])); ++ emit_insn (gen_uabdv32qi3 (t1, operands[1], operands[2])); + emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1)); + emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv8si3 (operands[0], t3, operands[3])); +@@ -4943,7 +4935,7 @@ + rtx t1 = gen_reg_rtx (V32QImode); + rtx t2 = gen_reg_rtx (V16HImode); + rtx t3 = gen_reg_rtx (V8SImode); +- emit_insn (gen_lasx_xvabsd_s_b (t1, operands[1], operands[2])); ++ emit_insn (gen_sabdv32qi3 (t1, operands[1], operands[2])); + emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1)); + emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv8si3 (operands[0], t3, operands[3])); +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index d3fb99ad0..b3ec7f33d 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -462,14 +462,14 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) + #define CODE_FOR_lsx_vssub_hu CODE_FOR_lsx_vssub_u_hu + #define CODE_FOR_lsx_vssub_wu CODE_FOR_lsx_vssub_u_wu + #define CODE_FOR_lsx_vssub_du CODE_FOR_lsx_vssub_u_du +-#define CODE_FOR_lsx_vabsd_b CODE_FOR_lsx_vabsd_s_b +-#define CODE_FOR_lsx_vabsd_h CODE_FOR_lsx_vabsd_s_h +-#define CODE_FOR_lsx_vabsd_w CODE_FOR_lsx_vabsd_s_w +-#define CODE_FOR_lsx_vabsd_d CODE_FOR_lsx_vabsd_s_d +-#define CODE_FOR_lsx_vabsd_bu CODE_FOR_lsx_vabsd_u_bu +-#define CODE_FOR_lsx_vabsd_hu CODE_FOR_lsx_vabsd_u_hu +-#define CODE_FOR_lsx_vabsd_wu CODE_FOR_lsx_vabsd_u_wu +-#define CODE_FOR_lsx_vabsd_du CODE_FOR_lsx_vabsd_u_du ++#define CODE_FOR_lsx_vabsd_b CODE_FOR_sabdv16qi3 ++#define CODE_FOR_lsx_vabsd_h CODE_FOR_sabdv8hi3 ++#define CODE_FOR_lsx_vabsd_w CODE_FOR_sabdv4si3 ++#define CODE_FOR_lsx_vabsd_d CODE_FOR_sabdv2di3 ++#define CODE_FOR_lsx_vabsd_bu CODE_FOR_uabdv16qi3 ++#define CODE_FOR_lsx_vabsd_hu CODE_FOR_uabdv8hi3 ++#define CODE_FOR_lsx_vabsd_wu 
CODE_FOR_uabdv4si3 ++#define CODE_FOR_lsx_vabsd_du CODE_FOR_uabdv2di3 + #define CODE_FOR_lsx_vftint_wu_s CODE_FOR_lsx_vftint_u_wu_s + #define CODE_FOR_lsx_vftint_lu_d CODE_FOR_lsx_vftint_u_lu_d + #define CODE_FOR_lsx_vandn_v CODE_FOR_andnv16qi3 +@@ -742,14 +742,14 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) + #define CODE_FOR_lasx_xvssub_hu CODE_FOR_lasx_xvssub_u_hu + #define CODE_FOR_lasx_xvssub_wu CODE_FOR_lasx_xvssub_u_wu + #define CODE_FOR_lasx_xvssub_du CODE_FOR_lasx_xvssub_u_du +-#define CODE_FOR_lasx_xvabsd_b CODE_FOR_lasx_xvabsd_s_b +-#define CODE_FOR_lasx_xvabsd_h CODE_FOR_lasx_xvabsd_s_h +-#define CODE_FOR_lasx_xvabsd_w CODE_FOR_lasx_xvabsd_s_w +-#define CODE_FOR_lasx_xvabsd_d CODE_FOR_lasx_xvabsd_s_d +-#define CODE_FOR_lasx_xvabsd_bu CODE_FOR_lasx_xvabsd_u_bu +-#define CODE_FOR_lasx_xvabsd_hu CODE_FOR_lasx_xvabsd_u_hu +-#define CODE_FOR_lasx_xvabsd_wu CODE_FOR_lasx_xvabsd_u_wu +-#define CODE_FOR_lasx_xvabsd_du CODE_FOR_lasx_xvabsd_u_du ++#define CODE_FOR_lasx_xvabsd_b CODE_FOR_sabdv32qi3 ++#define CODE_FOR_lasx_xvabsd_h CODE_FOR_sabdv16hi3 ++#define CODE_FOR_lasx_xvabsd_w CODE_FOR_sabdv8si3 ++#define CODE_FOR_lasx_xvabsd_d CODE_FOR_sabdv4di3 ++#define CODE_FOR_lasx_xvabsd_bu CODE_FOR_uabdv32qi3 ++#define CODE_FOR_lasx_xvabsd_hu CODE_FOR_uabdv16hi3 ++#define CODE_FOR_lasx_xvabsd_wu CODE_FOR_uabdv8si3 ++#define CODE_FOR_lasx_xvabsd_du CODE_FOR_uabdv4di3 + #define CODE_FOR_lasx_xvavg_b CODE_FOR_lasx_xvavg_s_b + #define CODE_FOR_lasx_xvavg_h CODE_FOR_lasx_xvavg_s_h + #define CODE_FOR_lasx_xvavg_w CODE_FOR_lasx_xvavg_s_w +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 4b8caeda7..b129c54eb 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -530,13 +530,15 @@ + (gt "") (gtu "u") + (ge "") (geu "u") + (lt "") (ltu "u") +- (le "") (leu "u")]) ++ (le "") (leu "u") ++ (smax "") (umax "u")]) + + ;; is like except uppercase. 
+ (define_code_attr U [(sign_extend "") (zero_extend "U")]) + + ;; is like , but the signed form expands to "s" rather than "". +-(define_code_attr su [(sign_extend "s") (zero_extend "u")]) ++(define_code_attr su [(sign_extend "s") (zero_extend "u") ++ (smax "s") (umax "u")]) + + (define_code_attr u_bool [(sign_extend "false") (zero_extend "true")]) + +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 22a3e3fee..7d46c4107 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -20,8 +20,6 @@ + ;; + + (define_c_enum "unspec" [ +- UNSPEC_LSX_ABSD_S +- UNSPEC_LSX_VABSD_U + UNSPEC_LSX_VAVG_S + UNSPEC_LSX_VAVG_U + UNSPEC_LSX_VAVGR_S +@@ -191,6 +189,11 @@ + (V4SI "V8HI") + (V2DI "V4SI")]) + ++;; Signed and unsigned max operations. ++(define_code_iterator SU_MAX [smax umax]) ++ ++(define_code_attr su_min [(smax "smin") (umax "umin")]) ++ + ;; The attribute gives double modes for vector modes. + (define_mode_attr VDMODE + [(V2DI "V2DI") +@@ -976,23 +979,17 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +-(define_insn "lsx_vabsd_s_" ++(define_insn "abd3" + [(set (match_operand:ILSX 0 "register_operand" "=f") +- (unspec:ILSX [(match_operand:ILSX 1 "register_operand" "f") +- (match_operand:ILSX 2 "register_operand" "f")] +- UNSPEC_LSX_ABSD_S))] +- "ISA_HAS_LSX" +- "vabsd.\t%w0,%w1,%w2" +- [(set_attr "type" "simd_int_arith") +- (set_attr "mode" "")]) +- +-(define_insn "lsx_vabsd_u_" +- [(set (match_operand:ILSX 0 "register_operand" "=f") +- (unspec:ILSX [(match_operand:ILSX 1 "register_operand" "f") +- (match_operand:ILSX 2 "register_operand" "f")] +- UNSPEC_LSX_VABSD_U))] +- "ISA_HAS_LSX" +- "vabsd.\t%w0,%w1,%w2" ++ (minus:ILSX ++ (SU_MAX:ILSX ++ (match_operand:ILSX 1 "register_operand" "f") ++ (match_operand:ILSX 2 "register_operand" "f")) ++ (:ILSX ++ (match_dup 1) ++ (match_dup 2))))] ++ "ISA_HAS_LSX" ++ "vabsd.\t%w0,%w1,%w2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +@@ 
-3181,7 +3178,7 @@ + rtx t1 = gen_reg_rtx (V16QImode); + rtx t2 = gen_reg_rtx (V8HImode); + rtx t3 = gen_reg_rtx (V4SImode); +- emit_insn (gen_lsx_vabsd_u_bu (t1, operands[1], operands[2])); ++ emit_insn (gen_uabdv16qi3 (t1, operands[1], operands[2])); + emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1)); + emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv4si3 (operands[0], t3, operands[3])); +@@ -3198,7 +3195,7 @@ + rtx t1 = gen_reg_rtx (V16QImode); + rtx t2 = gen_reg_rtx (V8HImode); + rtx t3 = gen_reg_rtx (V4SImode); +- emit_insn (gen_lsx_vabsd_s_b (t1, operands[1], operands[2])); ++ emit_insn (gen_sabdv16qi3 (t1, operands[1], operands[2])); + emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1)); + emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv4si3 (operands[0], t3, operands[3])); +diff --git a/gcc/testsuite/gcc.target/loongarch/abd-lasx.c b/gcc/testsuite/gcc.target/loongarch/abd-lasx.c +new file mode 100644 +index 000000000..0cb639b96 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/abd-lasx.c +@@ -0,0 +1,67 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mlasx -fdump-rtl-expand-all" } */ ++ ++#define ABD(x, y) ((x - y > 0) ? (x - y) : -(x - y)) ++#define MAX(x, y) ((x) > (y) ? (x) : (y)) ++#define MIN(x, y) ((x) < (y) ? 
(x) : (y)) ++#define N 1024 ++ ++#define FUNC1(T) \ ++ void \ ++ sabd1_##T (signed T *restrict a, signed T *restrict b, \ ++ signed T *restrict out) \ ++ { \ ++ for (int i = 0; i < N; i++) \ ++ out[i] = ABD (a[i], b[i]); \ ++ } \ ++ \ ++ void \ ++ uabd1_##T (unsigned T *restrict a, unsigned T *restrict b, \ ++ unsigned T *restrict out) \ ++ { \ ++ for (int i = 0; i < N; i++) \ ++ out[i] = ABD (a[i], b[i]); \ ++ } ++ ++#define FUNC2(T) \ ++ void \ ++ sabd2_##T (signed T *restrict a, signed T *restrict b, \ ++ signed T *restrict out) \ ++ { \ ++ for (int i = 0; i < N; i++) \ ++ out[i] = MAX (a[i], b[i]) - MIN (a[i], b[i]); \ ++ } \ ++ \ ++ void \ ++ uabd2_##T (unsigned T *restrict a, unsigned T *restrict b, \ ++ unsigned T *restrict out) \ ++ { \ ++ for (int i = 0; i < N; i++) \ ++ out[i] = MAX (a[i], b[i]) - MIN (a[i], b[i]); \ ++ } ++ ++/* Verify if the expand pass fits standard pattern name. */ ++FUNC1 (char) ++FUNC1 (short) ++FUNC1 (int) ++FUNC1 (long) ++ ++/* Verify if the combiner works well. 
*/ ++FUNC2 (char) ++FUNC2 (short) ++FUNC2 (int) ++FUNC2 (long) ++/* { dg-final { scan-rtl-dump "Function sabd1_char.*ABD.*Function uabd1_char" "expand" } } */ ++/* { dg-final { scan-rtl-dump "Function uabd1_char.*ABD.*Function sabd1_short" "expand" } } */ ++/* { dg-final { scan-rtl-dump "Function sabd1_short.*ABD.*Function uabd1_short" "expand" } } */ ++/* { dg-final { scan-rtl-dump "Function uabd1_short.*ABD.*Function sabd1_int" "expand" } } */ ++/* { dg-final { scan-rtl-dump "Function sabd1_int.*ABD.*Function uabd1_int" "expand" } } */ ++/* { dg-final { scan-rtl-dump "Function sabd1_long.*ABD.*Function uabd1_long" "expand" } } */ ++/* { dg-final { scan-assembler-times "sabd2_char:.*\txvabsd\\.b.*-sabd2_char" 1 } } */ ++/* { dg-final { scan-assembler-times "uabd2_char:.*\txvabsd\\.bu.*-uabd2_char" 1 } } */ ++/* { dg-final { scan-assembler-times "sabd2_short:.*\txvabsd\\.h.*-sabd2_short" 1 } } */ ++/* { dg-final { scan-assembler-times "uabd2_short:.*\txvabsd\\.hu.*-uabd2_short" 1 } } */ ++/* { dg-final { scan-assembler-times "sabd2_int:.*\txvabsd\\.w.*-sabd2_int" 1 } } */ ++/* { dg-final { scan-assembler-times "uabd2_int:.*\txvabsd\\.wu.*-uabd2_int" 1 } } */ ++/* { dg-final { scan-assembler-times "sabd2_long:.*\txvabsd\\.d.*-sabd2_long" 1 } } */ ++/* { dg-final { scan-assembler-times "uabd2_long:.*\txvabsd\\.du.*-uabd2_long" 1 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/abd-lsx.c b/gcc/testsuite/gcc.target/loongarch/abd-lsx.c +new file mode 100644 +index 000000000..c036888e3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/abd-lsx.c +@@ -0,0 +1,67 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mlsx -fdump-rtl-expand-all" } */ ++ ++#define ABD(x, y) ((x - y > 0) ? (x - y) : -(x - y)) ++#define MAX(x, y) ((x) > (y) ? (x) : (y)) ++#define MIN(x, y) ((x) < (y) ? 
(x) : (y)) ++#define N 1024 ++ ++#define FUNC1(T) \ ++ void \ ++ sabd1_##T (signed T *restrict a, signed T *restrict b, \ ++ signed T *restrict out) \ ++ { \ ++ for (int i = 0; i < N; i++) \ ++ out[i] = ABD (a[i], b[i]); \ ++ } \ ++ \ ++ void \ ++ uabd1_##T (unsigned T *restrict a, unsigned T *restrict b, \ ++ unsigned T *restrict out) \ ++ { \ ++ for (int i = 0; i < N; i++) \ ++ out[i] = ABD (a[i], b[i]); \ ++ } ++ ++#define FUNC2(T) \ ++ void \ ++ sabd2_##T (signed T *restrict a, signed T *restrict b, \ ++ signed T *restrict out) \ ++ { \ ++ for (int i = 0; i < N; i++) \ ++ out[i] = MAX (a[i], b[i]) - MIN (a[i], b[i]); \ ++ } \ ++ \ ++ void \ ++ uabd2_##T (unsigned T *restrict a, unsigned T *restrict b, \ ++ unsigned T *restrict out) \ ++ { \ ++ for (int i = 0; i < N; i++) \ ++ out[i] = MAX (a[i], b[i]) - MIN (a[i], b[i]); \ ++ } ++ ++/* Verify if the expand pass fits standard pattern name. */ ++FUNC1 (char) ++FUNC1 (short) ++FUNC1 (int) ++FUNC1 (long) ++ ++/* Verify if the combiner works well. 
*/ ++FUNC2 (char) ++FUNC2 (short) ++FUNC2 (int) ++FUNC2 (long) ++/* { dg-final { scan-rtl-dump "Function sabd1_char.*ABD.*Function uabd1_char" "expand" } } */ ++/* { dg-final { scan-rtl-dump "Function uabd1_char.*ABD.*Function sabd1_short" "expand" } } */ ++/* { dg-final { scan-rtl-dump "Function sabd1_short.*ABD.*Function uabd1_short" "expand" } } */ ++/* { dg-final { scan-rtl-dump "Function uabd1_short.*ABD.*Function sabd1_int" "expand" } } */ ++/* { dg-final { scan-rtl-dump "Function sabd1_int.*ABD.*Function uabd1_int" "expand" } } */ ++/* { dg-final { scan-rtl-dump "Function sabd1_long.*ABD.*Function uabd1_long" "expand" } } */ ++/* { dg-final { scan-assembler-times "sabd2_char:.*\tvabsd\\.b.*-sabd2_char" 1 } } */ ++/* { dg-final { scan-assembler-times "uabd2_char:.*\tvabsd\\.bu.*-uabd2_char" 1 } } */ ++/* { dg-final { scan-assembler-times "sabd2_short:.*\tvabsd\\.h.*-sabd2_short" 1 } } */ ++/* { dg-final { scan-assembler-times "uabd2_short:.*\tvabsd\\.hu.*-uabd2_short" 1 } } */ ++/* { dg-final { scan-assembler-times "sabd2_int:.*\tvabsd\\.w.*-sabd2_int" 1 } } */ ++/* { dg-final { scan-assembler-times "uabd2_int:.*\tvabsd\\.wu.*-uabd2_int" 1 } } */ ++/* { dg-final { scan-assembler-times "sabd2_long:.*\tvabsd\\.d.*-sabd2_long" 1 } } */ ++/* { dg-final { scan-assembler-times "uabd2_long:.*\tvabsd\\.du.*-uabd2_long" 1 } } */ +-- +2.47.3 + diff --git a/0023-LoongArch-Optimize-for-conditional-move-operations.patch b/0023-LoongArch-Optimize-for-conditional-move-operations.patch new file mode 100644 index 0000000..0c3cf96 --- /dev/null +++ b/0023-LoongArch-Optimize-for-conditional-move-operations.patch @@ -0,0 +1,285 @@ +From 4384a41c4d2618854090f433304ae104e0bb2fe8 Mon Sep 17 00:00:00 2001 +From: Guo Jie +Date: Mon, 30 Dec 2024 10:39:13 +0800 +Subject: [PATCH 23/59] LoongArch: Optimize for conditional move operations + +The optimization example is as follows. + +From: + if (condition) + dest += 1 << 16; +To: + dest += (condition ? 
1 : 0) << 16; + +It does not use maskeqz and masknez, thus reducing the number of +instructions. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_expand_conditional_move): Add some optimization + implementations based on noce_try_cmove_arith. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/conditional-move-opt-1.c: New test. + * gcc.target/loongarch/conditional-move-opt-2.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 103 +++++++++++++++++- + .../loongarch/conditional-move-opt-1.c | 58 ++++++++++ + .../loongarch/conditional-move-opt-2.c | 42 +++++++ + 3 files changed, 202 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 8d29924f3..8a1dcc716 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5284,6 +5284,81 @@ loongarch_expand_conditional_move (rtx *operands) + loongarch_emit_float_compare (&code, &op0, &op1); + else + { ++ /* Optimize to reduce the number of instructions for ternary operations. ++ Mainly implemented based on noce_try_cmove_arith. ++ For dest = (condition) ? value_if_true : value_if_false; ++ the optimization requires: ++ a. value_if_false = var; ++ b. value_if_true = var OP C (a positive integer power of 2). ++ ++ Situations similar to the following: ++ if (condition) ++ dest += 1 << imm; ++ to: ++ dest += (condition ? 1 : 0) << imm; */ ++ ++ rtx_insn *insn; ++ HOST_WIDE_INT val = 0; /* The value of rtx C. */ ++ /* INSN with operands[2] as the output. */ ++ rtx_insn *value_if_true_insn = NULL; ++ /* INSN with operands[3] as the output. */ ++ rtx_insn *value_if_false_insn = NULL; ++ rtx value_if_true_insn_src = NULL_RTX; ++ /* Common operand var in value_if_true and value_if_false. 
*/ ++ rtx comm_var = NULL_RTX; ++ bool can_be_optimized = false; ++ ++ /* Search value_if_true_insn and value_if_false_insn. */ ++ struct sequence_stack *seq = get_current_sequence ()->next; ++ for (insn = seq->last; insn; insn = PREV_INSN (insn)) ++ { ++ if (single_set (insn)) ++ { ++ rtx set_dest = SET_DEST (single_set (insn)); ++ if (rtx_equal_p (set_dest, operands[2])) ++ value_if_true_insn = insn; ++ else if (rtx_equal_p (set_dest, operands[3])) ++ value_if_false_insn = insn; ++ if (value_if_true_insn && value_if_false_insn) ++ break; ++ } ++ } ++ ++ /* Check if the optimization conditions are met. */ ++ if (value_if_true_insn ++ && value_if_false_insn ++ /* Make sure that value_if_false and var are the same. */ ++ && BINARY_P (value_if_true_insn_src ++ = SET_SRC (single_set (value_if_true_insn))) ++ /* Make sure that both value_if_true and value_if_false ++ has the same var. */ ++ && rtx_equal_p (XEXP (value_if_true_insn_src, 0), ++ SET_SRC (single_set (value_if_false_insn)))) ++ { ++ comm_var = SET_SRC (single_set (value_if_false_insn)); ++ rtx src = XEXP (value_if_true_insn_src, 1); ++ rtx imm = NULL_RTX; ++ if (CONST_INT_P (src)) ++ imm = src; ++ else ++ for (insn = seq->last; insn; insn = PREV_INSN (insn)) ++ { ++ rtx set = single_set (insn); ++ if (set && rtx_equal_p (SET_DEST (set), src)) ++ { ++ imm = SET_SRC (set); ++ break; ++ } ++ } ++ if (imm && CONST_INT_P (imm)) ++ { ++ val = INTVAL (imm); ++ /* Make sure that imm is a positive integer power of 2. 
*/ ++ if (val > 0 && !(val & (val - 1))) ++ can_be_optimized = true; ++ } ++ } ++ + if (GET_MODE_SIZE (GET_MODE (op0)) < UNITS_PER_WORD) + { + promote_op[0] = (REG_P (op0) && REG_P (operands[2]) && +@@ -5304,22 +5379,48 @@ loongarch_expand_conditional_move (rtx *operands) + op0_extend = op0; + op1_extend = force_reg (word_mode, op1); + ++ rtx target = gen_reg_rtx (GET_MODE (op0)); ++ + if (code == EQ || code == NE) + { + op0 = loongarch_zero_if_equal (op0, op1); + op1 = const0_rtx; ++ /* For EQ, set target to 1 if op0 and op1 are the same, ++ otherwise set to 0. ++ For NE, set target to 0 if op0 and op1 are the same, ++ otherwise set to 1. */ ++ if (can_be_optimized) ++ loongarch_emit_binary (code, target, op0, const0_rtx); + } + else + { + /* The comparison needs a separate scc instruction. Store the + result of the scc in *OP0 and compare it against zero. */ + bool invert = false; +- rtx target = gen_reg_rtx (GET_MODE (op0)); + loongarch_emit_int_order_test (code, &invert, target, op0, op1); ++ if (can_be_optimized && invert) ++ loongarch_emit_binary (EQ, target, target, const0_rtx); + code = invert ? EQ : NE; + op0 = target; + op1 = const0_rtx; + } ++ ++ if (can_be_optimized) ++ { ++ /* Perform (condition ? 1 : 0) << log2 (C). */ ++ loongarch_emit_binary (ASHIFT, target, target, ++ GEN_INT (exact_log2 (val))); ++ /* Shift-related insn patterns only support SImode operands[2]. */ ++ enum rtx_code opcode = GET_CODE (value_if_true_insn_src); ++ if (opcode == ASHIFT || opcode == ASHIFTRT || opcode == LSHIFTRT ++ || opcode == ROTATE || opcode == ROTATERT) ++ target = gen_lowpart (SImode, target); ++ /* Perform target = target OP ((condition ? 1 : 0) << log2 (C)). 
*/ ++ loongarch_emit_binary (opcode, operands[0], ++ force_reg (GET_MODE (operands[3]), comm_var), ++ target); ++ return; ++ } + } + + rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); +diff --git a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c +new file mode 100644 +index 000000000..ed13471aa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-final { scan-assembler-not "maskeqz" } } */ ++/* { dg-final { scan-assembler-not "masknez" } } */ ++ ++extern long lm, ln, lr; ++ ++void ++test_ne () ++{ ++ if (lm != ln) ++ lr += (1 << 16); ++ lr += lm; ++} ++ ++void ++test_eq () ++{ ++ if (lm == ln) ++ lr = lm + (1 << 16); ++ else ++ lr = lm; ++ lr += lm; ++} ++ ++void ++test_lt () ++{ ++ if (lm < ln) ++ lr *= (1 << 16); ++ lr += lm; ++} ++ ++void ++test_le () ++{ ++ if (lm <= ln) ++ lr = lm * ((long)1 << 32); ++ else ++ lr = lm; ++ lr += lm; ++} ++ ++void ++test_nez () ++{ ++ if (lm != 0) ++ lr <<= (1 << 4); ++ lr += lm; ++} ++ ++void ++test_eqz () ++{ ++ if (lm == 0) ++ lr >>= (1 << 2); ++ lr += lm; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c +new file mode 100644 +index 000000000..ac72d4d93 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c +@@ -0,0 +1,42 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 --param max-rtl-if-conversion-insns=1" } */ ++/* { dg-final { scan-assembler-not "maskeqz" } } */ ++/* { dg-final { scan-assembler-not "masknez" } } */ ++ ++/* The relevant optimization is currently only based on noce_try_cmove_arith, ++ so it bypasses noce_convert_multiple_sets by ++ --param max-rtl-if-conversion-insns=1 to execute noce_try_cmove_arith. 
*/ ++ ++extern long lm, ln, lr; ++ ++void ++test_ge () ++{ ++ if (lm >= ln) ++ lr += ((long)1 << 32); ++ lr += lm; ++} ++ ++void ++test_ltz () ++{ ++ if (lm < 0) ++ lr |= (1 << 16); ++ lr += lm; ++} ++ ++void ++test_lez () ++{ ++ if (lm <= 0) ++ lr &= (1 << 16); ++ lr += lm; ++} ++ ++void ++test_gez () ++{ ++ if (lm >= 0) ++ lr ^= (1 << 16); ++ lr += lm; ++} +-- +2.47.3 + diff --git a/0024-LoongArch-Optimize-initializing-fp-resgister-to-zero.patch b/0024-LoongArch-Optimize-initializing-fp-resgister-to-zero.patch new file mode 100644 index 0000000..75d9a18 --- /dev/null +++ b/0024-LoongArch-Optimize-initializing-fp-resgister-to-zero.patch @@ -0,0 +1,86 @@ +From 047e368c633c1c7cf1bf6cf2036b8f79f5a7cbce Mon Sep 17 00:00:00 2001 +From: Deng Jianbo +Date: Tue, 31 Dec 2024 19:33:23 +0800 +Subject: [PATCH 24/59] LoongArch: Optimize initializing fp resgister to zero + +In LoongArch, currently uses instruction movgr2fr.{d|w} to move zero +from fixed-point register to floating-point register for initializing +fp register to zero. When LSX or LASX is enabled, we can use instruction +vxor.v which has lower latency than instruction movgr2fr.{d|w} to set fp +register to zero directly. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_output_move): + Optimize instructions for initializing fp register to zero. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/mov-zero-1.c: New test. + * gcc.target/loongarch/mov-zero-2.c: New test. 
+--- + gcc/config/loongarch/loongarch.cc | 2 ++ + gcc/testsuite/gcc.target/loongarch/mov-zero-1.c | 15 +++++++++++++++ + gcc/testsuite/gcc.target/loongarch/mov-zero-2.c | 15 +++++++++++++++ + 3 files changed, 32 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/mov-zero-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/mov-zero-2.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 8a1dcc716..e2b07ab68 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4762,6 +4762,8 @@ loongarch_output_move (rtx dest, rtx src) + gcc_unreachable (); + } + } ++ if (ISA_HAS_LSX && src == CONST0_RTX (GET_MODE (src))) ++ return "vxor.v\t%w0,%w0,%w0"; + + return dbl_p ? "movgr2fr.d\t%0,%z1" : "movgr2fr.w\t%0,%z1"; + } +diff --git a/gcc/testsuite/gcc.target/loongarch/mov-zero-1.c b/gcc/testsuite/gcc.target/loongarch/mov-zero-1.c +new file mode 100644 +index 000000000..4744f2f2f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/mov-zero-1.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx" } */ ++/* { dg-final { scan-assembler-times "vxor\\.v" 2 } } */ ++ ++double ++get_double_zero () ++{ ++ return 0; ++} ++ ++float ++get_float_zero () ++{ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/mov-zero-2.c b/gcc/testsuite/gcc.target/loongarch/mov-zero-2.c +new file mode 100644 +index 000000000..6cb48052d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/mov-zero-2.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mno-lsx" } */ ++/* { dg-final { scan-assembler-times "movgr2fr" 2 } } */ ++ ++double ++get_double_zero () ++{ ++ return 0; ++} ++ ++float ++get_float_zero () ++{ ++ return 0; ++} +-- +2.47.3 + diff --git a/0025-LoongArch-Opitmize-the-cost-of-vec_construct.patch b/0025-LoongArch-Opitmize-the-cost-of-vec_construct.patch new file mode 100644 index 0000000..7fdea20 --- /dev/null +++ 
b/0025-LoongArch-Opitmize-the-cost-of-vec_construct.patch @@ -0,0 +1,94 @@ +From ba2ec3067838fe10c534943cd710cffbab90fde4 Mon Sep 17 00:00:00 2001 +From: chenxiaolong +Date: Tue, 7 Jan 2025 21:04:51 +0800 +Subject: [PATCH 25/59] LoongArch: Opitmize the cost of vec_construct. + +When analyzing 525 on LoongArch architecture, it was found that the +for loop of hotspot function x264_pixel_satd_8x4 could not be vectorized +with 256-bit vectors due to the vec_construct cost setting. After re-adjusting +vec_construct, the performance of 525 program was improved by 16.57%. +It was found that this function can be vectorized on the aarch64 and +x86 architectures, see [PR98138]. + +Co-Authored-By: Deng Jianbo . + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_builtin_vectorization_cost): Modify the + construction cost of the vec_construct vector. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-slp-two-operator.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 6 +-- + .../loongarch/vect-slp-two-operator.c | 38 +++++++++++++++++++ + 2 files changed, 41 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-slp-two-operator.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index e2b07ab68..81830d004 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4117,10 +4117,10 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + + case vec_construct: + elements = TYPE_VECTOR_SUBPARTS (vectype); +- if (ISA_HAS_LASX) +- return elements + 1; ++ if (LASX_SUPPORTED_MODE_P (mode) && !LSX_SUPPORTED_MODE_P (mode)) ++ return elements / 2 + 3; + else +- return elements; ++ return elements / 2 + 1; + + default: + gcc_unreachable (); +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-slp-two-operator.c b/gcc/testsuite/gcc.target/loongarch/vect-slp-two-operator.c +new file mode 100644 +index 000000000..43b467599 +--- /dev/null 
++++ 
b/gcc/testsuite/gcc.target/loongarch/vect-slp-two-operator.c +@@ -0,0 +1,38 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx -ftree-vectorize -fdump-tree-vect -fdump-tree-vect-details" } */ ++ ++typedef unsigned char uint8_t; ++typedef unsigned int uint32_t; ++ ++#define HADAMARD4(d0, d1, d2, d3, s0, s1, s2, s3) \ ++ { \ ++ int t0 = s0 + s1; \ ++ int t1 = s0 - s1; \ ++ int t2 = s2 + s3; \ ++ int t3 = s2 - s3; \ ++ d0 = t0 + t2; \ ++ d1 = t1 + t3; \ ++ d2 = t0 - t2; \ ++ d3 = t1 - t3; \ ++ } ++ ++void sink (uint32_t tmp[4][4]); ++ ++void ++x264_pixel_satd_8x4 (uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2) ++{ ++ uint32_t tmp[4][4]; ++ int sum = 0; ++ for (int i = 0; i < 4; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ uint32_t a0 = (pix1[0] - pix2[0]) + ((pix1[4] - pix2[4]) << 16); ++ uint32_t a1 = (pix1[1] - pix2[1]) + ((pix1[5] - pix2[5]) << 16); ++ uint32_t a2 = (pix1[2] - pix2[2]) + ((pix1[6] - pix2[6]) << 16); ++ uint32_t a3 = (pix1[3] - pix2[3]) + ((pix1[7] - pix2[7]) << 16); ++ HADAMARD4 (tmp[i][0], tmp[i][1], tmp[i][2], tmp[i][3], a0, a1, a2, a3); ++ } ++ sink (tmp); ++} ++ ++/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +-- +2.47.3 + diff --git a/0026-LoongArch-Generate-the-final-immediate-for-lu12i.w-l.patch b/0026-LoongArch-Generate-the-final-immediate-for-lu12i.w-l.patch new file mode 100644 index 0000000..0072352 --- /dev/null +++ b/0026-LoongArch-Generate-the-final-immediate-for-lu12i.w-l.patch @@ -0,0 +1,237 @@ +From aa5d98e4c693ad32fc7adba5e64f3abc44dcfba4 Mon Sep 17 00:00:00 2001 +From: mengqinggang +Date: Fri, 10 Jan 2025 10:27:09 +0800 +Subject: [PATCH 26/59] LoongArch: Generate the final immediate for lu12i.w, + lu32i.d and lu52i.d + +Generate 0x1010 instead of 0x1010000>>12 for lu12i.w. lu32i.d and lu52i.d use +the same processing. + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Use new loongarch_output_move. 
+ * config/loongarch/loongarch-protos.h (loongarch_output_move): + Change parameters from (rtx, rtx) to (rtx *). + * config/loongarch/loongarch.cc (loongarch_output_move): + Generate final immediate for lu12i.w and lu52i.d. + * config/loongarch/loongarch.md: + Generate final immediate for lu32i.d and lu52i.d. + * config/loongarch/lsx.md: Use new loongarch_output_move. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/imm-load.c: Not generate ">>". +--- + gcc/config/loongarch/lasx.md | 2 +- + gcc/config/loongarch/loongarch-protos.h | 2 +- + gcc/config/loongarch/loongarch.cc | 14 ++++++-- + gcc/config/loongarch/loongarch.md | 32 ++++++++++++------- + gcc/config/loongarch/lsx.md | 2 +- + gcc/testsuite/gcc.target/loongarch/imm-load.c | 1 + + 6 files changed, 35 insertions(+), 18 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index d6922adee..640134acc 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -723,7 +723,7 @@ + [(set (match_operand:LASX 0 "nonimmediate_operand" "=f,f,R,*r,*f") + (match_operand:LASX 1 "move_operand" "fYGYI,R,f,*f,*r"))] + "ISA_HAS_LASX" +- { return loongarch_output_move (operands[0], operands[1]); } ++ { return loongarch_output_move (operands); } + [(set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert") + (set_attr "mode" "") + (set_attr "length" "8,4,4,4,4")]) +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 008eb28a9..12831eb93 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -86,7 +86,7 @@ extern void loongarch_split_move (rtx, rtx); + extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode); + extern void loongarch_split_plus_constant (rtx *, machine_mode); + extern void loongarch_split_vector_move (rtx, rtx); +-extern const char *loongarch_output_move (rtx, rtx); ++extern const char *loongarch_output_move (rtx *); + 
#ifdef RTX_CODE + extern void loongarch_expand_scc (rtx *); + extern void loongarch_expand_vec_cmp (rtx *); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 81830d004..da9620d46 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4711,8 +4711,10 @@ loongarch_split_vector_move (rtx dest, rtx src) + that SRC is operand 1 and DEST is operand 0. */ + + const char * +-loongarch_output_move (rtx dest, rtx src) ++loongarch_output_move (rtx *operands) + { ++ rtx src = operands[1]; ++ rtx dest = operands[0]; + enum rtx_code dest_code = GET_CODE (dest); + enum rtx_code src_code = GET_CODE (src); + machine_mode mode = GET_MODE (dest); +@@ -4867,13 +4869,19 @@ loongarch_output_move (rtx dest, rtx src) + if (src_code == CONST_INT) + { + if (LU12I_INT (src)) +- return "lu12i.w\t%0,%1>>12\t\t\t# %X1"; ++ { ++ operands[1] = GEN_INT (INTVAL (operands[1]) >> 12); ++ return "lu12i.w\t%0,%1\t\t\t# %X1"; ++ } + else if (IMM12_INT (src)) + return "addi.w\t%0,$r0,%1\t\t\t# %X1"; + else if (IMM12_INT_UNSIGNED (src)) + return "ori\t%0,$r0,%1\t\t\t# %X1"; + else if (LU52I_INT (src)) +- return "lu52i.d\t%0,$r0,%X1>>52\t\t\t# %1"; ++ { ++ operands[1] = GEN_INT (INTVAL (operands[1]) >> 52); ++ return "lu52i.d\t%0,$r0,%X1\t\t\t# %1"; ++ } + else + gcc_unreachable (); + } +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index b129c54eb..5aecf5744 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2218,7 +2218,7 @@ + "!TARGET_64BIT + && (register_operand (operands[0], DImode) + || reg_or_0_operand (operands[1], DImode))" +- { return loongarch_output_move (operands[0], operands[1]); } ++ { return loongarch_output_move (operands); } + "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO + (operands[0]))" + [(const_int 0)] +@@ -2237,7 +2237,9 @@ + "TARGET_64BIT + && (register_operand (operands[0], DImode) + || 
reg_or_0_operand (operands[1], DImode))" +- { return loongarch_output_move (operands[0], operands[1]); } ++ { ++ return loongarch_output_move (operands); ++ } + "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO + (operands[0]))" + [(const_int 0)] +@@ -2358,7 +2360,7 @@ + (match_operand:HI 1 "move_operand" "r,Yd,I,m,rJ,k,rJ"))] + "(register_operand (operands[0], HImode) + || reg_or_0_operand (operands[1], HImode))" +- { return loongarch_output_move (operands[0], operands[1]); } ++ { return loongarch_output_move (operands); } + "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO + (operands[0]))" + [(const_int 0)] +@@ -2392,7 +2394,7 @@ + (match_operand:QI 1 "move_operand" "r,I,m,rJ,k,rJ"))] + "(register_operand (operands[0], QImode) + || reg_or_0_operand (operands[1], QImode))" +- { return loongarch_output_move (operands[0], operands[1]); } ++ { return loongarch_output_move (operands); } + [(set_attr "move_type" "move,const,load,store,load,store") + (set_attr "mode" "QI")]) + +@@ -2413,7 +2415,7 @@ + "TARGET_HARD_FLOAT + && (register_operand (operands[0], SFmode) + || reg_or_0_operand (operands[1], SFmode))" +- { return loongarch_output_move (operands[0], operands[1]); } ++ { return loongarch_output_move (operands); } + [(set_attr "move_type" "fmove,mgtf,fpload,fpstore,fpload,fpstore,store,store,mgtf,mftg,move,load,store") + (set_attr "mode" "SF")]) + +@@ -2423,7 +2425,7 @@ + "TARGET_SOFT_FLOAT + && (register_operand (operands[0], SFmode) + || reg_or_0_operand (operands[1], SFmode))" +- { return loongarch_output_move (operands[0], operands[1]); } ++ { return loongarch_output_move (operands); } + [(set_attr "move_type" "move,load,store") + (set_attr "mode" "SF")]) + +@@ -2444,7 +2446,7 @@ + "TARGET_DOUBLE_FLOAT + && (register_operand (operands[0], DFmode) + || reg_or_0_operand (operands[1], DFmode))" +- { return loongarch_output_move (operands[0], operands[1]); } ++ { return loongarch_output_move (operands); } + [(set_attr 
"move_type" "fmove,mgtf,fpload,fpstore,fpload,fpstore,store,store,mgtf,mftg,move,load,store") + (set_attr "mode" "DF")]) + +@@ -2455,7 +2457,7 @@ + && TARGET_64BIT + && (register_operand (operands[0], DFmode) + || reg_or_0_operand (operands[1], DFmode))" +- { return loongarch_output_move (operands[0], operands[1]); } ++ { return loongarch_output_move (operands); } + [(set_attr "move_type" "move,load,store") + (set_attr "mode" "DF")]) + +@@ -2598,7 +2600,10 @@ + (subreg:SI (match_operand:DI 1 "register_operand" "0") 0)) + (match_operand:DI 2 "const_lu32i_operand" "u")))] + "TARGET_64BIT" +- "lu32i.d\t%0,%X2>>32" ++ { ++ operands[2] = GEN_INT (INTVAL (operands[2]) >> 32); ++ return "lu32i.d\t%0,%X2"; ++ } + [(set_attr "type" "arith") + (set_attr "mode" "DI")]) + +@@ -2609,7 +2614,10 @@ + (match_operand 2 "lu52i_mask_operand")) + (match_operand 3 "const_lu52i_operand" "v")))] + "TARGET_64BIT" +- "lu52i.d\t%0,%1,%X3>>52" ++ { ++ operands[3] = GEN_INT (INTVAL (operands[3]) >> 52); ++ return "lu52i.d\t%0,%1,%X3"; ++ } + [(set_attr "type" "arith") + (set_attr "mode" "DI")]) + +@@ -4349,9 +4357,9 @@ + { + /* The load destination does not overlap the source. 
*/ + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])); +- output_asm_insn (loongarch_output_move (operands[0], operands[1]), ++ output_asm_insn (loongarch_output_move (operands), + operands); +- output_asm_insn (loongarch_output_move (operands[2], operands[3]), ++ output_asm_insn (loongarch_output_move (&operands[2]), + &operands[2]); + return ""; + } +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 7d46c4107..cf6d05900 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -597,7 +597,7 @@ + [(set (match_operand:LSX 0 "nonimmediate_operand" "=f,f,R,*r,*f,*r") + (match_operand:LSX 1 "move_operand" "fYGYI,R,f,*f,*r,*r"))] + "ISA_HAS_LSX" +-{ return loongarch_output_move (operands[0], operands[1]); } ++{ return loongarch_output_move (operands); } + [(set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert,simd_copy") + (set_attr "mode" "")]) + +diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load.c b/gcc/testsuite/gcc.target/loongarch/imm-load.c +index c04ca3399..33291fe89 100644 +--- a/gcc/testsuite/gcc.target/loongarch/imm-load.c ++++ b/gcc/testsuite/gcc.target/loongarch/imm-load.c +@@ -1,5 +1,6 @@ + /* { dg-do compile } */ + /* { dg-options "-mabi=lp64d -O2 -fdump-rtl-split1" } */ ++/* { dg-final { scan-assembler-not "test:.*>>.*test" } } */ + + long int + test (void) +-- +2.47.3 + diff --git a/0027-LoongArch-Add-alsl.wu.patch b/0027-LoongArch-Add-alsl.wu.patch new file mode 100644 index 0000000..580d739 --- /dev/null +++ b/0027-LoongArch-Add-alsl.wu.patch @@ -0,0 +1,63 @@ +From da67ca288e602414148729b521e86a0b69a2855f Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 6 Sep 2024 03:27:19 +0800 +Subject: [PATCH 27/59] LoongArch: Add alsl.wu + +On 64-bit capable LoongArch hardware, alsl.wu is similar to alsl.w but +zero-extending the 32-bit result. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (alslsi3_extend): Add alsl.wu. 
+ +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/alsl_wu.c: New test. +--- + gcc/config/loongarch/loongarch.md | 8 ++++---- + gcc/testsuite/gcc.target/loongarch/alsl_wu.c | 9 +++++++++ + 2 files changed, 13 insertions(+), 4 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/alsl_wu.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 5aecf5744..3b1e5e1cf 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -3152,15 +3152,15 @@ + [(set_attr "type" "arith") + (set_attr "mode" "")]) + +-(define_insn "alslsi3_extend" ++(define_insn "*alslsi3_extend" + [(set (match_operand:DI 0 "register_operand" "=r") +- (sign_extend:DI ++ (any_extend:DI + (plus:SI + (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_immalsl_operand" "")) + (match_operand:SI 3 "register_operand" "r"))))] +- "" +- "alsl.w\t%0,%1,%3,%2" ++ "TARGET_64BIT" ++ "alsl.w\t%0,%1,%3,%2" + [(set_attr "type" "arith") + (set_attr "mode" "SI")]) + +diff --git a/gcc/testsuite/gcc.target/loongarch/alsl_wu.c b/gcc/testsuite/gcc.target/loongarch/alsl_wu.c +new file mode 100644 +index 000000000..65f55e629 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/alsl_wu.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-march=loongarch64 -mabi=lp64d -O2" } */ ++/* { dg-final { scan-assembler "alsl\\.wu" } } */ ++ ++unsigned long ++test (unsigned int a, unsigned int b) ++{ ++ return (a << 2) + b; ++} +-- +2.47.3 + diff --git a/0028-LoongArch-Fix-cost-model-for-alsl.patch b/0028-LoongArch-Fix-cost-model-for-alsl.patch new file mode 100644 index 0000000..5792daf --- /dev/null +++ b/0028-LoongArch-Fix-cost-model-for-alsl.patch @@ -0,0 +1,97 @@ +From 3eb0d34f9aca3a085a87484599f7900123c6928a Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 6 Sep 2024 00:34:55 +0800 +Subject: [PATCH 28/59] LoongArch: Fix cost model for alsl + +Our cost model for alsl was wrong: it matches (a + 
b * imm) where imm is +1, 2, 3, or 4 (should be 2, 4, 8, or 16), and it does not match +(a + (b << imm)) at all. For the test case: + + a += c << 3; + b += c << 3; + +it caused the compiler to perform a CSE and make one slli and two add, +but we just want two alsl. + +Also add a "code == PLUS" check to prevent matching a - (b << imm) as we +don't have any "slsl" instruction. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_rtx_costs): Fix the + cost for (a + b * imm) and (a + (b << imm)) which can be + implemented with a single alsl instruction. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/alsl-cost.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 27 +++++++++++++++---- + .../gcc.target/loongarch/alsl-cost.c | 14 ++++++++++ + 2 files changed, 36 insertions(+), 5 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/alsl-cost.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index da9620d46..b9b9bbab8 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3919,14 +3919,31 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + + /* If it's an add + mult (which is equivalent to shift left) and + it's immediate operand satisfies const_immalsl_operand predicate. 
*/ +- if ((mode == SImode || (TARGET_64BIT && mode == DImode)) +- && GET_CODE (XEXP (x, 0)) == MULT) ++ if (code == PLUS ++ && (mode == SImode || (TARGET_64BIT && mode == DImode))) + { +- rtx op2 = XEXP (XEXP (x, 0), 1); +- if (const_immalsl_operand (op2, mode)) ++ HOST_WIDE_INT shamt = -1; ++ rtx lhs = XEXP (x, 0); ++ rtx_code code_lhs = GET_CODE (lhs); ++ ++ switch (code_lhs) ++ { ++ case ASHIFT: ++ if (CONST_INT_P (XEXP (lhs, 1))) ++ shamt = INTVAL (XEXP (lhs, 1)); ++ break; ++ case MULT: ++ if (CONST_INT_P (XEXP (lhs, 1))) ++ shamt = exact_log2 (INTVAL (XEXP (lhs, 1))); ++ break; ++ default: ++ break; ++ } ++ ++ if (IN_RANGE (shamt, 1, 4)) + { + *total = (COSTS_N_INSNS (1) +- + set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed) ++ + set_src_cost (XEXP (lhs, 0), mode, speed) + + set_src_cost (XEXP (x, 1), mode, speed)); + return true; + } +diff --git a/gcc/testsuite/gcc.target/loongarch/alsl-cost.c b/gcc/testsuite/gcc.target/loongarch/alsl-cost.c +new file mode 100644 +index 000000000..a18227901 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/alsl-cost.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mtune=loongarch64" } */ ++/* { dg-final { scan-assembler-times "alsl\\\.\[wd\]" 2 } } */ ++ ++struct P ++{ ++ long a, b; ++}; ++ ++struct P ++t (struct P x, long n) ++{ ++ return (struct P){.a = x.a + n * 8, .b = x.b + n * 8}; ++} +-- +2.47.3 + diff --git a/0029-LoongArch-Simplify-using-bstr-ins-pick-instructions-.patch b/0029-LoongArch-Simplify-using-bstr-ins-pick-instructions-.patch new file mode 100644 index 0000000..b7ef687 --- /dev/null +++ b/0029-LoongArch-Simplify-using-bstr-ins-pick-instructions-.patch @@ -0,0 +1,220 @@ +From 05ae624a9f8700ed04ab88105771783a3d7f3633 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Thu, 5 Sep 2024 17:53:41 +0800 +Subject: [PATCH 29/59] LoongArch: Simplify using bstr{ins,pick} instructions + for and + +For bstrins, we can merge it into and3 instead of having a +separate define_insn. 
+ +For bstrpick, we can use the constraints to ensure the first source +register and the destination register are the same hardware register, +instead of emitting a move manually. + +This will simplify the next commit where we'll reassociate bitwise +and left shift for better code generation. + +gcc/ChangeLog: + + * config/loongarch/constraints.md (Yy): New define_constraint. + * config/loongarch/loongarch.cc (loongarch_print_operand): + For "%M", output the index of bits to be used with + bstrins/bstrpick. + * config/loongarch/predicates.md (ins_zero_bitmask_operand): + Exclude low_bitmask_operand as for low_bitmask_operand it's + always better to use bstrpick instead of bstrins. + (and_operand): New define_predicate. + * config/loongarch/loongarch.md (any_or): New + define_code_iterator. + (bitwise_operand): New define_code_attr. + (*3): New define_insn. + (*and3): New define_insn. + (3): New define_expand. + (and3_extended): Remove, replaced by the 3rd alternative + of *and3. + (bstrins__for_mask): Remove, replaced by the 4th + alternative of *and3. + (*si3_internal): Remove, already covered by + the *3 and *and3 templates. 
+--- + gcc/config/loongarch/constraints.md | 4 ++ + gcc/config/loongarch/loongarch.cc | 12 +++++ + gcc/config/loongarch/loongarch.md | 77 +++++++++++------------------ + gcc/config/loongarch/predicates.md | 8 ++- + 4 files changed, 53 insertions(+), 48 deletions(-) + +diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md +index 40ac8d7f2..7a090be1e 100644 +--- a/gcc/config/loongarch/constraints.md ++++ b/gcc/config/loongarch/constraints.md +@@ -292,6 +292,10 @@ + "@internal" + (match_operand 0 "low_bitmask_operand")) + ++(define_constraint "Yy" ++ "@internal" ++ (match_operand 0 "ins_zero_bitmask_operand")) ++ + (define_constraint "YI" + "@internal + A replicated vector const in which the replicated value is in the range +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index b9b9bbab8..5eec36148 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6132,6 +6132,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, + 'i' Print i if the operand is not a register. + 'L' Print the low-part relocation associated with OP. + 'm' Print one less than CONST_INT OP in decimal. ++ 'M' Print the indices of the lowest enabled bit and the highest ++ enabled bit in a mask (for bstr* instructions). + 'N' Print the inverse of the integer branch condition for comparison OP. + 'Q' Print R_LARCH_RELAX for TLS IE. + 'r' Print address 12-31bit relocation associated with OP. 
+@@ -6258,6 +6260,16 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + output_operand_lossage ("invalid use of '%%%c'", letter); + break; + ++ case 'M': ++ if (CONST_INT_P (op)) ++ { ++ HOST_WIDE_INT mask = INTVAL (op); ++ fprintf (file, "%d,%d", floor_log2 (mask), ctz_hwi (mask)); ++ } ++ else ++ output_operand_lossage ("invalid use of '%%%c'", letter); ++ break; ++ + case 'N': + loongarch_print_int_branch_condition (file, reverse_condition (code), + letter); +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 3b1e5e1cf..c875d9134 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -488,7 +488,11 @@ + ;; This code iterator allows the three bitwise instructions to be generated + ;; from the same template. + (define_code_iterator any_bitwise [and ior xor]) ++(define_code_iterator any_or [ior xor]) + (define_code_iterator neg_bitwise [and ior]) ++(define_code_attr bitwise_operand [(and "and_operand") ++ (ior "uns_arith_operand") ++ (xor "uns_arith_operand")]) + + ;; This code iterator allows unsigned and signed division to be generated + ;; from the same template. +@@ -1546,23 +1550,37 @@ + ;; .................... 
+ ;; + +-(define_insn "3" +- [(set (match_operand:X 0 "register_operand" "=r,r") +- (any_bitwise:X (match_operand:X 1 "register_operand" "%r,r") +- (match_operand:X 2 "uns_arith_operand" "r,K")))] ++(define_insn "*3" ++ [(set (match_operand:GPR 0 "register_operand" "=r,r") ++ (any_or:GPR (match_operand:GPR 1 "register_operand" "%r,r") ++ (match_operand:GPR 2 "uns_arith_operand" "r,K")))] + "" + "%i2\t%0,%1,%2" + [(set_attr "type" "logical") + (set_attr "mode" "")]) + +-(define_insn "*si3_internal" +- [(set (match_operand:SI 0 "register_operand" "=r,r") +- (any_bitwise:SI (match_operand:SI 1 "register_operand" "%r,r") +- (match_operand:SI 2 "uns_arith_operand" " r,K")))] +- "TARGET_64BIT" +- "%i2\t%0,%1,%2" +- [(set_attr "type" "logical") +- (set_attr "mode" "SI")]) ++(define_insn "*and3" ++ [(set (match_operand:GPR 0 "register_operand" "=r,r,r,r") ++ (and:GPR (match_operand:GPR 1 "register_operand" "%r,r,r,0") ++ (match_operand:GPR 2 "and_operand" "r,K,Yx,Yy")))] ++ "" ++ "@ ++ and\t%0,%1,%2 ++ andi\t%0,%1,%2 ++ * operands[2] = GEN_INT (INTVAL (operands[2]) \ ++ & GET_MODE_MASK (mode)); \ ++ return \"bstrpick.\t%0,%1,%M2\"; ++ * operands[2] = GEN_INT (~INTVAL (operands[2]) \ ++ & GET_MODE_MASK (mode)); \ ++ return \"bstrins.\t%0,%.,%M2\";" ++ [(set_attr "move_type" "logical,logical,pick_ins,pick_ins") ++ (set_attr "mode" "")]) ++ ++(define_expand "3" ++ [(set (match_operand:X 0 "register_operand") ++ (any_bitwise:X (match_operand:X 1 "register_operand") ++ (match_operand:X 2 "")))] ++ "") + + (define_insn "one_cmpl2" + [(set (match_operand:X 0 "register_operand" "=r") +@@ -1580,41 +1598,6 @@ + [(set_attr "type" "logical") + (set_attr "mode" "SI")]) + +-(define_insn "and3_extended" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "r") +- (match_operand:GPR 2 "low_bitmask_operand" "Yx")))] +- "" +-{ +- int len; +- +- len = low_bitmask_len (mode, INTVAL (operands[2])); +- operands[2] = GEN_INT (len-1); +- 
return "bstrpick.\t%0,%1,%2,0"; +-} +- [(set_attr "move_type" "pick_ins") +- (set_attr "mode" "")]) +- +-(define_insn_and_split "*bstrins__for_mask" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (and:GPR (match_operand:GPR 1 "register_operand" "r") +- (match_operand:GPR 2 "ins_zero_bitmask_operand" "i")))] +- "" +- "#" +- "" +- [(set (match_dup 0) (match_dup 1)) +- (set (zero_extract:GPR (match_dup 0) (match_dup 2) (match_dup 3)) +- (const_int 0))] +- { +- unsigned HOST_WIDE_INT mask = ~UINTVAL (operands[2]); +- int lo = ffs_hwi (mask) - 1; +- int len = low_bitmask_len (mode, mask >> lo); +- +- len = MIN (len, GET_MODE_BITSIZE (mode) - lo); +- operands[2] = GEN_INT (len); +- operands[3] = GEN_INT (lo); +- }) +- + (define_insn_and_split "*bstrins__for_ior_mask" + [(set (match_operand:GPR 0 "register_operand" "=r") + (ior:GPR (and:GPR (match_operand:GPR 1 "register_operand" "r") +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 28f8a0fba..8d9f92e22 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -405,7 +405,13 @@ + (match_test "low_bitmask_len (mode, \ + ~UINTVAL (op) | (~UINTVAL(op) - 1)) \ + > 0") +- (not (match_operand 0 "const_uns_arith_operand")))) ++ (not (match_operand 0 "const_uns_arith_operand")) ++ (not (match_operand 0 "low_bitmask_operand")))) ++ ++(define_predicate "and_operand" ++ (ior (match_operand 0 "uns_arith_operand") ++ (match_operand 0 "low_bitmask_operand") ++ (match_operand 0 "ins_zero_bitmask_operand"))) + + (define_predicate "const_call_insn_operand" + (match_code "const,symbol_ref,label_ref") +-- +2.47.3 + diff --git a/0030-LoongArch-Improve-reassociation-for-bitwise-operatio.patch b/0030-LoongArch-Improve-reassociation-for-bitwise-operatio.patch new file mode 100644 index 0000000..3bbc83c --- /dev/null +++ b/0030-LoongArch-Improve-reassociation-for-bitwise-operatio.patch @@ -0,0 +1,396 @@ +From 9a16482a76f1b9034fc23b6f09353d661996ab66 Mon 
Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Tue, 14 Jan 2025 17:26:04 +0800 +Subject: [PATCH 30/59] LoongArch: Improve reassociation for bitwise operation + and left shift [PR 115921] + +For things like + + (x | 0x101) << 11 + +It's obvious to write: + + ori $r4,$r4,257 + slli.d $r4,$r4,11 + +But we are actually generating something insane: + + lu12i.w $r12,524288>>12 # 0x80000 + ori $r12,$r12,2048 + slli.d $r4,$r4,11 + or $r4,$r4,$r12 + jr $r1 + +It's because the target-independent canonicalization was written before +we have all the RISC targets where loading an immediate may need +multiple instructions. So for these targets we need to handle this in +the target code. + +We do the reassociation on our own (i.e. reverting the +target-independent reassociation) if "(reg [&|^] mask) << shamt" does +not need to load mask into a register, and either: +- (mask << shamt) needs to be loaded into a register, or +- shamt is a const_immalsl_operand, so the outer shift may be further + combined with an add. + +gcc/ChangeLog: + + PR target/115921 + * config/loongarch/loongarch-protos.h + (loongarch_reassoc_shift_bitwise): New function prototype. + * config/loongarch/loongarch.cc + (loongarch_reassoc_shift_bitwise): Implement. + * config/loongarch/loongarch.md + (*alslsi3_extend_subreg): New define_insn_and_split. + (_shift_reverse): New + define_insn_and_split. + (_alsl_reversesi_extended): New + define_insn_and_split. + (zero_extend_ashift): Remove as it's just a special case of + and_shift_reversedi, and it does not make too much sense to + write "alsl.d rd,rs,r0,shamt" instead of "slli.d rd,rs,shamt". + (bstrpick_alsl_paired): Remove as it is already done by + splitting and_shift_reversedi into and + ashift first, then + late combining the ashift and a further add. + +gcc/testsuite/ChangeLog: + + PR target/115921 + * gcc.target/loongarch/bstrpick_alsl_paired.c (scan-rtl-dump): + Scan for and_shift_reversedi instead of the removed + bstrpick_alsl_paired. 
+ * gcc.target/loongarch/bitwise-shift-reassoc.c: New test. +--- + gcc/config/loongarch/loongarch-protos.h | 2 + + gcc/config/loongarch/loongarch.cc | 35 +++++ + gcc/config/loongarch/loongarch.md | 135 +++++++++++++----- + .../loongarch/bitwise-shift-reassoc.c | 98 +++++++++++++ + 4 files changed, 238 insertions(+), 32 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc.c + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 12831eb93..cb2da4bbf 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -85,6 +85,8 @@ extern bool loongarch_split_move_p (rtx, rtx); + extern void loongarch_split_move (rtx, rtx); + extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode); + extern void loongarch_split_plus_constant (rtx *, machine_mode); ++extern rtx loongarch_reassoc_shift_bitwise (bool is_and, rtx shamt, ++ rtx mask, machine_mode mode); + extern void loongarch_split_vector_move (rtx, rtx); + extern const char *loongarch_output_move (rtx *); + #ifdef RTX_CODE +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 5eec36148..8b9f96f73 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4520,6 +4520,41 @@ loongarch_split_plus_constant (rtx *op, machine_mode mode) + op[2] = gen_int_mode (v, mode); + } + ++/* Test if reassociate (a << shamt) [&|^] mask to ++ (a [&|^] (mask >> shamt)) << shamt is possible and beneficial. ++ If true, return (mask >> shamt). Return NULL_RTX otherwise. 
*/ ++ ++rtx ++loongarch_reassoc_shift_bitwise (bool is_and, rtx shamt, rtx mask, ++ machine_mode mode) ++{ ++ gcc_checking_assert (CONST_INT_P (shamt)); ++ gcc_checking_assert (CONST_INT_P (mask)); ++ gcc_checking_assert (mode == SImode || mode == DImode); ++ ++ if (ctz_hwi (INTVAL (mask)) < INTVAL (shamt)) ++ return NULL_RTX; ++ ++ rtx new_mask = simplify_const_binary_operation (LSHIFTRT, mode, mask, ++ shamt); ++ if (const_uns_arith_operand (new_mask, mode)) ++ return new_mask; ++ ++ if (!is_and) ++ return NULL_RTX; ++ ++ if (low_bitmask_operand (new_mask, mode)) ++ return new_mask; ++ ++ /* Do an arithmetic shift for checking ins_zero_bitmask_operand: ++ ashiftrt (0xffffffff00000000, 2) is 0xffffffff60000000 which is an ++ ins_zero_bitmask_operand, but lshiftrt will produce ++ 0x3fffffff60000000. */ ++ new_mask = simplify_const_binary_operation (ASHIFTRT, mode, mask, ++ shamt); ++ return ins_zero_bitmask_operand (new_mask, mode) ? new_mask : NULL_RTX; ++} ++ + /* Implement TARGET_CONSTANT_ALIGNMENT. */ + + static HOST_WIDE_INT +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index c875d9134..5d5771999 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -493,6 +493,7 @@ + (define_code_attr bitwise_operand [(and "and_operand") + (ior "uns_arith_operand") + (xor "uns_arith_operand")]) ++(define_code_attr is_and [(and "true") (ior "false") (xor "false")]) + + ;; This code iterator allows unsigned and signed division to be generated + ;; from the same template. +@@ -3093,38 +3094,6 @@ + } + }); + +-;; The following templates were added to generate "bstrpick.d + alsl.d" +-;; instruction pairs. 
+-;; It is required that the values of const_immalsl_operand and +-;; immediate_operand must have the following correspondence: +-;; +-;; (immediate_operand >> const_immalsl_operand) == 0xffffffff +- +-(define_insn "zero_extend_ashift" +- [(set (match_operand:DI 0 "register_operand" "=r") +- (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") +- (match_operand 2 "const_immalsl_operand" "")) +- (match_operand 3 "immediate_operand" "")))] +- "TARGET_64BIT +- && ((INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff)" +- "bstrpick.d\t%0,%1,31,0\n\talsl.d\t%0,%0,$r0,%2" +- [(set_attr "type" "arith") +- (set_attr "mode" "DI") +- (set_attr "insn_count" "2")]) +- +-(define_insn "bstrpick_alsl_paired" +- [(set (match_operand:DI 0 "register_operand" "=&r") +- (plus:DI (match_operand:DI 1 "register_operand" "r") +- (and:DI (ashift:DI (match_operand:DI 2 "register_operand" "r") +- (match_operand 3 "const_immalsl_operand" "")) +- (match_operand 4 "immediate_operand" ""))))] +- "TARGET_64BIT +- && ((INTVAL (operands[4]) >> INTVAL (operands[3])) == 0xffffffff)" +- "bstrpick.d\t%0,%2,31,0\n\talsl.d\t%0,%0,%1,%3" +- [(set_attr "type" "arith") +- (set_attr "mode" "DI") +- (set_attr "insn_count" "2")]) +- + (define_insn "alsl3" + [(set (match_operand:GPR 0 "register_operand" "=r") + (plus:GPR (ashift:GPR (match_operand:GPR 1 "register_operand" "r") +@@ -3147,6 +3116,108 @@ + [(set_attr "type" "arith") + (set_attr "mode" "SI")]) + ++(define_insn "*alslsi3_extend_subreg" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (any_extend:DI ++ (plus:SI ++ (subreg:SI ++ (ashift:DI (match_operand:DI 1 "register_operand" "r") ++ (match_operand 2 "const_immalsl_operand" "")) ++ 0) ++ (subreg:SI (match_operand:DI 3 "register_operand" "r") 0))))] ++ "TARGET_64BIT" ++ "alsl.w\t%0,%1,%3,%2" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI")]) ++ ++;; The generic code prefers "(reg << shamt) [&|^] (mask << shamt)" ++;; instead of "(reg [&|^] mask) << shamt" but we want 
the latter if ++;; we don't need to load mask into an register, and either: ++;; - (mask << shamt) needs to be loaded into an register, or ++;; - shamt is a const_immalsl_operand, so the outer shift may be further ++;; combined with an add. ++(define_insn_and_split "_shift_reverse" ++ [(set (match_operand:X 0 "register_operand" "=r") ++ (any_bitwise:X ++ (ashift:X (match_operand:X 1 "register_operand" "r") ++ (match_operand:SI 2 "const_int_operand" "i")) ++ (match_operand:X 3 "const_int_operand" "i")))] ++ "(const_immalsl_operand (operands[2], SImode) ++ || ! (operands[3], mode)) ++ && loongarch_reassoc_shift_bitwise (, operands[2], operands[3], ++ mode)" ++ "#" ++ "&& true" ++ [(set (match_dup 0) (any_bitwise:X (match_dup 1) (match_dup 3))) ++ (set (match_dup 0) (ashift:X (match_dup 0) (match_dup 2)))] ++ { ++ operands[3] = loongarch_reassoc_shift_bitwise (, ++ operands[2], ++ operands[3], ++ mode); ++ ++ if (ins_zero_bitmask_operand (operands[3], mode)) ++ { ++ gcc_checking_assert (); ++ emit_move_insn (operands[0], operands[1]); ++ operands[1] = operands[0]; ++ } ++ }) ++ ++;; The late_combine2 pass can handle slli.d + add.d => alsl.d, so we ++;; already have slli.d + any_bitwise + add.d => any_bitwise + slli.d + ++;; add.d => any_bitwise + alsl.d. But late_combine2 cannot handle slli.d + ++;; add.w => alsl.w, so implement slli.d + and + add.w => and + alsl.w on ++;; our own. 
++(define_insn_and_split "_alsl_reversesi_extended" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI ++ (plus:SI ++ (subreg:SI ++ (any_bitwise:DI ++ (ashift:DI ++ (match_operand:DI 1 "register_operand" "r") ++ (match_operand:SI 2 "const_immalsl_operand" "")) ++ (match_operand:DI 3 "const_int_operand" "i")) ++ 0) ++ (match_operand:SI 4 "register_operand" "r"))))] ++ "TARGET_64BIT ++ && loongarch_reassoc_shift_bitwise (, operands[2], operands[3], ++ SImode)" ++ "#" ++ "&& true" ++ [; r0 = r1 [&|^] r3 is emitted in PREPARATION-STATEMENTS because we ++ ; need to handle a special case, see below. ++ (set (match_dup 0) ++ (sign_extend:DI ++ (plus:SI (ashift:SI (subreg:SI (match_dup 0) 0) (match_dup 2)) ++ (match_dup 4))))] ++ { ++ operands[3] = loongarch_reassoc_shift_bitwise (, ++ operands[2], ++ operands[3], ++ SImode); ++ ++ if (ins_zero_bitmask_operand (operands[3], SImode)) ++ { ++ gcc_checking_assert (); ++ emit_move_insn (operands[0], operands[1]); ++ operands[1] = operands[0]; ++ } ++ ++ if (operands[3] != CONSTM1_RTX (SImode)) ++ emit_insn (gen_di3 (operands[0], operands[1], operands[3])); ++ else ++ { ++ /* Hmm would we really reach here? If we reach here we'd have ++ a miss-optimization in the generic code (as it should have ++ optimized this to alslsi3_extend_subreg). But let's be safe ++ than sorry. */ ++ gcc_checking_assert (); ++ emit_move_insn (operands[0], operands[1]); ++ } ++ }) ++ + + + ;; Reverse the order of bytes of operand 1 and store the result in operand 0. 
+diff --git a/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc.c b/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc.c +new file mode 100644 +index 000000000..3f1977556 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc.c +@@ -0,0 +1,98 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* ++**t0: ++** ori (\$r[0-9]+),\$r4,257 ++** slli.d \$r4,\1,11 ++** jr \$r1 ++*/ ++long ++t0 (long x) ++{ ++ return (x | 0x101) << 11; ++} ++ ++/* ++**t1: ++** xori (\$r[0-9]+),\$r4,257 ++** alsl.d \$r4,\1,\$r5,3 ++** jr \$r1 ++*/ ++long ++t1 (long x, long y) ++{ ++ return ((x ^ 0x101) << 3) + y; ++} ++ ++/* ++**t2: ++** bstrins.d (\$r[0-9]+),\$r0,15,4 ++** alsl.d \$r4,\1,\$r5,2 ++** jr \$r1 ++*/ ++long ++t2 (long x, long y) ++{ ++ return ((x & ~0xfff0) << 2) + y; ++} ++ ++/* ++**t3: ++** ori (\$r[0-9]+),\$r4,3855 ++** alsl.w \$r4,\1,\$r5,1 ++** jr \$r1 ++*/ ++long ++t3 (long x, long y) ++{ ++ return (int)(((x | 0xf0f) << 1) + y); ++} ++ ++/* ++**t4: ++** bstrpick.d (\$r[0-9]+),\$r4,31,0 ++** slli.d \$r4,\1,1 ++** jr \$r1 ++*/ ++unsigned long ++t4 (unsigned long x) ++{ ++ return x << 32 >> 31; ++} ++ ++/* ++**t5: ++** bstrpick.d (\$r[0-9]+),\$r4,31,0 ++** alsl.d \$r4,\1,\$r5,2 ++** jr \$r1 ++*/ ++unsigned long ++t5 (unsigned long x, unsigned long y) ++{ ++ return (x << 32 >> 30) + y; ++} ++ ++/* ++**t6: ++** alsl.w \$r4,\$r4,\$r5,2 ++** jr \$r1 ++*/ ++unsigned int ++t6 (unsigned long x, unsigned long y) ++{ ++ return (x << 32 >> 30) + y; ++} ++ ++/* ++**t7: ++** bstrins.d \$r4,\$r0,47,0 ++** alsl.d \$r4,\$r4,\$r5,2 ++** jr \$r1 ++*/ ++unsigned long ++t7 (unsigned long x, unsigned long y) ++{ ++ return ((x & 0xffff000000000000) << 2) + y; ++} +-- +2.47.3 + diff --git a/0031-LoongArch-Implement-target-attribute.patch b/0031-LoongArch-Implement-target-attribute.patch new file mode 100644 index 0000000..98018df --- /dev/null +++ 
b/0031-LoongArch-Implement-target-attribute.patch @@ -0,0 +1,948 @@ +From 8da98b0a02ac9ee07dd18e31a8b81f1c45e3ee3e Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Tue, 7 Jan 2025 11:42:25 +0800 +Subject: [PATCH 31/59] LoongArch: Implement target attribute. + +Add function attributes support for LoongArch. + +Currently, the following items are supported: + + __attribute__ ((target ("{no-}strict-align"))) + __attribute__ ((target ("cmodel="))) + __attribute__ ((target ("arch="))) + __attribute__ ((target ("tune="))) + __attribute__ ((target ("{no-}lsx"))) + __attribute__ ((target ("{no-}lasx"))) + +This implementation is derived from AArch64. + +gcc/ChangeLog: + + * attr-urls.def: Regenerate. + * config.gcc: Add loongarch-target-attr.o to extra_objs. + * config/loongarch/loongarch-protos.h + (loongarch_option_valid_attribute_p): Function declaration. + (loongarch_option_override_internal): Likewise. + * config/loongarch/loongarch.cc + (loongarch_option_override_internal): Delete the modifications + to target_option_default_node and target_option_current_node. + (loongarch_set_current_function): Add annotation information. + (loongarch_option_override): add assignment operations to + target_option_default_node and target_option_current_node. + (TARGET_OPTION_VALID_ATTRIBUTE_P): Define. + * config/loongarch/t-loongarch: Add compilation of target file + loongarch-target-attr.o. + * doc/extend.texi: Add description information of LoongArch + Function Attributes. + * config/loongarch/loongarch-target-attr.cc: New file. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/arch-func-attr-1.c: New test. + * gcc.target/loongarch/cmodel-func-attr-1.c: New test. + * gcc.target/loongarch/lasx-func-attr-1.c: New test. + * gcc.target/loongarch/lasx-func-attr-2.c: New test. + * gcc.target/loongarch/lsx-func-attr-1.c: New test. + * gcc.target/loongarch/lsx-func-attr-2.c: New test. + * gcc.target/loongarch/strict_align-func-attr-1.c: New test. 
+ * gcc.target/loongarch/strict_align-func-attr-2.c: New test. + * gcc.target/loongarch/vector-func-attr-1.c: New test. + * gcc.target/loongarch/attr-check-error-message.c: New test. +--- + gcc/config.gcc | 2 +- + gcc/config/loongarch/loongarch-protos.h | 2 + + gcc/config/loongarch/loongarch-target-attr.cc | 413 ++++++++++++++++++ + gcc/config/loongarch/loongarch.cc | 26 +- + gcc/config/loongarch/t-loongarch | 6 + + gcc/doc/extend.texi | 75 ++++ + .../gcc.target/loongarch/arch-func-attr-1.c | 16 + + .../loongarch/attr-check-error-message.c | 30 ++ + .../gcc.target/loongarch/cmodel-func-attr-1.c | 17 + + .../gcc.target/loongarch/lasx-func-attr-1.c | 15 + + .../gcc.target/loongarch/lasx-func-attr-2.c | 12 + + .../gcc.target/loongarch/lsx-func-attr-1.c | 15 + + .../gcc.target/loongarch/lsx-func-attr-2.c | 12 + + .../loongarch/strict_align-func-attr-1.c | 17 + + .../loongarch/strict_align-func-attr-2.c | 17 + + .../gcc.target/loongarch/vector-func-attr-1.c | 15 + + 16 files changed, 685 insertions(+), 5 deletions(-) + create mode 100644 gcc/config/loongarch/loongarch-target-attr.cc + create mode 100644 gcc/testsuite/gcc.target/loongarch/arch-func-attr-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-check-error-message.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/cmodel-func-attr-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-func-attr-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-func-attr-2.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/lsx-func-attr-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/lsx-func-attr-2.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/strict_align-func-attr-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/strict_align-func-attr-2.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector-func-attr-1.c + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index d7a525ade..45d945c99 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc 
+@@ -458,7 +458,7 @@ loongarch*-*-*) + cpu_type=loongarch + d_target_objs="loongarch-d.o" + extra_headers="larchintrin.h lsxintrin.h lasxintrin.h" +- extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o loongarch-evolution.o" ++ extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o loongarch-evolution.o loongarch-target-attr.o" + extra_gcc_objs="loongarch-driver.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" + extra_options="${extra_options} g.opt fused-madd.opt" + ;; +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index cb2da4bbf..0096cd6e8 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -214,4 +214,6 @@ extern void loongarch_emit_swrsqrtsf (rtx, rtx, machine_mode, bool); + extern void loongarch_emit_swdivsf (rtx, rtx, rtx, machine_mode); + extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type); + extern bool loongarch_symbol_extreme_p (enum loongarch_symbol_type); ++extern bool loongarch_option_valid_attribute_p (tree, tree, tree, int); ++extern void loongarch_option_override_internal (struct loongarch_target *, struct gcc_options *, struct gcc_options *); + #endif /* ! GCC_LOONGARCH_PROTOS_H */ +diff --git a/gcc/config/loongarch/loongarch-target-attr.cc b/gcc/config/loongarch/loongarch-target-attr.cc +new file mode 100644 +index 000000000..6bb1e6b75 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-target-attr.cc +@@ -0,0 +1,413 @@ ++/* Subroutines used for LoongArch code generation. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. ++ Based on AArch64 target for GNU compiler. ++ ++This file is part of GCC. 
++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "target.h" ++#include "tree.h" ++#include "tm_p.h" ++#include "diagnostic.h" ++#include "opts.h" ++ ++/* Enum describing the various ways we can handle attributes. ++ In many cases we can reuse the generic option handling machinery. */ ++ ++enum loongarch_attr_opt_type ++{ ++ loongarch_attr_mask, /* Attribute should set a bit in target_flags. */ ++ loongarch_attr_enum, /* Attribute sets an enum variable. */ ++ loongarch_attr_bool /* Attribute sets or unsets a boolean variable. */ ++}; ++ ++/* All the information needed to handle a target attribute. ++ NAME is the name of the attribute. ++ ATTR_TYPE specifies the type of behavior of the attribute as described ++ in the definition of enum loongarch_attr_opt_type. ++ ALLOW_NEG is true if the attribute supports a "no-" form. ++ OPT_NUM is the enum specifying the option that the attribute modifies. ++ This is needed for attributes that mirror the behavior of a command-line ++ option, that is it has ATTR_TYPE loongarch_attr_mask. */ ++ ++struct loongarch_attribute_info ++{ ++ const char *name; ++ enum loongarch_attr_opt_type attr_type; ++ bool allow_neg; ++ enum opt_code opt_num; ++}; ++/* The target attributes that we support. 
*/ ++ ++static const struct loongarch_attribute_info loongarch_attributes[] = ++{ ++ { "strict-align", loongarch_attr_mask, true, OPT_mstrict_align }, ++ { "cmodel", loongarch_attr_enum, false, OPT_mcmodel_ }, ++ { "arch", loongarch_attr_enum, false, OPT_march_ }, ++ { "tune", loongarch_attr_enum, false, OPT_mtune_ }, ++ { "lsx", loongarch_attr_bool, true, OPT_mlsx }, ++ { "lasx", loongarch_attr_bool, true, OPT_mlasx }, ++ { NULL, loongarch_attr_bool, false, OPT____ } ++}; ++ ++bool ++loongarch_handle_option (struct gcc_options *opts, ++ struct gcc_options *opts_set ATTRIBUTE_UNUSED, ++ const struct cl_decoded_option *decoded, ++ location_t loc ATTRIBUTE_UNUSED) ++{ ++ size_t code = decoded->opt_index; ++ int val = decoded->value; ++ ++ switch (code) ++ { ++ case OPT_mstrict_align: ++ if (val) ++ opts->x_target_flags |= MASK_STRICT_ALIGN; ++ else ++ opts->x_target_flags &= ~MASK_STRICT_ALIGN; ++ return true; ++ ++ case OPT_mcmodel_: ++ opts->x_la_opt_cmodel = val; ++ return true; ++ ++ case OPT_march_: ++ opts->x_la_opt_cpu_arch = val; ++ ++ /* Set these variables to the initial values so that they can be reset ++ in the loongarch_config_target function according to the ARCH ++ settings. */ ++ opts->x_la_opt_simd = M_OPT_UNSET; ++ opts->x_la_opt_fpu = M_OPT_UNSET; ++ opts->x_la_isa_evolution = 0; ++ return true; ++ ++ case OPT_mtune_: ++ opts->x_la_opt_cpu_tune = val; ++ ++ /* Set these variables to the initial values so that they can be reset ++ in the loongarch_target_option_override function according to the TUNE ++ settings. */ ++ opts->x_str_align_functions = NULL; ++ opts->x_str_align_loops = NULL; ++ opts->x_str_align_jumps = NULL; ++ return true; ++ ++ case OPT_mlsx: ++ opts->x_la_opt_simd = val ? (la_opt_simd == ISA_EXT_SIMD_LASX ++ ? ISA_EXT_SIMD_LASX : ISA_EXT_SIMD_LSX) : ISA_EXT_NONE; ++ return true; ++ ++ case OPT_mlasx: ++ opts->x_la_opt_simd = val ? ISA_EXT_SIMD_LASX ++ : (la_opt_simd == ISA_EXT_SIMD_LASX || la_opt_simd == ISA_EXT_SIMD_LSX ++ ? 
ISA_EXT_SIMD_LSX : ISA_EXT_NONE); ++ return true; ++ ++ default: ++ return true; ++ } ++} ++ ++/* Parse ARG_STR which contains the definition of one target attribute. ++ Show appropriate errors if any or return true if the attribute is valid. */ ++ ++static bool ++loongarch_process_one_target_attr (char *arg_str, location_t loc) ++{ ++ bool invert = false; ++ ++ size_t len = strlen (arg_str); ++ ++ if (len == 0) ++ { ++ error_at (loc, "malformed % pragma or attribute"); ++ return false; ++ } ++ ++ char *str_to_check = (char *) alloca (len + 1); ++ strcpy (str_to_check, arg_str); ++ ++ if (len > 3 && startswith (str_to_check, "no-")) ++ { ++ invert = true; ++ str_to_check += 3; ++ } ++ char *arg = strchr (str_to_check, '='); ++ ++ /* If we found opt=foo then terminate STR_TO_CHECK at the '=' ++ and point ARG to "foo". */ ++ if (arg) ++ { ++ *arg = '\0'; ++ arg++; ++ } ++ const struct loongarch_attribute_info *p_attr; ++ bool found = false; ++ for (p_attr = loongarch_attributes; p_attr->name; p_attr++) ++ { ++ /* If the names don't match up, or the user has given an argument ++ to an attribute that doesn't accept one, or didn't give an argument ++ to an attribute that expects one, fail to match. */ ++ if (strcmp (str_to_check, p_attr->name) != 0) ++ continue; ++ ++ found = true; ++ ++ /* If the name matches but the attribute does not allow "no-" versions ++ then we can't match. */ ++ if (invert && !p_attr->allow_neg) ++ { ++ error_at (loc, "pragma or attribute % does not " ++ "allow a negated form", str_to_check); ++ return false; ++ } ++ ++ switch (p_attr->attr_type) ++ { ++ /* Either set or unset a boolean option. */ ++ case loongarch_attr_mask: ++ { ++ struct cl_decoded_option decoded; ++ ++ /* We only need to specify the option number. ++ loongarch_handle_option will know which mask to apply. 
*/ ++ decoded.opt_index = p_attr->opt_num; ++ decoded.value = !invert; ++ ++ loongarch_handle_option (&global_options, &global_options_set, ++ &decoded, input_location); ++ break; ++ } ++ ++ /* Use the option setting machinery to set an option to an enum. */ ++ case loongarch_attr_enum: ++ { ++ gcc_assert (arg); ++ bool valid; ++ int value; ++ struct cl_decoded_option decoded; ++ valid = opt_enum_arg_to_value (p_attr->opt_num, arg, ++ &value, CL_TARGET); ++ ++ decoded.opt_index = p_attr->opt_num; ++ decoded.value = value; ++ ++ if (valid) ++ loongarch_handle_option (&global_options, ++ &global_options_set, ++ &decoded, input_location); ++ else ++ error_at (loc, "pragma or attribute % is " ++ "not valid", str_to_check, arg); ++ break; ++ } ++ ++ /* Either set or unset a boolean option. */ ++ case loongarch_attr_bool: ++ { ++ struct cl_decoded_option decoded; ++ ++ generate_option (p_attr->opt_num, NULL, !invert, ++ CL_TARGET, &decoded); ++ loongarch_handle_option (&global_options, &global_options_set, ++ &decoded, input_location); ++ break; ++ } ++ default: ++ gcc_unreachable (); ++ } ++ } ++ ++ /* If we reached here we either have found an attribute and validated ++ it or didn't match any. If we matched an attribute but its arguments ++ were malformed we will have returned false already. */ ++ if (!found) ++ error_at (loc, "attribute % argument %qs is unknown", ++ str_to_check); ++ ++ return found; ++} ++ ++/* Count how many times the character C appears in ++ NULL-terminated string STR. */ ++ ++static unsigned int ++num_occurences_in_str (char c, char *str) ++{ ++ unsigned int res = 0; ++ while (*str != '\0') ++ { ++ if (*str == c) ++ res++; ++ ++ str++; ++ } ++ ++ return res; ++} ++ ++/* Parse the tree in ARGS that contains the target attribute information ++ and update the global target options space. */ ++ ++bool ++loongarch_process_target_attr (tree args, tree fndecl) ++{ ++ location_t loc ++ = fndecl == NULL ? 
UNKNOWN_LOCATION : DECL_SOURCE_LOCATION (fndecl); ++ ++ if (TREE_CODE (args) == TREE_LIST) ++ { ++ do ++ { ++ tree head = TREE_VALUE (args); ++ if (head) ++ { ++ if (!loongarch_process_target_attr (head, fndecl)) ++ return false; ++ } ++ args = TREE_CHAIN (args); ++ } while (args); ++ ++ return true; ++ } ++ ++ if (TREE_CODE (args) != STRING_CST) ++ { ++ error_at (loc, "attribute % argument not a string"); ++ return false; ++ } ++ ++ size_t len = strlen (TREE_STRING_POINTER (args)); ++ auto_vec buffer; ++ buffer.safe_grow (len + 1); ++ char *str_to_check = buffer.address (); ++ memcpy (str_to_check, TREE_STRING_POINTER (args), len + 1); ++ ++ if (len == 0) ++ { ++ error_at (loc, "malformed % pragma or attribute"); ++ return false; ++ } ++ ++ /* Used to catch empty spaces between commas i.e. ++ attribute ((target ("attr1,,attr2"))). */ ++ unsigned int num_commas = num_occurences_in_str (',', str_to_check); ++ ++ /* Handle multiple target attributes separated by ','. */ ++ char *token = strtok_r (str_to_check, ",", &str_to_check); ++ ++ unsigned int num_attrs = 0; ++ while (token) ++ { ++ num_attrs++; ++ if (!loongarch_process_one_target_attr (token, loc)) ++ return false; ++ ++ token = strtok_r (NULL, ",", &str_to_check); ++ } ++ ++ if (num_attrs != num_commas + 1) ++ { ++ error_at (loc, "malformed % pragma or attribute", ++ TREE_STRING_POINTER (args)); ++ return false; ++ } ++ ++ return true; ++} ++ ++/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to ++ process attribute ((target ("..."))). 
*/ ++ ++bool ++loongarch_option_valid_attribute_p (tree fndecl, tree, tree args, int) ++{ ++ struct cl_target_option cur_target; ++ bool ret; ++ tree old_optimize; ++ tree new_target, new_optimize; ++ tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); ++ ++ tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); ++ ++ old_optimize ++ = build_optimization_node (&global_options, &global_options_set); ++ ++ /* If the function changed the optimization levels as well as setting ++ target options, start with the optimizations specified. */ ++ if (func_optimize && func_optimize != old_optimize) ++ cl_optimization_restore (&global_options, &global_options_set, ++ TREE_OPTIMIZATION (func_optimize)); ++ ++ /* Save the current target options to restore at the end. */ ++ cl_target_option_save (&cur_target, &global_options, &global_options_set); ++ ++ /* If fndecl already has some target attributes applied to it, unpack ++ them so that we add this attribute on top of them, rather than ++ overwriting them. */ ++ if (existing_target) ++ { ++ struct cl_target_option *existing_options ++ = TREE_TARGET_OPTION (existing_target); ++ ++ if (existing_options) ++ cl_target_option_restore (&global_options, &global_options_set, ++ existing_options); ++ } ++ else ++ cl_target_option_restore (&global_options, &global_options_set, ++ TREE_TARGET_OPTION (target_option_current_node)); ++ ++ ret = loongarch_process_target_attr (args, fndecl); ++ ++ /* Set up any additional state. 
*/ ++ if (ret) ++ { ++ loongarch_option_override_internal (&la_target, ++ &global_options, ++ &global_options_set); ++ new_target = build_target_option_node (&global_options, ++ &global_options_set); ++ } ++ else ++ new_target = NULL; ++ ++ new_optimize = build_optimization_node (&global_options, ++ &global_options_set); ++ ++ if (fndecl && ret) ++ { ++ DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target; ++ ++ if (old_optimize != new_optimize) ++ DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; ++ } ++ ++ cl_target_option_restore (&global_options, &global_options_set, &cur_target); ++ ++ if (old_optimize != new_optimize) ++ cl_optimization_restore (&global_options, &global_options_set, ++ TREE_OPTIMIZATION (old_optimize)); ++ return ret; ++} +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 8b9f96f73..2524f897e 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -7708,7 +7708,7 @@ loongarch_reg_init (void) + = loongarch_hard_regno_mode_ok_uncached (regno, (machine_mode) mode); + } + +-static void ++void + loongarch_option_override_internal (struct loongarch_target *target, + struct gcc_options *opts, + struct gcc_options *opts_set) +@@ -7734,9 +7734,6 @@ loongarch_option_override_internal (struct loongarch_target *target, + /* Override some options according to the resolved target. */ + loongarch_target_option_override (target, opts, opts_set); + +- target_option_default_node = target_option_current_node +- = build_target_option_node (opts, opts_set); +- + loongarch_reg_init (); + } + +@@ -7775,10 +7772,15 @@ loongarch_set_current_function (tree fndecl) + else + old_tree = target_option_default_node; + ++ /* When the function is optimized, the pop_cfun will be called, and ++ the fndecl will be NULL. 
*/ + if (fndecl == NULL_TREE) + { + if (old_tree != target_option_current_node) + { ++ /* When this function is set with special options, we need to ++ restore the original global optimization options at the end ++ of function optimization. */ + loongarch_previous_fndecl = NULL_TREE; + cl_target_option_restore (&global_options, &global_options_set, + TREE_TARGET_OPTION +@@ -7788,6 +7790,9 @@ loongarch_set_current_function (tree fndecl) + } + + tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); ++ ++ /* When no separate compilation parameters are set for the function, ++ new_tree is NULL. */ + if (new_tree == NULL_TREE) + new_tree = target_option_default_node; + +@@ -7796,9 +7801,14 @@ loongarch_set_current_function (tree fndecl) + if (new_tree == old_tree) + return; + ++ /* According to the settings of the functions attribute and pragma, ++ the options is corrected. */ + cl_target_option_restore (&global_options, &global_options_set, + TREE_TARGET_OPTION (new_tree)); + ++ /* After correcting the value of options, we need to update the ++ rules for using the hardware registers to ensure that the ++ rules correspond to the options. */ + loongarch_reg_init (); + + loongarch_save_restore_target_globals (new_tree); +@@ -7819,6 +7829,11 @@ loongarch_option_override (void) + &global_options, + &global_options_set); + ++ /* Save the initial options so that we can restore the initial option ++ settings later when processing attributes and pragmas. */ ++ target_option_default_node = target_option_current_node ++ = build_target_option_node (&global_options, &global_options_set); ++ + } + + /* Implement TARGET_OPTION_SAVE. 
*/ +@@ -11372,6 +11387,9 @@ loongarch_asm_code_end (void) + #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ + loongarch_builtin_support_vector_misalignment + ++#undef TARGET_OPTION_VALID_ATTRIBUTE_P ++#define TARGET_OPTION_VALID_ATTRIBUTE_P loongarch_option_valid_attribute_p ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + #include "gt-loongarch.h" +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index 53dde9ce6..159f16f5d 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -47,6 +47,12 @@ loongarch-c.o: $(srcdir)/config/loongarch/loongarch-c.cc $(CONFIG_H) $(SYSTEM_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/loongarch/loongarch-c.cc + ++loongarch-target-attr.o: $(srcdir)/config/loongarch/loongarch-target-attr.cc \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TARGET_H) $(TREE_H) $(TM_H) \ ++ $(DIAGNOSTIC_CORE_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/loongarch/loongarch-target-attr.cc ++ + loongarch-builtins.o: $(srcdir)/config/loongarch/loongarch-builtins.cc $(CONFIG_H) \ + $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) $(RECOG_H) langhooks.h \ + $(DIAGNOSTIC_CORE_H) $(OPTABS_H) $(srcdir)/config/loongarch/loongarch-ftypes.def \ +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index bcb9329c2..495ae65a2 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -2539,6 +2539,7 @@ GCC plugins may provide their own attributes. + * Epiphany Function Attributes:: + * H8/300 Function Attributes:: + * IA-64 Function Attributes:: ++* LoongArch Function Attributes:: + * M32C Function Attributes:: + * M32R/D Function Attributes:: + * m68k Function Attributes:: +@@ -5229,6 +5230,80 @@ extern int foo () __attribute__((version_id ("20040821"))); + Calls to @code{foo} are mapped to calls to @code{foo@{20040821@}}. 
+ @end table + +@node LoongArch Function Attributes ++@subsection LoongArch Function Attributes ++ ++These function attributes are supported by the LoongArch back end: ++ ++@table @code ++@cindex @code{strict-align} function attribute, LoongArch ++@item strict-align ++@itemx no-strict-align ++@code{strict-align} indicates that the compiler should not assume that unaligned ++memory references are handled by the system. To allow the compiler to assume ++that unaligned memory references are handled by the system, the inverse attribute ++@code{no-strict-align} can be specified. The behavior is the same as for the ++command-line option @option{-mstrict-align} and @option{-mno-strict-align}. ++ ++@cindex @code{cmodel=} function attribute, LoongArch ++@item cmodel= ++Indicates that code should be generated for a particular code model for ++this function. The behavior and permissible arguments are the same as ++for the command-line option @option{-mcmodel=}. ++ ++@cindex @code{arch=} function attribute, LoongArch ++@item arch= ++Specifies the architecture version and architectural extensions to use ++for this function. The behavior and permissible arguments are the same as ++for the @option{-march=} command-line option. ++ ++@cindex @code{tune=} function attribute, LoongArch ++@item tune= ++Specifies the core for which to tune the performance of this function. ++The behavior and permissible arguments are the same as for the @option{-mtune=} ++command-line option. ++ ++@cindex @code{lsx} function attribute, LoongArch ++@item lsx ++@itemx no-lsx ++@code{lsx} indicates that vector instruction generation is allowed (not allowed) ++when compiling the function. The behavior is the same as for the command-line option ++@option{-mlsx} and @option{-mno-lsx}. ++ ++@cindex @code{lasx} function attribute, LoongArch ++@item lasx ++@itemx no-lasx ++@code{lasx} indicates that lasx instruction generation is allowed (not allowed) ++when compiling the function.
The behavior is slightly different from the ++command-line option @option{-mno-lasx}. ++Example: ++ ++@smallexample ++test.c: ++typedef int v4i32 __attribute__ ((vector_size(16), aligned(16))); ++ ++v4i32 a, b, c; ++#ifdef WITH_ATTR ++__attribute__ ((target("no-lasx"))) void ++#else ++void ++#endif ++test () ++@{ ++ c = a + b; ++@} ++@end smallexample ++@smallexample ++$ gcc test.c -o test.s -O2 -mlasx -DWITH_ATTR ++@end smallexample ++Compiled as above, 128-bit vectorization is possible. ++But the following method cannot perform 128-bit vectorization. ++@smallexample ++$ gcc test.c -o test.s -O2 -mlasx -mno-lasx ++@end smallexample ++ ++@end table ++ + @node M32C Function Attributes + @subsection M32C Function Attributes + +diff --git a/gcc/testsuite/gcc.target/loongarch/arch-func-attr-1.c b/gcc/testsuite/gcc.target/loongarch/arch-func-attr-1.c +new file mode 100644 +index 000000000..98cc7e577 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/arch-func-attr-1.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mno-lsx" } */ ++ ++extern char a[64]; ++extern char b[64]; ++ ++__attribute__ ((target ("arch=la64v1.1"))) ++void ++test (void) ++{ ++ for (int i = 0; i < 64; i++) ++ a[i] = b[i]; ++} ++ ++ ++/* { dg-final { scan-assembler "vld" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/attr-check-error-message.c b/gcc/testsuite/gcc.target/loongarch/attr-check-error-message.c +new file mode 100644 +index 000000000..82dcd1725 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/attr-check-error-message.c +@@ -0,0 +1,30 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -Wno-attributes" } */ ++ ++__attribute__ ((target ("mno-lsx"))) void ++test1 (void) /* { dg-error "attribute \\\'target\\\' argument \\\'mno-lsx\\\' is unknown" } */ ++{} ++ ++__attribute__ ((target (""))) void ++test2 (void) /* { dg-error "malformed \\\'target\\\(\\\)\\\' pragma or attribute" } */ ++{} ++ ++__attribute__ ((target ("no-cmodel="))) 
void ++test3 (void) /* { dg-error "pragma or attribute \\\'target\\\(\\\"cmodel\\\"\\\)\\\' does not allow a negated form" } */ ++{} ++ ++__attribute__ ((target ("cmodel=test"))) void ++test4 (void) /* { dg-error "pragma or attribute \\\'target\\\(\\\"cmodel=test\\\"\\\)\\\' is not valid" } */ ++{} ++ ++__attribute__ ((target ("test"))) void ++test5 (void) /* { dg-error "attribute \\\'target\\\' argument \\\'test\\\' is unknown" } */ ++{} ++ ++__attribute__ ((target (lsx))) void /* { dg-error "\\\'lsx\\\' undeclared here" } */ ++test6 (void) /* { dg-error "attribute \\\'target\\\' argument not a string" } */ ++{} ++ ++__attribute__ ((target ("lsx,"))) void ++test7 (void) /* { dg-error "malformed \\\'target\\\(\\\"lsx,\\\"\\\)\\\' pragma or attribute" } */ ++{} +diff --git a/gcc/testsuite/gcc.target/loongarch/cmodel-func-attr-1.c b/gcc/testsuite/gcc.target/loongarch/cmodel-func-attr-1.c +new file mode 100644 +index 000000000..119cd0e16 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/cmodel-func-attr-1.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mcmodel=normal -mexplicit-relocs=none" } */ ++ ++extern char a[8]; ++extern char b[8]; ++ ++__attribute__ ((target ("cmodel=extreme"))) ++void ++test (void) ++{ ++ a[0] = b[1]; ++ a[1] = b[2]; ++ a[2] = b[3]; ++ a[3] = b[4]; ++} ++ ++/* { dg-final { scan-assembler "la.global\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,a" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-func-attr-1.c b/gcc/testsuite/gcc.target/loongarch/lasx-func-attr-1.c +new file mode 100644 +index 000000000..5dad9821f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/lasx-func-attr-1.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mno-lsx" } */ ++ ++typedef int v8i32 __attribute__ ((vector_size(32), aligned(32))); ++extern v8i32 a, b, c; ++ ++__attribute__ ((target ("lasx"))) ++void ++test (void) ++{ ++ a = b + c; ++} ++ ++ ++/* { dg-final { scan-assembler "xvadd.w" } } */ +diff --git 
a/gcc/testsuite/gcc.target/loongarch/lasx-func-attr-2.c b/gcc/testsuite/gcc.target/loongarch/lasx-func-attr-2.c +new file mode 100644 +index 000000000..33cc924d0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/lasx-func-attr-2.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx" } */ ++ ++typedef int v8i32 __attribute__ ((vector_size(32), aligned(32))); ++extern v8i32 a, b, c; ++ ++__attribute__ ((target ("no-lasx"))) ++void ++test (void) ++{ ++ a = __builtin_lasx_xvadd_w (b, c); /* { dg-error "built-in function '__builtin_lasx_xvadd_w' is not enabled" } */ ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/lsx-func-attr-1.c b/gcc/testsuite/gcc.target/loongarch/lsx-func-attr-1.c +new file mode 100644 +index 000000000..3e2c1dc33 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/lsx-func-attr-1.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mno-lsx" } */ ++ ++typedef int v4i32 __attribute__ ((vector_size(16), aligned(16))); ++extern v4i32 a, b, c; ++ ++__attribute__ ((target ("lsx"))) ++void ++test (void) ++{ ++ a = b + c; ++} ++ ++ ++/* { dg-final { scan-assembler "vadd.w" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/lsx-func-attr-2.c b/gcc/testsuite/gcc.target/loongarch/lsx-func-attr-2.c +new file mode 100644 +index 000000000..97475fff5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/lsx-func-attr-2.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx" } */ ++ ++typedef int v4i32 __attribute__ ((vector_size(16), aligned(16))); ++extern v4i32 a, b, c; ++ ++__attribute__ ((target ("no-lsx"))) ++void ++test (void) ++{ ++ a = __builtin_lsx_vadd_w (b, c); /* { dg-error "built-in function '__builtin_lsx_vadd_w' is not enabled" } */ ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/strict_align-func-attr-1.c b/gcc/testsuite/gcc.target/loongarch/strict_align-func-attr-1.c +new file mode 100644 +index 000000000..04893746d +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/loongarch/strict_align-func-attr-1.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mstrict-align" } */ ++extern char a[8]; ++extern char b[8]; ++ ++__attribute__ ((target ("no-strict-align"))) ++void ++test (void) ++{ ++ a[0] = b[1]; ++ a[1] = b[2]; ++ a[2] = b[3]; ++ a[3] = b[4]; ++} ++ ++ ++/* { dg-final { scan-assembler-not "ld.bu" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/strict_align-func-attr-2.c b/gcc/testsuite/gcc.target/loongarch/strict_align-func-attr-2.c +new file mode 100644 +index 000000000..0e81486cd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/strict_align-func-attr-2.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mno-strict-align" } */ ++extern char a[8]; ++extern char b[8]; ++ ++__attribute__ ((target ("strict-align"))) ++void ++test (void) ++{ ++ a[0] = b[1]; ++ a[1] = b[2]; ++ a[2] = b[3]; ++ a[3] = b[4]; ++} ++ ++ ++/* { dg-final { scan-assembler-not "ld.w" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector-func-attr-1.c b/gcc/testsuite/gcc.target/loongarch/vector-func-attr-1.c +new file mode 100644 +index 000000000..655ca234b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector-func-attr-1.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx" } */ ++ ++typedef int v4i32 __attribute__ ((vector_size(16), aligned(16))); ++extern v4i32 a, b, c; ++ ++__attribute__ ((target ("no-lasx"))) ++void ++test (void) ++{ ++ a = b + c; ++} ++ ++ ++/* { dg-final { scan-assembler "vadd.w" } } */ +-- +2.47.3 + diff --git a/0032-LoongArch-Implement-target-pragma.patch b/0032-LoongArch-Implement-target-pragma.patch new file mode 100644 index 0000000..1346967 --- /dev/null +++ b/0032-LoongArch-Implement-target-pragma.patch @@ -0,0 +1,498 @@ +From 64c742decaecaed15a5be0f75f599268de909be4 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Tue, 7 Jan 2025 12:00:12 +0800 +Subject: [PATCH 32/59] LoongArch: Implement target pragma. 
+ +The target pragmas defined correspond to the target function attributes. + +This implementation is derived from AArch64. + +gcc/ChangeLog: + + * config/loongarch/loongarch-protos.h + (loongarch_reset_previous_fndecl): Add function declaration. + (loongarch_save_restore_target_globals): Likewise. + (loongarch_register_pragmas): Likewise. + * config/loongarch/loongarch-target-attr.cc + (loongarch_option_valid_attribute_p): Optimize the processing + of attributes. + (loongarch_pragma_target_parse): New functions. + (loongarch_register_pragmas): Likewise. + * config/loongarch/loongarch.cc + (loongarch_reset_previous_fndecl): New functions. + (loongarch_set_current_function): When the old_tree is the same + as the new_tree, the rules for using registers, etc., + are set according to the option values to ensure that the + pragma can be processed correctly. + * config/loongarch/loongarch.h (REGISTER_TARGET_PRAGMAS): + Define macro. + * doc/extend.texi: Supplemental Documentation. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/arch-func-attr-1.c: Add '#pragma'. + * gcc.target/loongarch/cmodel-func-attr-1.c: Likewise. + * gcc.target/loongarch/lasx-func-attr-1.c: Likewise. + * gcc.target/loongarch/lsx-func-attr-1.c: Likewise. + * gcc.target/loongarch/strict_align-func-attr-1.c: Likewise. + * gcc.target/loongarch/strict_align-func-attr-2.c: Likewise. + * gcc.target/loongarch/vector-func-attr-1.c: Likewise. + * gcc.target/loongarch/arch-pragma-attr-1.c: Likewise. + * gcc.target/loongarch/cmodel-pragma-attr-1.c: New test. + * gcc.target/loongarch/lasx-pragma-attr-1.c: New test. + * gcc.target/loongarch/lasx-pragma-attr-2.c: New test. + * gcc.target/loongarch/lsx-pragma-attr-1.c: New test. + * gcc.target/loongarch/lsx-pragma-attr-2.c: New test. + * gcc.target/loongarch/strict_align-pragma-attr-1.c: New test. + * gcc.target/loongarch/strict_align-pragma-attr-2.c: New test. + * gcc.target/loongarch/vector-pragma-attr-1.c: New test. 
+ * gcc.target/loongarch/pragma-push-pop.c: New test. +--- + gcc/config/loongarch/loongarch-protos.h | 3 + + gcc/config/loongarch/loongarch-target-attr.cc | 59 +++++++++++++++++++ + gcc/config/loongarch/loongarch.cc | 19 +++--- + gcc/config/loongarch/loongarch.h | 2 + + .../gcc.target/loongarch/arch-func-attr-1.c | 6 +- + .../gcc.target/loongarch/arch-pragma-attr-1.c | 7 +++ + .../gcc.target/loongarch/cmodel-func-attr-1.c | 4 ++ + .../loongarch/cmodel-pragma-attr-1.c | 7 +++ + .../gcc.target/loongarch/lasx-func-attr-1.c | 4 ++ + .../gcc.target/loongarch/lasx-pragma-attr-1.c | 7 +++ + .../gcc.target/loongarch/lasx-pragma-attr-2.c | 12 ++++ + .../gcc.target/loongarch/lsx-func-attr-1.c | 4 ++ + .../gcc.target/loongarch/lsx-pragma-attr-1.c | 7 +++ + .../gcc.target/loongarch/lsx-pragma-attr-2.c | 12 ++++ + .../gcc.target/loongarch/pragma-push-pop.c | 22 +++++++ + .../loongarch/strict_align-func-attr-1.c | 4 ++ + .../loongarch/strict_align-func-attr-2.c | 4 ++ + .../loongarch/strict_align-pragma-attr-1.c | 7 +++ + .../loongarch/strict_align-pragma-attr-2.c | 7 +++ + .../gcc.target/loongarch/vector-func-attr-1.c | 4 ++ + .../loongarch/vector-pragma-attr-1.c | 7 +++ + 21 files changed, 200 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/arch-pragma-attr-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/cmodel-pragma-attr-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-pragma-attr-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-pragma-attr-2.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/lsx-pragma-attr-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/lsx-pragma-attr-2.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/pragma-push-pop.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/strict_align-pragma-attr-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/strict_align-pragma-attr-2.c + create mode 100644 
gcc/testsuite/gcc.target/loongarch/vector-pragma-attr-1.c + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 0096cd6e8..b76c9ce8c 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -216,4 +216,7 @@ extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type); + extern bool loongarch_symbol_extreme_p (enum loongarch_symbol_type); + extern bool loongarch_option_valid_attribute_p (tree, tree, tree, int); + extern void loongarch_option_override_internal (struct loongarch_target *, struct gcc_options *, struct gcc_options *); ++extern void loongarch_reset_previous_fndecl (void); ++extern void loongarch_save_restore_target_globals (tree new_tree); ++extern void loongarch_register_pragmas (void); + #endif /* ! GCC_LOONGARCH_PROTOS_H */ +diff --git a/gcc/config/loongarch/loongarch-target-attr.cc b/gcc/config/loongarch/loongarch-target-attr.cc +index 6bb1e6b75..cee7031ca 100644 +--- a/gcc/config/loongarch/loongarch-target-attr.cc ++++ b/gcc/config/loongarch/loongarch-target-attr.cc +@@ -349,6 +349,16 @@ loongarch_option_valid_attribute_p (tree fndecl, tree, tree args, int) + tree new_target, new_optimize; + tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); + ++ /* If what we're processing is the current pragma string then the ++ target option node is already stored in target_option_current_node ++ by loongarch_pragma_target_parse in loongarch-target-attr.cc. ++ Use that to avoid having to re-parse the string. 
*/ ++ if (!existing_target && args == current_target_pragma) ++ { ++ DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node; ++ return true; ++ } ++ + tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); + + old_optimize +@@ -411,3 +421,52 @@ loongarch_option_valid_attribute_p (tree fndecl, tree, tree args, int) + TREE_OPTIMIZATION (old_optimize)); + return ret; + } ++ ++/* Hook to validate the current #pragma GCC target and set the state, and ++ update the macros based on what was changed. If ARGS is NULL, then ++ POP_TARGET is used to reset the options. */ ++ ++static bool ++loongarch_pragma_target_parse (tree args, tree pop_target) ++{ ++ /* If args is not NULL then process it and setup the target-specific ++ information that it specifies. */ ++ if (args) ++ { ++ if (!loongarch_process_target_attr (args, NULL)) ++ return false; ++ ++ loongarch_option_override_internal (&la_target, ++ &global_options, ++ &global_options_set); ++ } ++ ++ /* args is NULL, restore to the state described in pop_target. */ ++ else ++ { ++ pop_target = pop_target ? pop_target : target_option_default_node; ++ cl_target_option_restore (&global_options, &global_options_set, ++ TREE_TARGET_OPTION (pop_target)); ++ } ++ ++ target_option_current_node ++ = build_target_option_node (&global_options, &global_options_set); ++ ++ loongarch_reset_previous_fndecl (); ++ ++ /* If we're popping or reseting make sure to update the globals so that ++ the optab availability predicates get recomputed. */ ++ if (pop_target) ++ loongarch_save_restore_target_globals (pop_target); ++ ++ return true; ++} ++ ++/* Implement REGISTER_TARGET_PRAGMAS. */ ++ ++void ++loongarch_register_pragmas (void) ++{ ++ /* Update pragma hook to allow parsing #pragma GCC target. 
*/ ++ targetm.target_option.pragma_parse = loongarch_pragma_target_parse; ++} +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 2524f897e..16ffa340e 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -7741,11 +7741,17 @@ loongarch_option_override_internal (struct loongarch_target *target, + + static GTY(()) tree loongarch_previous_fndecl; + ++void ++loongarch_reset_previous_fndecl (void) ++{ ++ loongarch_previous_fndecl = NULL; ++} ++ + /* Restore or save the TREE_TARGET_GLOBALS from or to new_tree. + Used by loongarch_set_current_function to + make sure optab availability predicates are recomputed when necessary. */ + +-static void ++void + loongarch_save_restore_target_globals (tree new_tree) + { + if (TREE_TARGET_GLOBALS (new_tree)) +@@ -7798,13 +7804,12 @@ loongarch_set_current_function (tree fndecl) + + loongarch_previous_fndecl = fndecl; + +- if (new_tree == old_tree) +- return; ++ if (new_tree != old_tree) ++ /* According to the settings of the functions attribute and pragma, ++ the options is corrected. */ ++ cl_target_option_restore (&global_options, &global_options_set, ++ TREE_TARGET_OPTION (new_tree)); + +- /* According to the settings of the functions attribute and pragma, +- the options is corrected. */ +- cl_target_option_restore (&global_options, &global_options_set, +- TREE_TARGET_OPTION (new_tree)); + + /* After correcting the value of options, we need to update the + rules for using the hardware registers to ensure that the +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index a23dabde1..c93df7ad9 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -26,6 +26,8 @@ along with GCC; see the file COPYING3. 
If not see + + #define SWITCHABLE_TARGET 1 + ++#define REGISTER_TARGET_PRAGMAS() loongarch_register_pragmas () ++ + #define TARGET_SUPPORTS_WIDE_INT 1 + + /* Macros to silence warnings about numbers being signed in traditional +diff --git a/gcc/testsuite/gcc.target/loongarch/arch-func-attr-1.c b/gcc/testsuite/gcc.target/loongarch/arch-func-attr-1.c +index 98cc7e577..b8e51e6d9 100644 +--- a/gcc/testsuite/gcc.target/loongarch/arch-func-attr-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/arch-func-attr-1.c +@@ -1,10 +1,14 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -march=loongarch64 -mno-lsx" } */ ++/* { dg-options "-O2 -march=loongarch64 -mno-lsx -std=gnu11" } */ + + extern char a[64]; + extern char b[64]; + ++#ifndef TEST_TARGET_PRAGMA + __attribute__ ((target ("arch=la64v1.1"))) ++#else ++#pragma GCC target ("arch=la64v1.1") ++#endif + void + test (void) + { +diff --git a/gcc/testsuite/gcc.target/loongarch/arch-pragma-attr-1.c b/gcc/testsuite/gcc.target/loongarch/arch-pragma-attr-1.c +new file mode 100644 +index 000000000..bd918e709 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/arch-pragma-attr-1.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mno-lsx -std=gnu11" } */ ++ ++#define TEST_TARGET_PRAGMA 1 ++#include "./arch-func-attr-1.c" ++ ++/* { dg-final { scan-assembler "vld" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/cmodel-func-attr-1.c b/gcc/testsuite/gcc.target/loongarch/cmodel-func-attr-1.c +index 119cd0e16..9f44dc66b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/cmodel-func-attr-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/cmodel-func-attr-1.c +@@ -4,7 +4,11 @@ + extern char a[8]; + extern char b[8]; + ++#ifndef TEST_TARGET_PRAGMA + __attribute__ ((target ("cmodel=extreme"))) ++#else ++#pragma GCC target ("cmodel=extreme") ++#endif + void + test (void) + { +diff --git a/gcc/testsuite/gcc.target/loongarch/cmodel-pragma-attr-1.c 
b/gcc/testsuite/gcc.target/loongarch/cmodel-pragma-attr-1.c +new file mode 100644 +index 000000000..b52289148 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/cmodel-pragma-attr-1.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mcmodel=normal -mexplicit-relocs=none" } */ ++ ++#define TEST_TARGET_PRAGMA 1 ++#include "./cmodel-func-attr-1.c" ++ ++/* { dg-final { scan-assembler "la.global\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,a" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-func-attr-1.c b/gcc/testsuite/gcc.target/loongarch/lasx-func-attr-1.c +index 5dad9821f..720719e80 100644 +--- a/gcc/testsuite/gcc.target/loongarch/lasx-func-attr-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/lasx-func-attr-1.c +@@ -4,7 +4,11 @@ + typedef int v8i32 __attribute__ ((vector_size(32), aligned(32))); + extern v8i32 a, b, c; + ++#ifndef TEST_TARGET_PRAGMA + __attribute__ ((target ("lasx"))) ++#else ++#pragma GCC target ("lasx") ++#endif + void + test (void) + { +diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-pragma-attr-1.c b/gcc/testsuite/gcc.target/loongarch/lasx-pragma-attr-1.c +new file mode 100644 +index 000000000..d5bc68f1c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/lasx-pragma-attr-1.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mno-lsx" } */ ++ ++#define TEST_TARGET_PRAGMA 1 ++#include "./lasx-func-attr-1.c" ++ ++/* { dg-final { scan-assembler "xvadd.w" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-pragma-attr-2.c b/gcc/testsuite/gcc.target/loongarch/lasx-pragma-attr-2.c +new file mode 100644 +index 000000000..67e4f7179 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/lasx-pragma-attr-2.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx" } */ ++ ++typedef int v8i32 __attribute__ ((vector_size(32), aligned(32))); ++extern v8i32 a, b, c; ++ ++#pragma GCC target ("no-lasx") ++void ++test (void) ++{ ++ a = __builtin_lasx_xvadd_w (b, c); /* { dg-error "built-in 
function '__builtin_lasx_xvadd_w' is not enabled" } */ ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/lsx-func-attr-1.c b/gcc/testsuite/gcc.target/loongarch/lsx-func-attr-1.c +index 3e2c1dc33..3558898d3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/lsx-func-attr-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/lsx-func-attr-1.c +@@ -4,7 +4,11 @@ + typedef int v4i32 __attribute__ ((vector_size(16), aligned(16))); + extern v4i32 a, b, c; + ++#ifndef TEST_TARGET_PRAGMA + __attribute__ ((target ("lsx"))) ++#else ++#pragma GCC target ("lsx") ++#endif + void + test (void) + { +diff --git a/gcc/testsuite/gcc.target/loongarch/lsx-pragma-attr-1.c b/gcc/testsuite/gcc.target/loongarch/lsx-pragma-attr-1.c +new file mode 100644 +index 000000000..c499f18fc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/lsx-pragma-attr-1.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mno-lsx" } */ ++ ++#define TEST_TARGET_PRAGMA 1 ++#include "./lsx-func-attr-1.c" ++ ++/* { dg-final { scan-assembler "vadd.w" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/lsx-pragma-attr-2.c b/gcc/testsuite/gcc.target/loongarch/lsx-pragma-attr-2.c +new file mode 100644 +index 000000000..40314d026 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/lsx-pragma-attr-2.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx" } */ ++ ++typedef int v4i32 __attribute__ ((vector_size(16), aligned(16))); ++extern v4i32 a, b, c; ++ ++#pragma GCC target ("no-lsx") ++void ++test (void) ++{ ++ a = __builtin_lsx_vadd_w (b, c); /* { dg-error "built-in function '__builtin_lsx_vadd_w' is not enabled" } */ ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/pragma-push-pop.c b/gcc/testsuite/gcc.target/loongarch/pragma-push-pop.c +new file mode 100644 +index 000000000..a2bcdcb10 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pragma-push-pop.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx" } */ ++/* { dg-final { 
scan-assembler-not "xvadd\\\.w" } } */ ++/* { dg-final { scan-assembler "xvsll\\\.w" } } */ ++ ++#include <lasxintrin.h> ++ ++extern v8i32 a, b, c; ++#pragma GCC push_options ++#pragma GCC target ("no-lasx") ++void ++test (void) ++{ ++ a = b + c; ++} ++#pragma GCC pop_options ++ ++void ++test1 (void) ++{ ++ c = __builtin_lasx_xvsll_w (a, b); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/strict_align-func-attr-1.c b/gcc/testsuite/gcc.target/loongarch/strict_align-func-attr-1.c +index 04893746d..c1ed6515c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/strict_align-func-attr-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/strict_align-func-attr-1.c +@@ -3,7 +3,11 @@ + extern char a[8]; + extern char b[8]; + ++#ifndef TEST_TARGET_PRAGMA + __attribute__ ((target ("no-strict-align"))) ++#else ++#pragma GCC target ("no-strict-align") ++#endif + void + test (void) + { +diff --git a/gcc/testsuite/gcc.target/loongarch/strict_align-func-attr-2.c b/gcc/testsuite/gcc.target/loongarch/strict_align-func-attr-2.c +index 0e81486cd..70bf81003 100644 +--- a/gcc/testsuite/gcc.target/loongarch/strict_align-func-attr-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/strict_align-func-attr-2.c +@@ -3,7 +3,11 @@ + extern char a[8]; + extern char b[8]; + ++#ifndef TEST_TARGET_PRAGMA + __attribute__ ((target ("strict-align"))) ++#else ++#pragma GCC target ("strict-align") ++#endif + void + test (void) + { +diff --git a/gcc/testsuite/gcc.target/loongarch/strict_align-pragma-attr-1.c b/gcc/testsuite/gcc.target/loongarch/strict_align-pragma-attr-1.c +new file mode 100644 +index 000000000..a95d0b972 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/strict_align-pragma-attr-1.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mstrict-align" } */ ++ ++#define TEST_TARGET_PRAGMA 1 ++#include "./strict_align-func-attr-1.c" ++ ++/* { dg-final { scan-assembler-not "ld.bu" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/strict_align-pragma-attr-2.c
b/gcc/testsuite/gcc.target/loongarch/strict_align-pragma-attr-2.c +new file mode 100644 +index 000000000..93b76c59b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/strict_align-pragma-attr-2.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mno-strict-align" } */ ++ ++#define TEST_TARGET_PRAGMA 1 ++#include "./strict_align-func-attr-2.c" ++ ++/* { dg-final { scan-assembler-not "ld.w" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector-func-attr-1.c b/gcc/testsuite/gcc.target/loongarch/vector-func-attr-1.c +index 655ca234b..4e00606b1 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector-func-attr-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector-func-attr-1.c +@@ -4,7 +4,11 @@ + typedef int v4i32 __attribute__ ((vector_size(16), aligned(16))); + extern v4i32 a, b, c; + ++#ifndef TEST_TARGET_PRAGMA + __attribute__ ((target ("no-lasx"))) ++#else ++#pragma GCC target ("no-lasx") ++#endif + void + test (void) + { +diff --git a/gcc/testsuite/gcc.target/loongarch/vector-pragma-attr-1.c b/gcc/testsuite/gcc.target/loongarch/vector-pragma-attr-1.c +new file mode 100644 +index 000000000..7bbb16901 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector-pragma-attr-1.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx" } */ ++ ++#define TEST_TARGET_PRAGMA 1 ++#include "./vector-func-attr-1.c" ++ ++/* { dg-final { scan-assembler "vadd.w" } } */ +-- +2.47.3 + diff --git a/0033-LoongArch-Fix-wrong-code-with-optab-_alsl_reversesi_.patch b/0033-LoongArch-Fix-wrong-code-with-optab-_alsl_reversesi_.patch new file mode 100644 index 0000000..d7425fe --- /dev/null +++ b/0033-LoongArch-Fix-wrong-code-with-optab-_alsl_reversesi_.patch @@ -0,0 +1,89 @@ +From a47dcb3a254932e7ad329e21caa4ca9961e5ef6b Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Tue, 21 Jan 2025 23:01:38 +0800 +Subject: [PATCH 33/59] LoongArch: Fix wrong code with + _alsl_reversesi_extended + +The second source register of this insn cannot be the 
same as the +destination register. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md + (<optab>_alsl_reversesi_extended): Add '&' to the destination + register constraint and append '0' to the first source register + constraint to indicate the destination register cannot be same + as the second source register, and change the split condition to + reload_completed so that the insn will be split only after RA in + order to obtain allocated registers that satisfy the above + constraints. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/bitwise-shift-reassoc-clobber.c: New + test. +--- + gcc/config/loongarch/loongarch.md | 6 +++--- + .../loongarch/bitwise-shift-reassoc-clobber.c | 21 +++++++++++++++++++ + 2 files changed, 24 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-clobber.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 5d5771999..6fd3dda56 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -3170,13 +3170,13 @@ + ;; add.w => alsl.w, so implement slli.d + and + add.w => and + alsl.w on + ;; our own. + (define_insn_and_split "<optab>_alsl_reversesi_extended" +- [(set (match_operand:DI 0 "register_operand" "=r") ++ [(set (match_operand:DI 0 "register_operand" "=&r") + (sign_extend:DI + (plus:SI + (subreg:SI + (any_bitwise:DI + (ashift:DI +- (match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 1 "register_operand" "r0") + (match_operand:SI 2 "const_immalsl_operand" "")) + (match_operand:DI 3 "const_int_operand" "i")) + 0) +@@ -3185,7 +3185,7 @@ + && loongarch_reassoc_shift_bitwise (<is_and>, operands[2], operands[3], + SImode)" + "#" +- "&& true" ++ "&& reload_completed" + [; r0 = r1 [&|^] r3 is emitted in PREPARATION-STATEMENTS because we + ; need to handle a special case, see below.
+ (set (match_dup 0) +diff --git a/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-clobber.c b/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-clobber.c +new file mode 100644 +index 000000000..9985a18ea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc-clobber.c +@@ -0,0 +1,21 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++ ++register long x asm ("s0"); ++ ++#define TEST(x) (int)(((x & 0x114) << 3) + x) ++ ++[[gnu::noipa]] void ++test (void) ++{ ++ x = TEST (x); ++} ++ ++int ++main (void) ++{ ++ x = 0xffff; ++ test (); ++ if (x != TEST (0xffff)) ++ __builtin_trap (); ++} +-- +2.47.3 + diff --git a/0034-LoongArch-Fix-invalid-subregs-in-xorsign-PR118501.patch b/0034-LoongArch-Fix-invalid-subregs-in-xorsign-PR118501.patch new file mode 100644 index 0000000..37c93bf --- /dev/null +++ b/0034-LoongArch-Fix-invalid-subregs-in-xorsign-PR118501.patch @@ -0,0 +1,35 @@ +From 2f87fec306ccd4c2224183d220c2c767d37c69fa Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Wed, 22 Jan 2025 17:16:29 +0800 +Subject: [PATCH 34/59] LoongArch: Fix invalid subregs in xorsign [PR118501] + +The test case added in r15-7073 now triggers an ICE, indicating we need +the same fix as AArch64. + +gcc/ChangeLog: + + PR target/118501 + * config/loongarch/loongarch.md (@xorsign<mode>3): Use + force_lowpart_subreg. +--- + gcc/config/loongarch/loongarch.md | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 6fd3dda56..858aefb38 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1356,8 +1356,8 @@ + machine_mode lsx_mode + = <MODE>mode == SFmode ?
V4SFmode : V2DFmode; + rtx tmp = gen_reg_rtx (lsx_mode); +- rtx op1 = lowpart_subreg (lsx_mode, operands[1], <MODE>mode); +- rtx op2 = lowpart_subreg (lsx_mode, operands[2], <MODE>mode); ++ rtx op1 = force_lowpart_subreg (lsx_mode, operands[1], <MODE>mode); ++ rtx op2 = force_lowpart_subreg (lsx_mode, operands[2], <MODE>mode); + emit_insn (gen_xorsign3 (lsx_mode, tmp, op1, op2)); + emit_move_insn (operands[0], + lowpart_subreg (<MODE>mode, tmp, lsx_mode)); +-- +2.47.3 + diff --git a/0035-LoongArch-Fix-ICE-caused-by-illegal-calls-to-builtin.patch b/0035-LoongArch-Fix-ICE-caused-by-illegal-calls-to-builtin.patch new file mode 100644 index 0000000..0773b07 --- /dev/null +++ b/0035-LoongArch-Fix-ICE-caused-by-illegal-calls-to-builtin.patch @@ -0,0 +1,67 @@ +From b1b4a1fe231de86b6ac29c184c1cc0c361be0ea9 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Wed, 22 Jan 2025 17:57:21 +0800 +Subject: [PATCH 35/59] LoongArch: Fix ICE caused by illegal calls to builtin + functions [PR118561]. + + PR target/118561 + +gcc/ChangeLog: + + * config/loongarch/loongarch-builtins.cc + (loongarch_expand_builtin_lsx_test_branch): + NULL_RTX will not be returned when an error is detected. + (loongarch_expand_builtin): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/pr118561.c: New test.
+--- + gcc/config/loongarch/loongarch-builtins.cc | 7 +++++-- + gcc/testsuite/gcc.target/loongarch/pr118561.c | 9 +++++++++ + 2 files changed, 14 insertions(+), 2 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr118561.c + +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index b3ec7f33d..8492a5bda 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -2991,7 +2991,10 @@ loongarch_expand_builtin_lsx_test_branch (enum insn_code icode, tree exp) + ops[1].value = force_reg (ops[1].mode, ops[1].value); + + if ((cbranch = maybe_gen_insn (icode, 3, ops)) == NULL_RTX) +- error ("failed to expand built-in function"); ++ { ++ error ("failed to expand built-in function"); ++ return const0_rtx; ++ } + + cmp_result = gen_reg_rtx (SImode); + +@@ -3031,7 +3034,7 @@ loongarch_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, + { + error_at (EXPR_LOCATION (exp), + "built-in function %qD is not enabled", fndecl); +- return target; ++ return target ? 
target : const0_rtx; + } + + switch (d->builtin_type) +diff --git a/gcc/testsuite/gcc.target/loongarch/pr118561.c b/gcc/testsuite/gcc.target/loongarch/pr118561.c +new file mode 100644 +index 000000000..81a776ead +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr118561.c +@@ -0,0 +1,9 @@ ++/* PR target/118561: ICE with -mfpu=none */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mfpu=none" } */ ++ ++int ++test (void) ++{ ++ return __builtin_loongarch_movfcsr2gr (0); /* { dg-error "built-in function '__builtin_loongarch_movfcsr2gr' is not enabled" } */ ++} +-- +2.47.3 + diff --git a/0036-LoongArch-Correct-the-mode-for-mask-eq-ne-z.patch b/0036-LoongArch-Correct-the-mode-for-mask-eq-ne-z.patch new file mode 100644 index 0000000..f30a68b --- /dev/null +++ b/0036-LoongArch-Correct-the-mode-for-mask-eq-ne-z.patch @@ -0,0 +1,58 @@ +From d1a0b391912c7d1b23ff5511c02650ba51caddc8 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 19 Jan 2025 21:26:59 +0800 +Subject: [PATCH 36/59] LoongArch: Correct the mode for mask{eq,ne}z + +For mask{eq,ne}z, rk is always compared with 0 in the full width, thus +the mode for rk should be X. + +I found the issue reviewing a patch fixing a similar issue for RISC-V +XTheadCondMov [1], but interestingly I cannot find a test case really +blowing up on LoongArch. But as the issue is obvious enough let's fix +it anyway so it won't blow up in the future. + +[1]: https://gcc.gnu.org/pipermail/gcc-patches/2025-January/674004.html + +gcc/ChangeLog: + + * config/loongarch/loongarch.md + (*sel<code><GPR:mode>_using_<GPR2:mode>): Rename to ... + (*sel<code><GPR:mode>_using_<X:mode>): ... here. + (GPR2): Remove as nothing uses it now.
+--- + gcc/config/loongarch/loongarch.md | 10 +++------- + 1 file changed, 3 insertions(+), 7 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 858aefb38..c86909ec7 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -375,10 +375,6 @@ + ;; from the same template. + (define_mode_iterator GPR [SI (DI "TARGET_64BIT")]) + +-;; A copy of GPR that can be used when a pattern has two independent +-;; modes. +-(define_mode_iterator GPR2 [SI (DI "TARGET_64BIT")]) +- + ;; This mode iterator allows 16-bit and 32-bit GPR patterns and 32-bit 64-bit + ;; FPR patterns to be generated from the same template. + (define_mode_iterator JOIN_MODE [HI +@@ -2516,11 +2512,11 @@ + + ;; Conditional move instructions. + +-(define_insn "*sel<code><GPR:mode>_using_<GPR2:mode>" ++(define_insn "*sel<code><GPR:mode>_using_<X:mode>" + [(set (match_operand:GPR 0 "register_operand" "=r,r") + (if_then_else:GPR +- (equality_op:GPR2 (match_operand:GPR2 1 "register_operand" "r,r") +- (const_int 0)) ++ (equality_op:X (match_operand:X 1 "register_operand" "r,r") ++ (const_int 0)) + (match_operand:GPR 2 "reg_or_0_operand" "r,J") + (match_operand:GPR 3 "reg_or_0_operand" "J,r")))] + "register_operand (operands[2], <GPR:MODE>mode) +-- +2.47.3 + diff --git a/0037-LoongArch-Move-the-function-loongarch_register_pragm.patch b/0037-LoongArch-Move-the-function-loongarch_register_pragm.patch new file mode 100644 index 0000000..b8d6b2a --- /dev/null +++ b/0037-LoongArch-Move-the-function-loongarch_register_pragm.patch @@ -0,0 +1,160 @@ +From 3a0c1d6f3e20744b7daf25da285e31a506726b0b Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Wed, 12 Feb 2025 09:57:02 +0800 +Subject: [PATCH 37/59] LoongArch: Move the function loongarch_register_pragmas + to loongarch-c.cc. + +gcc/ChangeLog: + + * config/loongarch/loongarch-target-attr.cc + (loongarch_pragma_target_parse): Move to ... + (loongarch_register_pragmas): Move to ...
+ * config/loongarch/loongarch-c.cc + (loongarch_pragma_target_parse): ... here. + (loongarch_register_pragmas): ... here. + * config/loongarch/loongarch-protos.h + (loongarch_process_target_attr): Function Declaration. +--- + gcc/config/loongarch/loongarch-c.cc | 51 +++++++++++++++++++ + gcc/config/loongarch/loongarch-protos.h | 1 + + gcc/config/loongarch/loongarch-target-attr.cc | 48 ----------------- + 3 files changed, 52 insertions(+), 48 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc +index 4ecea6a45..8c22196b3 100644 +--- a/gcc/config/loongarch/loongarch-c.cc ++++ b/gcc/config/loongarch/loongarch-c.cc +@@ -23,9 +23,11 @@ along with GCC; see the file COPYING3. If not see + #include "config.h" + #include "system.h" + #include "coretypes.h" ++#include "target.h" + #include "tm.h" + #include "c-family/c-common.h" + #include "cpplib.h" ++#include "tm_p.h" + + #define preprocessing_asm_p() (cpp_get_options (pfile)->lang == CLK_ASM) + #define builtin_define(TXT) cpp_define (pfile, TXT) +@@ -134,3 +136,52 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + builtin_define_with_int_value ("_LOONGARCH_SPFPSET", 32); + + } ++ ++/* Hook to validate the current #pragma GCC target and set the state, and ++ update the macros based on what was changed. If ARGS is NULL, then ++ POP_TARGET is used to reset the options. */ ++ ++static bool ++loongarch_pragma_target_parse (tree args, tree pop_target) ++{ ++ /* If args is not NULL then process it and setup the target-specific ++ information that it specifies. */ ++ if (args) ++ { ++ if (!loongarch_process_target_attr (args, NULL)) ++ return false; ++ ++ loongarch_option_override_internal (&la_target, ++ &global_options, ++ &global_options_set); ++ } ++ ++ /* args is NULL, restore to the state described in pop_target. */ ++ else ++ { ++ pop_target = pop_target ? 
pop_target : target_option_default_node; ++ cl_target_option_restore (&global_options, &global_options_set, ++ TREE_TARGET_OPTION (pop_target)); ++ } ++ ++ target_option_current_node ++ = build_target_option_node (&global_options, &global_options_set); ++ ++ loongarch_reset_previous_fndecl (); ++ ++ /* If we're popping or reseting make sure to update the globals so that ++ the optab availability predicates get recomputed. */ ++ if (pop_target) ++ loongarch_save_restore_target_globals (pop_target); ++ ++ return true; ++} ++ ++/* Implement REGISTER_TARGET_PRAGMAS. */ ++ ++void ++loongarch_register_pragmas (void) ++{ ++ /* Update pragma hook to allow parsing #pragma GCC target. */ ++ targetm.target_option.pragma_parse = loongarch_pragma_target_parse; ++} +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index b76c9ce8c..b50b88585 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -219,4 +219,5 @@ extern void loongarch_option_override_internal (struct loongarch_target *, struc + extern void loongarch_reset_previous_fndecl (void); + extern void loongarch_save_restore_target_globals (tree new_tree); + extern void loongarch_register_pragmas (void); ++extern bool loongarch_process_target_attr (tree args, tree fndecl); + #endif /* ! GCC_LOONGARCH_PROTOS_H */ +diff --git a/gcc/config/loongarch/loongarch-target-attr.cc b/gcc/config/loongarch/loongarch-target-attr.cc +index cee7031ca..cb537446d 100644 +--- a/gcc/config/loongarch/loongarch-target-attr.cc ++++ b/gcc/config/loongarch/loongarch-target-attr.cc +@@ -422,51 +422,3 @@ loongarch_option_valid_attribute_p (tree fndecl, tree, tree args, int) + return ret; + } + +-/* Hook to validate the current #pragma GCC target and set the state, and +- update the macros based on what was changed. If ARGS is NULL, then +- POP_TARGET is used to reset the options. 
*/ +- +-static bool +-loongarch_pragma_target_parse (tree args, tree pop_target) +-{ +- /* If args is not NULL then process it and setup the target-specific +- information that it specifies. */ +- if (args) +- { +- if (!loongarch_process_target_attr (args, NULL)) +- return false; +- +- loongarch_option_override_internal (&la_target, +- &global_options, +- &global_options_set); +- } +- +- /* args is NULL, restore to the state described in pop_target. */ +- else +- { +- pop_target = pop_target ? pop_target : target_option_default_node; +- cl_target_option_restore (&global_options, &global_options_set, +- TREE_TARGET_OPTION (pop_target)); +- } +- +- target_option_current_node +- = build_target_option_node (&global_options, &global_options_set); +- +- loongarch_reset_previous_fndecl (); +- +- /* If we're popping or reseting make sure to update the globals so that +- the optab availability predicates get recomputed. */ +- if (pop_target) +- loongarch_save_restore_target_globals (pop_target); +- +- return true; +-} +- +-/* Implement REGISTER_TARGET_PRAGMAS. */ +- +-void +-loongarch_register_pragmas (void) +-{ +- /* Update pragma hook to allow parsing #pragma GCC target. */ +- targetm.target_option.pragma_parse = loongarch_pragma_target_parse; +-} +-- +2.47.3 + diff --git a/0038-LoongArch-Split-the-function-loongarch_cpu_cpp_built.patch b/0038-LoongArch-Split-the-function-loongarch_cpu_cpp_built.patch new file mode 100644 index 0000000..4392f7a --- /dev/null +++ b/0038-LoongArch-Split-the-function-loongarch_cpu_cpp_built.patch @@ -0,0 +1,195 @@ +From 3d8b8d734af55c0e4a3f016794a7e1fa13ac92e6 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Tue, 11 Feb 2025 20:05:13 +0800 +Subject: [PATCH 38/59] LoongArch: Split the function + loongarch_cpu_cpp_builtins into two functions. + +Split the implementation of the function loongarch_cpu_cpp_builtins into two parts: + 1. Macro definitions that do not change (only considering 64-bit architecture) + 2. 
Macro definitions that change with different compilation options. + +gcc/ChangeLog: + + * config/loongarch/loongarch-c.cc (builtin_undef): New macro. + (loongarch_cpu_cpp_builtins): Split to loongarch_update_cpp_builtins + and loongarch_define_unconditional_macros. + (loongarch_def_or_undef): New functions. + (loongarch_define_unconditional_macros): Likewise. + (loongarch_update_cpp_builtins): Likewise. +--- + gcc/config/loongarch/loongarch-c.cc | 106 ++++++++++++++++++++-------- + 1 file changed, 78 insertions(+), 28 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc +index 8c22196b3..2d42d000d 100644 +--- a/gcc/config/loongarch/loongarch-c.cc ++++ b/gcc/config/loongarch/loongarch-c.cc +@@ -31,26 +31,22 @@ along with GCC; see the file COPYING3. If not see + + #define preprocessing_asm_p() (cpp_get_options (pfile)->lang == CLK_ASM) + #define builtin_define(TXT) cpp_define (pfile, TXT) ++#define builtin_undef(TXT) cpp_undef (pfile, TXT) + #define builtin_assert(TXT) cpp_assert (pfile, TXT) + +-void +-loongarch_cpu_cpp_builtins (cpp_reader *pfile) ++static void ++loongarch_def_or_undef (bool def_p, const char *macro, cpp_reader *pfile) + { +- builtin_assert ("machine=loongarch"); +- builtin_assert ("cpu=loongarch"); +- builtin_define ("__loongarch__"); +- +- builtin_define_with_value ("__loongarch_arch", +- loongarch_arch_strings[la_target.cpu_arch], 1); +- +- builtin_define_with_value ("__loongarch_tune", +- loongarch_tune_strings[la_target.cpu_tune], 1); +- +- builtin_define_with_value ("_LOONGARCH_ARCH", +- loongarch_arch_strings[la_target.cpu_arch], 1); ++ if (def_p) ++ cpp_define (pfile, macro); ++ else ++ cpp_undef (pfile, macro); ++} + +- builtin_define_with_value ("_LOONGARCH_TUNE", +- loongarch_tune_strings[la_target.cpu_tune], 1); ++static void ++loongarch_define_unconditional_macros (cpp_reader *pfile) ++{ ++ builtin_define ("__loongarch__"); + + /* Base architecture / ABI. 
*/ + if (TARGET_64BIT) +@@ -66,6 +62,48 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + builtin_define ("__loongarch_lp64"); + } + ++ /* Add support for FLOAT128_TYPE on the LoongArch architecture. */ ++ builtin_define ("__FLOAT128_TYPE__"); ++ ++ /* Map the old _Float128 'q' builtins into the new 'f128' builtins. */ ++ builtin_define ("__builtin_fabsq=__builtin_fabsf128"); ++ builtin_define ("__builtin_copysignq=__builtin_copysignf128"); ++ builtin_define ("__builtin_nanq=__builtin_nanf128"); ++ builtin_define ("__builtin_nansq=__builtin_nansf128"); ++ builtin_define ("__builtin_infq=__builtin_inff128"); ++ builtin_define ("__builtin_huge_valq=__builtin_huge_valf128"); ++ ++ /* Native Data Sizes. */ ++ builtin_define_with_int_value ("_LOONGARCH_SZINT", INT_TYPE_SIZE); ++ builtin_define_with_int_value ("_LOONGARCH_SZLONG", LONG_TYPE_SIZE); ++ builtin_define_with_int_value ("_LOONGARCH_SZPTR", POINTER_SIZE); ++ builtin_define_with_int_value ("_LOONGARCH_FPSET", 32); ++ builtin_define_with_int_value ("_LOONGARCH_SPFPSET", 32); ++} ++ ++static void ++loongarch_update_cpp_builtins (cpp_reader *pfile) ++{ ++ /* Since the macros in this function might be redefined, it's necessary to ++ undef them first.*/ ++ builtin_undef ("__loongarch_arch"); ++ builtin_define_with_value ("__loongarch_arch", ++ loongarch_arch_strings[la_target.cpu_arch], 1); ++ ++ builtin_undef ("__loongarch_tune"); ++ builtin_define_with_value ("__loongarch_tune", ++ loongarch_tune_strings[la_target.cpu_tune], 1); ++ ++ builtin_undef ("_LOONGARCH_ARCH"); ++ builtin_define_with_value ("_LOONGARCH_ARCH", ++ loongarch_arch_strings[la_target.cpu_arch], 1); ++ ++ builtin_undef ("_LOONGARCH_TUNE"); ++ builtin_define_with_value ("_LOONGARCH_TUNE", ++ loongarch_tune_strings[la_target.cpu_tune], 1); ++ ++ builtin_undef ("__loongarch_double_float"); ++ builtin_undef ("__loongarch_single_float"); + /* These defines reflect the ABI in use, not whether the + FPU is directly accessible. 
*/ + if (TARGET_DOUBLE_FLOAT_ABI) +@@ -73,6 +111,8 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + else if (TARGET_SINGLE_FLOAT_ABI) + builtin_define ("__loongarch_single_float=1"); + ++ builtin_undef ("__loongarch_soft_float"); ++ builtin_undef ("__loongarch_hard_float"); + if (TARGET_DOUBLE_FLOAT_ABI || TARGET_SINGLE_FLOAT_ABI) + builtin_define ("__loongarch_hard_float=1"); + else +@@ -80,6 +120,7 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + + + /* ISA Extensions. */ ++ builtin_undef ("__loongarch_frlen"); + if (TARGET_DOUBLE_FLOAT) + builtin_define ("__loongarch_frlen=64"); + else if (TARGET_SINGLE_FLOAT) +@@ -87,24 +128,22 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + else + builtin_define ("__loongarch_frlen=0"); + +- if (TARGET_HARD_FLOAT && ISA_HAS_FRECIPE) +- builtin_define ("__loongarch_frecipe"); ++ loongarch_def_or_undef (TARGET_HARD_FLOAT && ISA_HAS_FRECIPE, ++ "__loongarch_frecipe", pfile); ++ ++ loongarch_def_or_undef (ISA_HAS_LSX, "__loongarch_simd", pfile); ++ loongarch_def_or_undef (ISA_HAS_LSX, "__loongarch_sx", pfile); ++ loongarch_def_or_undef (ISA_HAS_LASX, "__loongarch_asx", pfile); + ++ builtin_undef ("__loongarch_simd_width"); + if (ISA_HAS_LSX) + { +- builtin_define ("__loongarch_simd"); +- builtin_define ("__loongarch_sx"); +- +- if (!ISA_HAS_LASX) ++ if (ISA_HAS_LASX) ++ builtin_define ("__loongarch_simd_width=256"); ++ else + builtin_define ("__loongarch_simd_width=128"); + } + +- if (ISA_HAS_LASX) +- { +- builtin_define ("__loongarch_asx"); +- builtin_define ("__loongarch_simd_width=256"); +- } +- + /* ISA evolution features */ + int max_v_major = 1, max_v_minor = 0; + +@@ -122,6 +161,8 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + } + + /* Find the minimum ISA version required to run the target program. 
*/ ++ builtin_undef ("__loongarch_version_major"); ++ builtin_undef ("__loongarch_version_minor"); + if (!(max_v_major == 1 && max_v_minor <= 1 && ISA_HAS_LASX)) + { + builtin_define_with_int_value ("__loongarch_version_major", max_v_major); +@@ -134,7 +175,16 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + builtin_define_with_int_value ("_LOONGARCH_SZPTR", POINTER_SIZE); + builtin_define_with_int_value ("_LOONGARCH_FPSET", 32); + builtin_define_with_int_value ("_LOONGARCH_SPFPSET", 32); ++} ++ ++void ++loongarch_cpu_cpp_builtins (cpp_reader *pfile) ++{ ++ builtin_assert ("machine=loongarch"); ++ builtin_assert ("cpu=loongarch"); + ++ loongarch_define_unconditional_macros (pfile); ++ loongarch_update_cpp_builtins (pfile); + } + + /* Hook to validate the current #pragma GCC target and set the state, and +-- +2.47.3 + diff --git a/0039-LoongArch-After-setting-the-compilation-options-upda.patch b/0039-LoongArch-After-setting-the-compilation-options-upda.patch new file mode 100644 index 0000000..d4115a3 --- /dev/null +++ b/0039-LoongArch-After-setting-the-compilation-options-upda.patch @@ -0,0 +1,218 @@ +From c536985255b1152a3ffeb2f7587151029b34722d Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Tue, 11 Feb 2025 20:36:17 +0800 +Subject: [PATCH 39/59] LoongArch: After setting the compilation options, + update the predefined macros. + + PR target/118828 + +gcc/ChangeLog: + + * config/loongarch/loongarch-c.cc (loongarch_pragma_target_parse): + Update the predefined macros. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/pr118828.c: New test. + * gcc.target/loongarch/pr118828-2.c: New test. + * gcc.target/loongarch/pr118828-3.c: New test. + * gcc.target/loongarch/pr118828-4.c: New test. 
+--- + gcc/config/loongarch/loongarch-c.cc | 14 ++++++++ + .../gcc.target/loongarch/pr118828-2.c | 30 ++++++++++++++++ + .../gcc.target/loongarch/pr118828-3.c | 32 +++++++++++++++++ + .../gcc.target/loongarch/pr118828-4.c | 32 +++++++++++++++++ + gcc/testsuite/gcc.target/loongarch/pr118828.c | 34 +++++++++++++++++++ + 5 files changed, 142 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr118828-2.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr118828-3.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr118828-4.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr118828.c + +diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc +index 2d42d000d..f8b5d072e 100644 +--- a/gcc/config/loongarch/loongarch-c.cc ++++ b/gcc/config/loongarch/loongarch-c.cc +@@ -27,6 +27,7 @@ along with GCC; see the file COPYING3. If not see + #include "tm.h" + #include "c-family/c-common.h" + #include "cpplib.h" ++#include "c-family/c-pragma.h" + #include "tm_p.h" + + #define preprocessing_asm_p() (cpp_get_options (pfile)->lang == CLK_ASM) +@@ -219,6 +220,19 @@ loongarch_pragma_target_parse (tree args, tree pop_target) + + loongarch_reset_previous_fndecl (); + ++ /* For the definitions, ensure all newly defined macros are considered ++ as used for -Wunused-macros. There is no point warning about the ++ compiler predefined macros. */ ++ cpp_options *cpp_opts = cpp_get_options (parse_in); ++ unsigned char saved_warn_unused_macros = cpp_opts->warn_unused_macros; ++ cpp_opts->warn_unused_macros = 0; ++ ++ cpp_force_token_locations (parse_in, BUILTINS_LOCATION); ++ loongarch_update_cpp_builtins (parse_in); ++ cpp_stop_forcing_token_locations (parse_in); ++ ++ cpp_opts->warn_unused_macros = saved_warn_unused_macros; ++ + /* If we're popping or reseting make sure to update the globals so that + the optab availability predicates get recomputed. 
*/ + if (pop_target) +diff --git a/gcc/testsuite/gcc.target/loongarch/pr118828-2.c b/gcc/testsuite/gcc.target/loongarch/pr118828-2.c +new file mode 100644 +index 000000000..3d32fcc15 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr118828-2.c +@@ -0,0 +1,30 @@ ++/* { dg-do preprocess } */ ++/* { dg-options "-mno-lsx" } */ ++ ++#ifdef __loongarch_sx ++#error LSX should not be available here ++#endif ++ ++#ifdef __loongarch_simd_width ++#error simd width shuold not be available here ++#endif ++ ++#pragma GCC push_options ++#pragma GCC target("lsx") ++#ifndef __loongarch_sx ++#error LSX should be available here ++#endif ++#ifndef __loongarch_simd_width ++#error simd width should be available here ++#elif __loongarch_simd_width != 128 ++#error simd width should be 128 ++#endif ++#pragma GCC pop_options ++ ++#ifdef __loongarch_sx ++#error LSX should become unavailable again ++#endif ++ ++#ifdef __loongarch_simd_width ++#error simd width shuold become unavailable again ++#endif +diff --git a/gcc/testsuite/gcc.target/loongarch/pr118828-3.c b/gcc/testsuite/gcc.target/loongarch/pr118828-3.c +new file mode 100644 +index 000000000..31ab8e59a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr118828-3.c +@@ -0,0 +1,32 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64" } */ ++/* { dg-final { scan-assembler "t1: loongarch64" } } */ ++/* { dg-final { scan-assembler "t2: la64v1.1" } } */ ++/* { dg-final { scan-assembler "t3: loongarch64" } } */ ++ ++#ifndef __loongarch_arch ++#error __loongarch_arch should be available here ++#endif ++ ++void ++t1 (void) ++{ ++ asm volatile ("# t1: " __loongarch_arch); ++} ++ ++#pragma GCC push_options ++#pragma GCC target("arch=la64v1.1") ++ ++void ++t2 (void) ++{ ++ asm volatile ("# t2: " __loongarch_arch); ++} ++ ++#pragma GCC pop_options ++ ++void ++t3 (void) ++{ ++ asm volatile ("# t3: " __loongarch_arch); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/pr118828-4.c 
b/gcc/testsuite/gcc.target/loongarch/pr118828-4.c +new file mode 100644 +index 000000000..77587ee56 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr118828-4.c +@@ -0,0 +1,32 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mtune=la464" } */ ++/* { dg-final { scan-assembler "t1: la464" } } */ ++/* { dg-final { scan-assembler "t2: la664" } } */ ++/* { dg-final { scan-assembler "t3: la464" } } */ ++ ++#ifndef __loongarch_tune ++#error __loongarch_tune should be available here ++#endif ++ ++void ++t1 (void) ++{ ++ asm volatile ("# t1: " __loongarch_tune); ++} ++ ++#pragma GCC push_options ++#pragma GCC target("tune=la664") ++ ++void ++t2 (void) ++{ ++ asm volatile ("# t2: " __loongarch_tune); ++} ++ ++#pragma GCC pop_options ++ ++void ++t3 (void) ++{ ++ asm volatile ("# t3: " __loongarch_tune); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/pr118828.c b/gcc/testsuite/gcc.target/loongarch/pr118828.c +new file mode 100644 +index 000000000..abdda24c7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr118828.c +@@ -0,0 +1,34 @@ ++/* { dg-do preprocess } */ ++/* { dg-options "-mno-lasx" } */ ++ ++#ifdef __loongarch_asx ++#error LASX should not be available here ++#endif ++ ++#ifdef __loongarch_simd_width ++#if __loongarch_simd_width == 256 ++#error simd width shuold not be 256 ++#endif ++#endif ++ ++#pragma GCC push_options ++#pragma GCC target("lasx") ++#ifndef __loongarch_asx ++#error LASX should be available here ++#endif ++#ifndef __loongarch_simd_width ++#error simd width should be available here ++#elif __loongarch_simd_width != 256 ++#error simd width should be 256 ++#endif ++#pragma GCC pop_options ++ ++#ifdef __loongarch_asx ++#error LASX should become unavailable again ++#endif ++ ++#ifdef __loongarch_simd_width ++#if __loongarch_simd_width == 256 ++#error simd width shuold not be 256 again ++#endif ++#endif +-- +2.47.3 + diff --git a/0040-LoongArch-When-mfpu-none-__loongarch_frecipe-shouldn.patch 
b/0040-LoongArch-When-mfpu-none-__loongarch_frecipe-shouldn.patch new file mode 100644 index 0000000..418565e --- /dev/null +++ b/0040-LoongArch-When-mfpu-none-__loongarch_frecipe-shouldn.patch @@ -0,0 +1,88 @@ +From f890f7bef589089caf12f3d0fc95d36b9be06531 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Wed, 12 Feb 2025 11:50:50 +0800 +Subject: [PATCH 40/59] LoongArch: When -mfpu=none, '__loongarch_frecipe' + shouldn't be defined [PR118843]. + + PR target/118843 + +gcc/ChangeLog: + + * config/loongarch/loongarch-c.cc + (loongarch_update_cpp_builtins): Fix macro definition issues. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/pr118843.c: New test. +--- + gcc/config/loongarch/loongarch-c.cc | 32 +++++++++++-------- + gcc/testsuite/gcc.target/loongarch/pr118843.c | 6 ++++ + 2 files changed, 24 insertions(+), 14 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr118843.c + +diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc +index f8b5d072e..781a573c9 100644 +--- a/gcc/config/loongarch/loongarch-c.cc ++++ b/gcc/config/loongarch/loongarch-c.cc +@@ -129,9 +129,6 @@ loongarch_update_cpp_builtins (cpp_reader *pfile) + else + builtin_define ("__loongarch_frlen=0"); + +- loongarch_def_or_undef (TARGET_HARD_FLOAT && ISA_HAS_FRECIPE, +- "__loongarch_frecipe", pfile); +- + loongarch_def_or_undef (ISA_HAS_LSX, "__loongarch_simd", pfile); + loongarch_def_or_undef (ISA_HAS_LSX, "__loongarch_sx", pfile); + loongarch_def_or_undef (ISA_HAS_LASX, "__loongarch_asx", pfile); +@@ -149,18 +146,25 @@ loongarch_update_cpp_builtins (cpp_reader *pfile) + int max_v_major = 1, max_v_minor = 0; + + for (int i = 0; i < N_EVO_FEATURES; i++) +- if (la_target.isa.evolution & la_evo_feature_masks[i]) +- { +- builtin_define (la_evo_macro_name[i]); +- +- int major = la_evo_version_major[i], +- minor = la_evo_version_minor[i]; +- +- max_v_major = major > max_v_major ? major : max_v_major; +- max_v_minor = major == max_v_major +- ? 
(minor > max_v_minor ? minor : max_v_minor): max_v_minor; +- } ++ { ++ builtin_undef (la_evo_macro_name[i]); + ++ if (la_target.isa.evolution & la_evo_feature_masks[i] ++ && (la_evo_feature_masks[i] != OPTION_MASK_ISA_FRECIPE ++ || TARGET_HARD_FLOAT)) ++ { ++ builtin_define (la_evo_macro_name[i]); ++ ++ /* Track the newest ISA version that any enabled feature requires. */ ++ int major = la_evo_version_major[i], ++ minor = la_evo_version_minor[i]; ++ ++ max_v_major = major > max_v_major ? major : max_v_major; ++ max_v_minor = major == max_v_major ++ ? (minor > max_v_minor ? minor : max_v_minor) : max_v_minor; ++ } ++ } ++ + /* Find the minimum ISA version required to run the target program. */ + builtin_undef ("__loongarch_version_major"); + builtin_undef ("__loongarch_version_minor"); +diff --git a/gcc/testsuite/gcc.target/loongarch/pr118843.c b/gcc/testsuite/gcc.target/loongarch/pr118843.c +new file mode 100644 +index 000000000..30372b8ff +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr118843.c +@@ -0,0 +1,6 @@ ++/* { dg-do preprocess } */ ++/* { dg-options "-mfrecipe -mfpu=none" } */ ++ ++#ifdef __loongarch_frecipe ++#error __loongarch_frecipe should not be avaliable here ++#endif +-- +2.47.3 + diff --git a/0041-LoongArch-Adjust-the-cost-of-ADDRESS_REG_REG.patch b/0041-LoongArch-Adjust-the-cost-of-ADDRESS_REG_REG.patch new file mode 100644 index 0000000..44ebe0a --- /dev/null +++ b/0041-LoongArch-Adjust-the-cost-of-ADDRESS_REG_REG.patch @@ -0,0 +1,180 @@ +From 398d22395ec1803189bf3dfccac3b2045c749b0f Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Tue, 10 Dec 2024 20:59:22 +0800 +Subject: [PATCH 41/59] LoongArch: Adjust the cost of ADDRESS_REG_REG. + +After changing this cost from 1 to 3, the performance of spec2006 +401 473 416 465 482 can be improved by about 2% on LA664. + +Add option '-maddr-reg-reg-cost='. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch.opt.in: Add + option '-maddr-reg-reg-cost='.
+ * config/loongarch/loongarch-def.cc + (loongarch_rtx_cost_data::loongarch_rtx_cost_data): Initialize + addr_reg_reg_cost to 3. + * config/loongarch/loongarch-opts.cc + (loongarch_target_option_override): If '-maddr-reg-reg-cost=' + is not used, set it to the initial value. + * config/loongarch/loongarch-tune.h + (struct loongarch_rtx_cost_data): Add the member + addr_reg_reg_cost and its assignment function to the structure + loongarch_rtx_cost_data. + * config/loongarch/loongarch.cc (loongarch_address_insns): + Use la_addr_reg_reg_cost to set the cost of ADDRESS_REG_REG. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch.opt.urls: Regenerate. + * doc/invoke.texi: Add description of '-maddr-reg-reg-cost='. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/const-double-zero-stx.c: Add + '-maddr-reg-reg-cost=1'. + * gcc.target/loongarch/stack-check-alloca-1.c: Likewise. +--- + gcc/config/loongarch/genopts/loongarch.opt.in | 4 ++++ + gcc/config/loongarch/loongarch-def.cc | 1 + + gcc/config/loongarch/loongarch-opts.cc | 3 +++ + gcc/config/loongarch/loongarch-tune.h | 7 +++++++ + gcc/config/loongarch/loongarch.cc | 2 +- + gcc/config/loongarch/loongarch.opt | 4 ++++ + gcc/config/loongarch/loongarch.opt.urls | 3 +++ + gcc/testsuite/gcc.target/loongarch/const-double-zero-stx.c | 2 +- + gcc/testsuite/gcc.target/loongarch/stack-check-alloca-1.c | 2 +- + 9 files changed, 25 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 20795f6bd..4ffd969e3 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -177,6 +177,10 @@ mbranch-cost= + Target RejectNegative Joined UInteger Var(la_branch_cost) Save + -mbranch-cost=COST Set the cost of branches to roughly COST instructions. 
+ ++maddr-reg-reg-cost= ++Target RejectNegative Joined UInteger Var(la_addr_reg_reg_cost) Save ++-maddr-reg-reg-cost=COST Set the cost of ADDRESS_REG_REG to the value calculated by COST. ++ + mcheck-zero-division + Target Mask(CHECK_ZERO_DIV) Save + Trap on integer divide by zero. +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index c3f9fc6de..7f9fb6a37 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -136,6 +136,7 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data () + movcf2gr (COSTS_N_INSNS (7)), + movgr2cf (COSTS_N_INSNS (15)), + branch_cost (6), ++ addr_reg_reg_cost (3), + memory_latency (4) {} + + /* The following properties cannot be looked up directly using "cpucfg". +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 1d08bb6a1..0d9eb58b7 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -1010,6 +1010,9 @@ loongarch_target_option_override (struct loongarch_target *target, + if (!opts_set->x_la_branch_cost) + opts->x_la_branch_cost = loongarch_cost->branch_cost; + ++ if (!opts_set->x_la_addr_reg_reg_cost) ++ opts->x_la_addr_reg_reg_cost = loongarch_cost->addr_reg_reg_cost; ++ + /* other stuff */ + if (ABI_LP64_P (target->abi.base)) + opts->x_flag_pcc_struct_return = 0; +diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h +index d286eee0b..b9434305e 100644 +--- a/gcc/config/loongarch/loongarch-tune.h ++++ b/gcc/config/loongarch/loongarch-tune.h +@@ -38,6 +38,7 @@ struct loongarch_rtx_cost_data + unsigned short movcf2gr; + unsigned short movgr2cf; + unsigned short branch_cost; ++ unsigned short addr_reg_reg_cost; + unsigned short memory_latency; + + /* Default RTX cost initializer, implemented in loongarch-def.cc. 
*/ +@@ -115,6 +116,12 @@ struct loongarch_rtx_cost_data + return *this; + } + ++ loongarch_rtx_cost_data addr_reg_reg_cost_ (unsigned short _addr_reg_reg_cost) ++ { ++ addr_reg_reg_cost = _addr_reg_reg_cost; ++ return *this; ++ } ++ + loongarch_rtx_cost_data memory_latency_ (unsigned short _memory_latency) + { + memory_latency = _memory_latency; +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 16ffa340e..4322794e8 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2373,7 +2373,7 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) + return factor; + + case ADDRESS_REG_REG: +- return factor; ++ return factor * la_addr_reg_reg_cost; + + case ADDRESS_CONST_INT: + return lsx_p ? 0 : factor; +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index f9c7bd446..52469757f 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -185,6 +185,10 @@ mbranch-cost= + Target RejectNegative Joined UInteger Var(la_branch_cost) Save + -mbranch-cost=COST Set the cost of branches to roughly COST instructions. + ++maddr-reg-reg-cost= ++Target RejectNegative Joined UInteger Var(la_addr_reg_reg_cost) Save ++-maddr-reg-reg-cost=COST Set the cost of ADDRESS_REG_REG to the value calculated by COST. ++ + mcheck-zero-division + Target Mask(CHECK_ZERO_DIV) Save + Trap on integer divide by zero. 
+diff --git a/gcc/config/loongarch/loongarch.opt.urls b/gcc/config/loongarch/loongarch.opt.urls +index 571c504e6..5f644f6c3 100644 +--- a/gcc/config/loongarch/loongarch.opt.urls ++++ b/gcc/config/loongarch/loongarch.opt.urls +@@ -27,6 +27,9 @@ UrlSuffix(gcc/LoongArch-Options.html#index-mabi-2) + mbranch-cost= + UrlSuffix(gcc/LoongArch-Options.html#index-mbranch-cost-2) + ++maddr-reg-reg-cost= ++UrlSuffix(gcc/LoongArch-Options.html#index-maddr-reg-reg-cost) ++ + mcheck-zero-division + UrlSuffix(gcc/LoongArch-Options.html#index-mcheck-zero-division) + +diff --git a/gcc/testsuite/gcc.target/loongarch/const-double-zero-stx.c b/gcc/testsuite/gcc.target/loongarch/const-double-zero-stx.c +index 8fb04be8f..fd1bb49ff 100644 +--- a/gcc/testsuite/gcc.target/loongarch/const-double-zero-stx.c ++++ b/gcc/testsuite/gcc.target/loongarch/const-double-zero-stx.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2" } */ ++/* { dg-options "-O2 -maddr-reg-reg-cost=1" } */ + /* { dg-final { scan-assembler-times {stx\..\t\$r0} 2 } } */ + + extern float arr_f[]; +diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-1.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-1.c +index 6ee589c4b..6168461b2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-1.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */ ++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -maddr-reg-reg-cost=1" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ + /* { dg-require-effective-target alloca } */ + /* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */ +-- +2.47.3 + diff --git a/0042-LoongArch-Fix-the-issue-of-function-jump-out-of-rang.patch b/0042-LoongArch-Fix-the-issue-of-function-jump-out-of-rang.patch new file mode 100644 index 
0000000..d13a926 --- /dev/null +++ b/0042-LoongArch-Fix-the-issue-of-function-jump-out-of-rang.patch @@ -0,0 +1,45 @@ +From dfd56cc7c7f90a86cfd4a89f4884e1e2740157ec Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Wed, 12 Feb 2025 14:29:58 +0800 +Subject: [PATCH 42/59] LoongArch: Fix the issue of function jump out of range + caused by crtbeginS.o [PR118844]. + +Due to the presence of R_LARCH_B26 in +/usr/lib/gcc/loongarch64-linux-gnu/14/crtbeginS.o, its addressing +range is [PC-128MiB, PC+128MiB-4]. This means that when the code +segment size exceeds 128MB, linking with lld will definitely fail +(ld will not fail because the order of the two is different). + +The linking order: + lld: crtbeginS.o + .text + .plt + ld : .plt + crtbeginS.o + .text + +To solve this issue, add '-mcmodel=extreme' when compiling crtbeginS.o. + + PR target/118844 + +libgcc/ChangeLog: + + * config/loongarch/t-crtstuff: Add '-mcmodel=extreme' + to CRTSTUFF_T_CFLAGS_S. +--- + libgcc/config/loongarch/t-crtstuff | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/libgcc/config/loongarch/t-crtstuff b/libgcc/config/loongarch/t-crtstuff +index b8c36eb66..2a2489b7e 100644 +--- a/libgcc/config/loongarch/t-crtstuff ++++ b/libgcc/config/loongarch/t-crtstuff +@@ -3,3 +3,9 @@ + # to .eh_frame data from crtbeginT.o instead of the user-defined object + # during static linking. + CRTSTUFF_T_CFLAGS += -fno-omit-frame-pointer -fno-asynchronous-unwind-tables ++ ++# As shown in the test case PR118844, when using lld for linking, ++# it fails due to B26 in crtbeginS.o causing the link to exceed the range. ++# Therefore, the issue was resolved by adding the compilation option ++# "-mcmodel=extreme" when compiling crtbeginS.o. 
++CRTSTUFF_T_CFLAGS_S += -mcmodel=extreme +-- +2.47.3 + diff --git a/0043-LoongArch-Accept-ADD-IOR-or-XOR-when-combining-objec.patch b/0043-LoongArch-Accept-ADD-IOR-or-XOR-when-combining-objec.patch new file mode 100644 index 0000000..60d31b2 --- /dev/null +++ b/0043-LoongArch-Accept-ADD-IOR-or-XOR-when-combining-objec.patch @@ -0,0 +1,147 @@ +From e8562aed50bb881f9c0bbb8d8b5a1eca24e299dd Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Mon, 10 Feb 2025 23:39:24 +0800 +Subject: [PATCH 43/59] LoongArch: Accept ADD, IOR or XOR when combining + objects with no bits in common [PR115478] + +Since r15-1120, multi-word shifts/rotates produces PLUS instead of IOR. +It's generally a good thing (allowing to use our alsl instruction or +similar instrunction on other architectures), but it's preventing us +from using bytepick. For example, if we shift a __int128 by 16 bits, +the higher word can be produced via a single bytepick.d instruction with +immediate 2, but we got: + + srli.d $r12,$r4,48 + slli.d $r5,$r5,16 + slli.d $r4,$r4,16 + add.d $r5,$r12,$r5 + jr $r1 + +This wasn't work with GCC 14, but after r15-6490 it's supposed to work +if IOR was used instead of PLUS. + +To fix this, add a code iterator to match IOR, XOR, and PLUS and use it +instead of just IOR if we know the operands have no overlapping bits. + +gcc/ChangeLog: + + PR target/115478 + * config/loongarch/loongarch.md (any_or_plus): New + define_code_iterator. + (bstrins__for_ior_mask): Use any_or_plus instead of ior. + (bytepick_w_): Likewise. + (bytepick_d_): Likewise. + (bytepick_d__rev): Likewise. + +gcc/testsuite/ChangeLog: + + PR target/115478 + * gcc.target/loongarch/bytepick_shift_128.c: New test. 
+--- + gcc/config/loongarch/loongarch.md | 46 +++++++++++++------ + .../gcc.target/loongarch/bytepick_shift_128.c | 9 ++++ + 2 files changed, 41 insertions(+), 14 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/bytepick_shift_128.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index c86909ec7..bf476251c 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -491,6 +491,10 @@ + (xor "uns_arith_operand")]) + (define_code_attr is_and [(and "true") (ior "false") (xor "false")]) + ++;; If we know the operands does not have overlapping bits, use this ++;; instead of just ior to cover more cases. ++(define_code_iterator any_or_plus [any_or plus]) ++ + ;; This code iterator allows unsigned and signed division to be generated + ;; from the same template. + (define_code_iterator any_div [div udiv mod umod]) +@@ -1597,10 +1601,11 @@ + + (define_insn_and_split "*bstrins__for_ior_mask" + [(set (match_operand:GPR 0 "register_operand" "=r") +- (ior:GPR (and:GPR (match_operand:GPR 1 "register_operand" "r") +- (match_operand:GPR 2 "const_int_operand" "i")) +- (and:GPR (match_operand:GPR 3 "register_operand" "r") +- (match_operand:GPR 4 "const_int_operand" "i"))))] ++ (any_or_plus:GPR ++ (and:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand:GPR 2 "const_int_operand" "i")) ++ (and:GPR (match_operand:GPR 3 "register_operand" "r") ++ (match_operand:GPR 4 "const_int_operand" "i"))))] + "loongarch_pre_reload_split () + && loongarch_use_bstrins_for_ior_with_mask (mode, operands)" + "#" +@@ -4249,12 +4254,13 @@ + } + [(set_attr "mode" "")]) + +-(define_insn "bytepick_w_" ++(define_insn "*bytepick_w_" + [(set (match_operand:SI 0 "register_operand" "=r") +- (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") +- (const_int )) +- (ashift:SI (match_operand:SI 2 "register_operand" "r") +- (const_int bytepick_w_ashift_amount))))] ++ (any_or_plus:SI ++ (lshiftrt:SI 
(match_operand:SI 1 "register_operand" "r") ++ (const_int )) ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int bytepick_w_ashift_amount))))] + "" + "bytepick.w\t%0,%1,%2," + [(set_attr "mode" "SI")]) +@@ -4292,16 +4298,28 @@ + "bytepick.w\t%0,%2,%1,1" + [(set_attr "mode" "SI")]) + +-(define_insn "bytepick_d_" ++(define_insn "*bytepick_d_" + [(set (match_operand:DI 0 "register_operand" "=r") +- (ior:DI (lshiftrt (match_operand:DI 1 "register_operand" "r") +- (const_int )) +- (ashift (match_operand:DI 2 "register_operand" "r") +- (const_int bytepick_d_ashift_amount))))] ++ (any_or_plus:DI ++ (lshiftrt (match_operand:DI 1 "register_operand" "r") ++ (const_int )) ++ (ashift (match_operand:DI 2 "register_operand" "r") ++ (const_int bytepick_d_ashift_amount))))] + "TARGET_64BIT" + "bytepick.d\t%0,%1,%2," + [(set_attr "mode" "DI")]) + ++(define_insn "*bytepick_d__rev" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (any_or_plus:DI ++ (ashift (match_operand:DI 1 "register_operand" "r") ++ (const_int bytepick_d_ashift_amount)) ++ (lshiftrt (match_operand:DI 2 "register_operand" "r") ++ (const_int ))))] ++ "TARGET_64BIT" ++ "bytepick.d\t%0,%2,%1," ++ [(set_attr "mode" "DI")]) ++ + (define_insn "bitrev_4b" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r")] +diff --git a/gcc/testsuite/gcc.target/loongarch/bytepick_shift_128.c b/gcc/testsuite/gcc.target/loongarch/bytepick_shift_128.c +new file mode 100644 +index 000000000..d3a977219 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/bytepick_shift_128.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler "bytepick\\.d" } } */ ++ ++__int128 ++test (__int128 a) ++{ ++ return a << 16; ++} +-- +2.47.3 + diff --git a/0044-LoongArch-Try-harder-using-vrepli-instructions-to-ma.patch b/0044-LoongArch-Try-harder-using-vrepli-instructions-to-ma.patch new file 
mode 100644 index 0000000..49cfcf0 --- /dev/null +++ b/0044-LoongArch-Try-harder-using-vrepli-instructions-to-ma.patch @@ -0,0 +1,168 @@ +From c1457e8e3966ec01831cfc6bfac05fdc1a4aa4fe Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 24 Jan 2025 08:32:18 +0800 +Subject: [PATCH 44/59] LoongArch: Try harder using vrepli instructions to + materialize const vectors + +For + + a = (v4si){0xdddddddd, 0xdddddddd, 0xdddddddd, 0xdddddddd} + +we just want + + vrepli.b $vr0, 0xdd + +but the compiler actually produces a load: + + la.local $r14,.LC0 + vld $vr0,$r14,0 + +It's because we only tried vrepli.d which wouldn't work. Try all vrepli +instructions for const int vector materializing to fix it. + +gcc/ChangeLog: + + * config/loongarch/loongarch-protos.h + (loongarch_const_vector_vrepli): New function prototype. + * config/loongarch/loongarch.cc (loongarch_const_vector_vrepli): + Implement. + (loongarch_const_insns): Call loongarch_const_vector_vrepli + instead of loongarch_const_vector_same_int_p. + (loongarch_split_vector_move_p): Likewise. + (loongarch_output_move): Use loongarch_const_vector_vrepli to + pun operend[1] into a better mode if it's a const int vector, + and decide the suffix of [x]vrepli with the new mode. + * config/loongarch/constraints.md (YI): Call + loongarch_const_vector_vrepli instead of + loongarch_const_vector_same_int_p. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vrepli.c: New test. 
+--- + gcc/config/loongarch/constraints.md | 2 +- + gcc/config/loongarch/loongarch-protos.h | 1 + + gcc/config/loongarch/loongarch.cc | 34 ++++++++++++++++++--- + gcc/testsuite/gcc.target/loongarch/vrepli.c | 15 +++++++++ + 4 files changed, 46 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vrepli.c + +diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md +index 7a090be1e..9a28fbc48 100644 +--- a/gcc/config/loongarch/constraints.md ++++ b/gcc/config/loongarch/constraints.md +@@ -301,7 +301,7 @@ + A replicated vector const in which the replicated value is in the range + [-512,511]." + (and (match_code "const_vector") +- (match_test "loongarch_const_vector_same_int_p (op, mode, -512, 511)"))) ++ (match_test "loongarch_const_vector_vrepli (op, mode)"))) + + (define_constraint "YC" + "@internal +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index b50b88585..363b47926 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -121,6 +121,7 @@ extern bool loongarch_const_vector_same_int_p (rtx, machine_mode, + extern bool loongarch_const_vector_shuffle_set_p (rtx, machine_mode); + extern bool loongarch_const_vector_bitimm_set_p (rtx, machine_mode); + extern bool loongarch_const_vector_bitimm_clr_p (rtx, machine_mode); ++extern rtx loongarch_const_vector_vrepli (rtx, machine_mode); + extern rtx loongarch_lsx_vec_parallel_const_half (machine_mode, bool); + extern rtx loongarch_gen_const_int_vector (machine_mode, HOST_WIDE_INT); + extern enum reg_class loongarch_secondary_reload_class (enum reg_class, +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 4322794e8..1f72f4922 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1837,6 +1837,28 @@ loongarch_const_vector_shuffle_set_p (rtx op, machine_mode mode) + return true; + } + 
++rtx ++loongarch_const_vector_vrepli (rtx x, machine_mode mode) ++{ ++ int size = GET_MODE_SIZE (mode); ++ ++ if (GET_CODE (x) != CONST_VECTOR ++ || GET_MODE_CLASS (mode) != MODE_VECTOR_INT) ++ return NULL_RTX; ++ ++ for (scalar_int_mode elem_mode: {QImode, HImode, SImode, DImode}) ++ { ++ machine_mode new_mode = ++ mode_for_vector (elem_mode, size / GET_MODE_SIZE (elem_mode)) ++ .require (); ++ rtx op = lowpart_subreg (new_mode, x, mode); ++ if (loongarch_const_vector_same_int_p (op, new_mode, -512, 511)) ++ return op; ++ } ++ ++ return NULL_RTX; ++} ++ + /* Return true if rtx constants of mode MODE should be put into a small + data section. */ + +@@ -2491,7 +2513,7 @@ loongarch_const_insns (rtx x) + case CONST_VECTOR: + if ((LSX_SUPPORTED_MODE_P (GET_MODE (x)) + || LASX_SUPPORTED_MODE_P (GET_MODE (x))) +- && loongarch_const_vector_same_int_p (x, GET_MODE (x), -512, 511)) ++ && loongarch_const_vector_vrepli (x, GET_MODE (x))) + return 1; + /* Fall through. */ + case CONST_DOUBLE: +@@ -4646,7 +4668,7 @@ loongarch_split_vector_move_p (rtx dest, rtx src) + /* Check for vector set to an immediate const vector with valid replicated + element. */ + if (FP_REG_RTX_P (dest) +- && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511)) ++ && loongarch_const_vector_vrepli (src, GET_MODE (src))) + return false; + + /* Check for vector load zero immediate. 
*/ +@@ -4782,13 +4804,15 @@ loongarch_output_move (rtx *operands) + && src_code == CONST_VECTOR + && CONST_INT_P (CONST_VECTOR_ELT (src, 0))) + { +- gcc_assert (loongarch_const_vector_same_int_p (src, mode, -512, 511)); ++ operands[1] = loongarch_const_vector_vrepli (src, mode); ++ gcc_assert (operands[1]); ++ + switch (GET_MODE_SIZE (mode)) + { + case 16: +- return "vrepli.%v0\t%w0,%E1"; ++ return "vrepli.%v1\t%w0,%E1"; + case 32: +- return "xvrepli.%v0\t%u0,%E1"; ++ return "xvrepli.%v1\t%u0,%E1"; + default: gcc_unreachable (); + } + } +diff --git a/gcc/testsuite/gcc.target/loongarch/vrepli.c b/gcc/testsuite/gcc.target/loongarch/vrepli.c +new file mode 100644 +index 000000000..8deeb4788 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vrepli.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx" } */ ++/* { dg-final { scan-assembler "\tvrepli\\.b\t\\\$vr\[0-9\]+,-35" } } */ ++/* { dg-final { scan-assembler "\txvrepli\\.b\t\\\$xr\[0-9\]+,-35" } } */ ++ ++int f __attribute__((vector_size (16))); ++int g __attribute__((vector_size (32))); ++ ++void ++test (void) ++{ ++ constexpr int x = (int) 0xdddddddd; ++ f = (typeof(f)){x, x, x, x}; ++ g = (typeof(g)){x, x, x, x, x, x, x, x}; ++} +-- +2.47.3 + diff --git a/0045-LoongArch-Allow-moving-TImode-vectors.patch b/0045-LoongArch-Allow-moving-TImode-vectors.patch new file mode 100644 index 0000000..3269b1f --- /dev/null +++ b/0045-LoongArch-Allow-moving-TImode-vectors.patch @@ -0,0 +1,222 @@ +From af5db3526083ec272d92f63f2dc715bde29c583d Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 24 Jan 2025 08:33:39 +0800 +Subject: [PATCH 45/59] LoongArch: Allow moving TImode vectors + +We have some vector instructions for operations on 128-bit integer, i.e. +TImode, vectors. Previously they had been modeled with unspecs, but +it's more natural to just model them with TImode vector RTL expressions. 
+ +For the preparation, allow moving V1TImode and V2TImode vectors in LSX +and LASX registers so we won't get a reload failure when we start to +save TImode vectors in these registers. + +This implicitly depends on the vrepli optimization: without it we'd try +"vrepli.q" which does not really exist and trigger an ICE. + +gcc/ChangeLog: + + * config/loongarch/lsx.md (mov): Remove. + (movmisalign): Remove. + (mov_lsx): Remove. + * config/loongarch/lasx.md (mov): Remove. + (movmisalign): Remove. + (mov_lasx): Remove. + * config/loongarch/loongarch-modes.def (V1TI): Add. + (V2TI): Mention in the comment. + * config/loongarch/loongarch.md (mode): Add V1TI and V2TI. + * config/loongarch/simd.md (ALLVEC_TI): New mode iterator. + (mov): Likewise. + (mov_simd): New define_insn_and_split. +--- + gcc/config/loongarch/lasx.md | 40 ---------------------- + gcc/config/loongarch/loongarch-modes.def | 3 +- + gcc/config/loongarch/loongarch.md | 2 +- + gcc/config/loongarch/lsx.md | 36 -------------------- + gcc/config/loongarch/simd.md | 42 ++++++++++++++++++++++++ + 5 files changed, 45 insertions(+), 78 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 640134acc..6b1a6d604 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -699,46 +699,6 @@ + DONE; + }) + +-(define_expand "mov" +- [(set (match_operand:LASX 0) +- (match_operand:LASX 1))] +- "ISA_HAS_LASX" +-{ +- if (loongarch_legitimize_move (mode, operands[0], operands[1])) +- DONE; +-}) +- +- +-(define_expand "movmisalign" +- [(set (match_operand:LASX 0) +- (match_operand:LASX 1))] +- "ISA_HAS_LASX" +-{ +- if (loongarch_legitimize_move (mode, operands[0], operands[1])) +- DONE; +-}) +- +-;; 256-bit LASX modes can only exist in LASX registers or memory. 
+-(define_insn "mov_lasx" +- [(set (match_operand:LASX 0 "nonimmediate_operand" "=f,f,R,*r,*f") +- (match_operand:LASX 1 "move_operand" "fYGYI,R,f,*f,*r"))] +- "ISA_HAS_LASX" +- { return loongarch_output_move (operands); } +- [(set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert") +- (set_attr "mode" "") +- (set_attr "length" "8,4,4,4,4")]) +- +- +-(define_split +- [(set (match_operand:LASX 0 "nonimmediate_operand") +- (match_operand:LASX 1 "move_operand"))] +- "reload_completed && ISA_HAS_LASX +- && loongarch_split_move_p (operands[0], operands[1])" +- [(const_int 0)] +-{ +- loongarch_split_move (operands[0], operands[1]); +- DONE; +-}) + + ;; LASX + (define_insn "add3" +diff --git a/gcc/config/loongarch/loongarch-modes.def b/gcc/config/loongarch/loongarch-modes.def +index ac9ea3142..fa556bc97 100644 +--- a/gcc/config/loongarch/loongarch-modes.def ++++ b/gcc/config/loongarch/loongarch-modes.def +@@ -32,9 +32,10 @@ VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ + /* For LARCH LSX 128 bits. */ + VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ + VECTOR_MODES (FLOAT, 16); /* V4SF V2DF */ ++VECTOR_MODE (INT, TI, 1); /* V1TI */ + + /* For LARCH LASX 256 bits. */ +-VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */ ++VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI V2TI */ + VECTOR_MODES (FLOAT, 32); /* V8SF V4DF */ + + /* Double-sized vector modes for vec_concat. */ +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index bf476251c..cd2336f11 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -188,7 +188,7 @@ + + ;; Main data type used by the insn + (define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,SF,DF,TF,FCC, +- V2DI,V4SI,V8HI,V16QI,V2DF,V4SF,V4DI,V8SI,V16HI,V32QI,V4DF,V8SF" ++ V1TI,V2DI,V4SI,V8HI,V16QI,V2DF,V4SF,V2TI,V4DI,V8SI,V16HI,V32QI,V4DF,V8SF" + (const_string "unknown")) + + ;; True if the main data type is twice the size of a word. 
+diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index cf6d05900..bce3149d4 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -575,42 +575,6 @@ + [(set_attr "type" "simd_sld") + (set_attr "mode" "")]) + +-(define_expand "mov" +- [(set (match_operand:LSX 0) +- (match_operand:LSX 1))] +- "ISA_HAS_LSX" +-{ +- if (loongarch_legitimize_move (mode, operands[0], operands[1])) +- DONE; +-}) +- +-(define_expand "movmisalign" +- [(set (match_operand:LSX 0) +- (match_operand:LSX 1))] +- "ISA_HAS_LSX" +-{ +- if (loongarch_legitimize_move (mode, operands[0], operands[1])) +- DONE; +-}) +- +-(define_insn "mov_lsx" +- [(set (match_operand:LSX 0 "nonimmediate_operand" "=f,f,R,*r,*f,*r") +- (match_operand:LSX 1 "move_operand" "fYGYI,R,f,*f,*r,*r"))] +- "ISA_HAS_LSX" +-{ return loongarch_output_move (operands); } +- [(set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert,simd_copy") +- (set_attr "mode" "")]) +- +-(define_split +- [(set (match_operand:LSX 0 "nonimmediate_operand") +- (match_operand:LSX 1 "move_operand"))] +- "reload_completed && ISA_HAS_LSX +- && loongarch_split_move_p (operands[0], operands[1])" +- [(const_int 0)] +-{ +- loongarch_split_move (operands[0], operands[1]); +- DONE; +-}) + + ;; Integer operations + (define_insn "add3" +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index d7f1e6ea2..ceb751e74 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -133,6 +133,48 @@ + ;; instruction here so we can avoid duplicating logics. + ;; ======================================================================= + ++ ++;; Move ++ ++;; Some immediate values in V1TI or V2TI may be stored in LSX or LASX ++;; registers, thus we need to allow moving them for reload. 
++(define_mode_iterator ALLVEC_TI [ALLVEC ++ (V1TI "ISA_HAS_LSX") ++ (V2TI "ISA_HAS_LASX")]) ++ ++(define_expand "mov" ++ [(set (match_operand:ALLVEC_TI 0) ++ (match_operand:ALLVEC_TI 1))] ++ "" ++{ ++ if (loongarch_legitimize_move (mode, operands[0], operands[1])) ++ DONE; ++}) ++ ++(define_expand "movmisalign" ++ [(set (match_operand:ALLVEC_TI 0) ++ (match_operand:ALLVEC_TI 1))] ++ "" ++{ ++ if (loongarch_legitimize_move (mode, operands[0], operands[1])) ++ DONE; ++}) ++ ++(define_insn_and_split "mov_simd" ++ [(set (match_operand:ALLVEC_TI 0 "nonimmediate_operand" "=f,f,R,*r,*f,*r") ++ (match_operand:ALLVEC_TI 1 "move_operand" "fYGYI,R,f,*f,*r,*r"))] ++ "" ++{ return loongarch_output_move (operands); } ++ "reload_completed && loongarch_split_move_p (operands[0], operands[1])" ++ [(const_int 0)] ++{ ++ loongarch_split_move (operands[0], operands[1]); ++ DONE; ++} ++ [(set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert,simd_copy") ++ (set_attr "mode" "")]) ++ ++ + ;; + ;; FP vector rounding instructions + ;; +-- +2.47.3 + diff --git a/0046-LoongArch-Implement-vec_widen_mult_-even-odd-_-for-L.patch b/0046-LoongArch-Implement-vec_widen_mult_-even-odd-_-for-L.patch new file mode 100644 index 0000000..0f06155 --- /dev/null +++ b/0046-LoongArch-Implement-vec_widen_mult_-even-odd-_-for-L.patch @@ -0,0 +1,108 @@ +From 9c58a3e3fcfa98655b422bb392fcc66cbb8859ed Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Mon, 20 Jan 2025 20:43:07 +0800 +Subject: [PATCH 46/59] LoongArch: Implement vec_widen_mult_{even,odd}_* for + LSX and LASX modes + +Since PR116142 has been fixed, now we can add the standard names so the +compiler will generate better code if the result of a widening +production is reduced. + +gcc/ChangeLog: + + * config/loongarch/simd.md (even_odd): New define_int_attr. + (vec_widen_mult__): New define_expand. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/wide-mul-reduc-1.c: New test. + * gcc.target/loongarch/wide-mul-reduc-2.c: New test. 
+--- + gcc/config/loongarch/simd.md | 20 +++++++++++++++++++ + .../gcc.target/loongarch/wide-mul-reduc-1.c | 18 +++++++++++++++++ + .../gcc.target/loongarch/wide-mul-reduc-2.c | 17 ++++++++++++++++ + 3 files changed, 55 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/wide-mul-reduc-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/wide-mul-reduc-2.c + +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index ceb751e74..3ff10700d 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -529,6 +529,26 @@ + DONE; + }) + ++;; Operations on elements at even/odd indices. ++(define_int_iterator zero_one [0 1]) ++(define_int_attr ev_od [(0 "ev") (1 "od")]) ++(define_int_attr even_odd [(0 "even") (1 "odd")]) ++ ++(define_expand "vec_widen_mult__" ++ [(match_operand: 0 "register_operand" "=f") ++ (match_operand:IVEC 1 "register_operand" " f") ++ (match_operand:IVEC 2 "register_operand" " f") ++ (any_extend (const_int 0)) ++ (const_int zero_one)] ++ "" ++{ ++ emit_insn ( ++ gen__vmulw__ (operands[0], ++ operands[1], ++ operands[2])); ++ DONE; ++}) ++ + ;; FP negation. 
+ (define_insn "neg2" + [(set (match_operand:FVEC 0 "register_operand" "=f") +diff --git a/gcc/testsuite/gcc.target/loongarch/wide-mul-reduc-1.c b/gcc/testsuite/gcc.target/loongarch/wide-mul-reduc-1.c +new file mode 100644 +index 000000000..d6e0da59d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/wide-mul-reduc-1.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx -fdump-tree-optimized" } */ ++/* { dg-final { scan-tree-dump "WIDEN_MULT_EVEN_EXPR" "optimized" } } */ ++/* { dg-final { scan-tree-dump "WIDEN_MULT_ODD_EXPR" "optimized" } } */ ++ ++typedef __INT32_TYPE__ i32; ++typedef __INT64_TYPE__ i64; ++ ++i32 x[8], y[8]; ++ ++i64 ++test (void) ++{ ++ i64 ret = 0; ++ for (int i = 0; i < 8; i++) ++ ret ^= (i64) x[i] * y[i]; ++ return ret; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/wide-mul-reduc-2.c b/gcc/testsuite/gcc.target/loongarch/wide-mul-reduc-2.c +new file mode 100644 +index 000000000..07a760188 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/wide-mul-reduc-2.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx" } */ ++/* { dg-final { scan-assembler "xvmaddw(ev|od)\\.d\\.w" } } */ ++ ++typedef __INT32_TYPE__ i32; ++typedef __INT64_TYPE__ i64; ++ ++i32 x[8], y[8]; ++ ++i64 ++test (void) ++{ ++ i64 ret = 0; ++ for (int i = 0; i < 8; i++) ++ ret += (i64) x[i] * y[i]; ++ return ret; ++} +-- +2.47.3 + diff --git a/0047-LoongArch-Implement-su-dot_prod-for-LSX-and-LASX-mod.patch b/0047-LoongArch-Implement-su-dot_prod-for-LSX-and-LASX-mod.patch new file mode 100644 index 0000000..3392d99 --- /dev/null +++ b/0047-LoongArch-Implement-su-dot_prod-for-LSX-and-LASX-mod.patch @@ -0,0 +1,90 @@ +From bcae62c2e361f10d7665f76dd9e058cb2c1e2c75 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Mon, 20 Jan 2025 23:13:19 +0800 +Subject: [PATCH 47/59] LoongArch: Implement [su]dot_prod* for LSX and LASX + modes + +Despite it's just a special case of "a widening product of which the +result used for reduction," 
having these standard names allows to +recognize the dot product pattern earlier and it may be beneficial to +optimization. Also fix some test failures with the test cases: + +- gcc.dg/vect/vect-reduc-chain-2.c +- gcc.dg/vect/vect-reduc-chain-3.c +- gcc.dg/vect/vect-reduc-chain-dot-slp-3.c +- gcc.dg/vect/vect-reduc-chain-dot-slp-4.c + +gcc/ChangeLog: + + * config/loongarch/simd.md (wvec_half): New define_mode_attr. + (dot_prod): New define_expand. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/wide-mul-reduc-2.c (dg-final): Scan + DOT_PROD_EXPR in optimized tree. +--- + gcc/config/loongarch/simd.md | 29 +++++++++++++++++++ + .../gcc.target/loongarch/wide-mul-reduc-2.c | 3 +- + 2 files changed, 31 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 3ff10700d..ecba22b2d 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -84,6 +84,12 @@ + (define_mode_attr vimode [(V2DF "v2di") (V4SF "v4si") + (V4DF "v4di") (V8SF "v8si")]) + ++;; Lower-case version. ++(define_mode_attr wvec_half [(V2DI "v1ti") (V4DI "v2ti") ++ (V4SI "v2di") (V8SI "v4di") ++ (V8HI "v4si") (V16HI "v8si") ++ (V16QI "v8hi") (V32QI "v16hi")]) ++ + ;; Integer vector modes with the same size, in lower-case. + (define_mode_attr allmode_i [(V2DI "v2di") (V4SI "v4si") + (V8HI "v8hi") (V16QI "v16qi") +@@ -616,3 +622,26 @@ + + ; The LoongArch ASX Instructions. 
+ (include "lasx.md") ++ ++(define_expand "dot_prod" ++ [(match_operand: 0 "register_operand" "=f,f") ++ (match_operand:IVEC 1 "register_operand" " f,f") ++ (match_operand:IVEC 2 "register_operand" " f,f") ++ (match_operand: 3 "reg_or_0_operand" " 0,YG") ++ (any_extend (const_int 0))] ++ "" ++{ ++ auto [op0, op1, op2, op3] = operands; ++ ++ if (op3 == CONST0_RTX (mode)) ++ emit_insn ( ++ gen__vmulwev__ (op0, op1, op2)); ++ else ++ emit_insn ( ++ gen__vmaddwev__ (op0, op3, op1, ++ op2)); ++ ++ emit_insn ( ++ gen__vmaddwod__ (op0, op0, op1, op2)); ++ DONE; ++}) +diff --git a/gcc/testsuite/gcc.target/loongarch/wide-mul-reduc-2.c b/gcc/testsuite/gcc.target/loongarch/wide-mul-reduc-2.c +index 07a760188..61e92e58f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/wide-mul-reduc-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/wide-mul-reduc-2.c +@@ -1,6 +1,7 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mlasx" } */ ++/* { dg-options "-O2 -mlasx -fdump-tree-optimized" } */ + /* { dg-final { scan-assembler "xvmaddw(ev|od)\\.d\\.w" } } */ ++/* { dg-final { scan-tree-dump "DOT_PROD_EXPR" "optimized" } } */ + + typedef __INT32_TYPE__ i32; + typedef __INT64_TYPE__ i64; +-- +2.47.3 + diff --git a/0048-LoongArch-Use-normal-RTL-pattern-instead-of-UNSPEC-f.patch b/0048-LoongArch-Use-normal-RTL-pattern-instead-of-UNSPEC-f.patch new file mode 100644 index 0000000..f2d6872 --- /dev/null +++ b/0048-LoongArch-Use-normal-RTL-pattern-instead-of-UNSPEC-f.patch @@ -0,0 +1,190 @@ +From 3c3295ea1c39cb69b1f8ae496a6692e048f807d4 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Thu, 13 Feb 2025 22:51:31 +0800 +Subject: [PATCH 48/59] LoongArch: Use normal RTL pattern instead of UNSPEC for + {x,}vsr{a,l}ri instructions + +Allowing (t + (1ul << imm >> 1)) >> imm to be recognized as a rounding +shift operation. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (UNSPEC_LASX_XVSRARI): Remove. + (UNSPEC_LASX_XVSRLRI): Remove. + (lasx_xvsrari_): Remove. + (lasx_xvsrlri_): Remove. 
+ * config/loongarch/lsx.md (UNSPEC_LSX_VSRARI): Remove. + (UNSPEC_LSX_VSRLRI): Remove. + (lsx_vsrari_): Remove. + (lsx_vsrlri_): Remove. + * config/loongarch/simd.md (simd__imm_round_): New + define_insn. + (_vri_): New define_expand. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-shift-imm-round.c: New test. +--- + gcc/config/loongarch/lasx.md | 22 -------------- + gcc/config/loongarch/lsx.md | 22 -------------- + gcc/config/loongarch/simd.md | 29 +++++++++++++++++++ + .../loongarch/vect-shift-imm-round.c | 11 +++++++ + 4 files changed, 40 insertions(+), 44 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-shift-imm-round.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 6b1a6d604..ddff02011 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -43,9 +43,7 @@ + UNSPEC_LASX_XVSAT_U + UNSPEC_LASX_XVREPL128VEI + UNSPEC_LASX_XVSRAR +- UNSPEC_LASX_XVSRARI + UNSPEC_LASX_XVSRLR +- UNSPEC_LASX_XVSRLRI + UNSPEC_LASX_XVSHUF + UNSPEC_LASX_XVSHUF_B + UNSPEC_LASX_BRANCH +@@ -2131,16 +2129,6 @@ + [(set_attr "type" "simd_shift") + (set_attr "mode" "")]) + +-(define_insn "lasx_xvsrari_" +- [(set (match_operand:ILASX 0 "register_operand" "=f") +- (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f") +- (match_operand 2 "const__operand" "")] +- UNSPEC_LASX_XVSRARI))] +- "ISA_HAS_LASX" +- "xvsrari.\t%u0,%u1,%2" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "")]) +- + (define_insn "lasx_xvsrlr_" + [(set (match_operand:ILASX 0 "register_operand" "=f") + (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f") +@@ -2151,16 +2139,6 @@ + [(set_attr "type" "simd_shift") + (set_attr "mode" "")]) + +-(define_insn "lasx_xvsrlri_" +- [(set (match_operand:ILASX 0 "register_operand" "=f") +- (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f") +- (match_operand 2 "const__operand" "")] +- UNSPEC_LASX_XVSRLRI))] +- "ISA_HAS_LASX" +- "xvsrlri.\t%u0,%u1,%2" +- 
[(set_attr "type" "simd_shift") +- (set_attr "mode" "")]) +- + (define_insn "lasx_xvssub_s_" + [(set (match_operand:ILASX 0 "register_operand" "=f") + (ss_minus:ILASX (match_operand:ILASX 1 "register_operand" "f") +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index bce3149d4..e79d20d37 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -44,9 +44,7 @@ + UNSPEC_LSX_VSAT_S + UNSPEC_LSX_VSAT_U + UNSPEC_LSX_VSRAR +- UNSPEC_LSX_VSRARI + UNSPEC_LSX_VSRLR +- UNSPEC_LSX_VSRLRI + UNSPEC_LSX_VSHUF + UNSPEC_LSX_VEXTW_S + UNSPEC_LSX_VEXTW_U +@@ -1884,16 +1882,6 @@ + [(set_attr "type" "simd_shift") + (set_attr "mode" "")]) + +-(define_insn "lsx_vsrari_" +- [(set (match_operand:ILSX 0 "register_operand" "=f") +- (unspec:ILSX [(match_operand:ILSX 1 "register_operand" "f") +- (match_operand 2 "const__operand" "")] +- UNSPEC_LSX_VSRARI))] +- "ISA_HAS_LSX" +- "vsrari.\t%w0,%w1,%2" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "")]) +- + (define_insn "lsx_vsrlr_" + [(set (match_operand:ILSX 0 "register_operand" "=f") + (unspec:ILSX [(match_operand:ILSX 1 "register_operand" "f") +@@ -1904,16 +1892,6 @@ + [(set_attr "type" "simd_shift") + (set_attr "mode" "")]) + +-(define_insn "lsx_vsrlri_" +- [(set (match_operand:ILSX 0 "register_operand" "=f") +- (unspec:ILSX [(match_operand:ILSX 1 "register_operand" "f") +- (match_operand 2 "const__operand" "")] +- UNSPEC_LSX_VSRLRI))] +- "ISA_HAS_LSX" +- "vsrlri.\t%w0,%w1,%2" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "")]) +- + (define_insn "lsx_vssub_s_" + [(set (match_operand:ILSX 0 "register_operand" "=f") + (ss_minus:ILSX (match_operand:ILSX 1 "register_operand" "f") +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index ecba22b2d..64122480b 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -617,6 +617,35 @@ + DONE; + }) + ++;; Integer shift right with rounding. 
++(define_insn "simd__imm_round_" ++ [(set (match_operand:IVEC 0 "register_operand" "=f") ++ (any_shiftrt:IVEC ++ (plus:IVEC ++ (match_operand:IVEC 1 "register_operand" "f") ++ (match_operand:IVEC 2 "const_vector_same_val_operand" "Uuvx")) ++ (match_operand:SI 3 "const__operand" "I")))] ++ "(HOST_WIDE_INT_1U << UINTVAL (operands[3]) >> 1) ++ == UINTVAL (CONST_VECTOR_ELT (operands[2], 0))" ++ "vri.\t%0,%1,%d3" ++ [(set_attr "type" "simd_shift") ++ (set_attr "mode" "")]) ++ ++(define_expand "_vri_" ++ [(match_operand:IVEC 0 "register_operand" "=f") ++ (match_operand:IVEC 1 "register_operand" " f") ++ (match_operand 2 "const__operand") ++ (any_shiftrt (const_int 0) (const_int 0))] ++ "" ++{ ++ auto addend = HOST_WIDE_INT_1U << UINTVAL (operands[2]) >> 1; ++ rtx addend_v = loongarch_gen_const_int_vector (mode, addend); ++ ++ emit_insn (gen_simd__imm_round_ (operands[0], operands[1], ++ addend_v, operands[2])); ++ DONE; ++}) ++ + ; The LoongArch SX Instructions. + (include "lsx.md") + +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-shift-imm-round.c b/gcc/testsuite/gcc.target/loongarch/vect-shift-imm-round.c +new file mode 100644 +index 000000000..6f16566ba +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-shift-imm-round.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mlsx" } */ ++/* { dg-final { scan-assembler "vsrari\\.w\t\\\$vr\[0-9\]+,\\\$vr\[0-9\]+,15" } } */ ++ ++int x __attribute__ ((vector_size (16))); ++ ++void ++f (void) ++{ ++ x = (x + (1 << 14)) >> 15; ++} +-- +2.47.3 + diff --git a/0049-LoongArch-Fix-incorrect-reorder-of-__lsx_vldx-and-__.patch b/0049-LoongArch-Fix-incorrect-reorder-of-__lsx_vldx-and-__.patch new file mode 100644 index 0000000..a06416a --- /dev/null +++ b/0049-LoongArch-Fix-incorrect-reorder-of-__lsx_vldx-and-__.patch @@ -0,0 +1,260 @@ +From 2d1525d7a95018970bc181e0cceaa992d1f91311 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 2 Mar 2025 19:02:50 +0800 +Subject: [PATCH 
49/59] LoongArch: Fix incorrect reorder of __lsx_vldx and + __lasx_xvldx [PR119084] + +They could be incorrectly reordered with store instructions like st.b +because the RTL expression does not have a memory_operand or a (mem) +expression. The incorrect reorder has been observed in openh264 LTO +build. + +Expand them to a (mem) expression instead of unspec to fix the issue. +Then we need to make loongarch_address_insns return 1 for +ADDRESS_REG_REG because the constraint "R" expects this behavior, or +the vldx instruction will be considered invalid by the register +allocate pass and turned to add.d + vld. Apply the ADDRESS_REG_REG +penalty in loongarch_address_cost instead, loongarch_rtx_costs should +also call loongarch_address_cost instead of loongarch_address_insns +then. + +Closes: https://github.com/cisco/openh264/issues/3857 + +gcc/ChangeLog: + + PR target/119084 + * config/loongarch/lasx.md (UNSPEC_LASX_XVLDX): Remove. + (lasx_xvldx): Remove. + * config/loongarch/lsx.md (UNSPEC_LSX_VLDX): Remove. + (lsx_vldx): Remove. + * config/loongarch/simd.md (QIVEC): New define_mode_iterator. + (_vldx): New define_expand. + * config/loongarch/loongarch.cc (loongarch_address_insns_1): New + static function with most logic factored out from ... + (loongarch_address_insns): ... here. Call + loongarch_address_insns_1 with reg_reg_cost = 1. + (loongarch_address_cost): Call loongarch_address_insns_1 with + reg_reg_cost = la_addr_reg_reg_cost. + +gcc/testsuite/ChangeLog: + + PR target/119084 + * gcc.target/loongarch/pr119084.c: New test. 
+--- + gcc/config/loongarch/lasx.md | 13 ----- + gcc/config/loongarch/loongarch.cc | 48 +++++++++++-------- + gcc/config/loongarch/lsx.md | 13 ----- + gcc/config/loongarch/simd.md | 9 ++++ + gcc/testsuite/gcc.target/loongarch/pr119084.c | 24 ++++++++++ + 5 files changed, 61 insertions(+), 46 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr119084.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index ddff02011..73ca7caaa 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -145,7 +145,6 @@ + UNSPEC_LASX_XVSSRLRN + UNSPEC_LASX_XVEXTL_QU_DU + UNSPEC_LASX_XVLDI +- UNSPEC_LASX_XVLDX + UNSPEC_LASX_XVSTX + UNSPEC_LASX_VECINIT_MERGE + UNSPEC_LASX_VEC_SET_INTERNAL +@@ -4605,18 +4604,6 @@ + [(set_attr "type" "simd_load") + (set_attr "mode" "V4DI")]) + +-(define_insn "lasx_xvldx" +- [(set (match_operand:V32QI 0 "register_operand" "=f") +- (unspec:V32QI [(match_operand:DI 1 "register_operand" "r") +- (match_operand:DI 2 "reg_or_0_operand" "rJ")] +- UNSPEC_LASX_XVLDX))] +- "ISA_HAS_LASX" +-{ +- return "xvldx\t%u0,%1,%z2"; +-} +- [(set_attr "type" "simd_load") +- (set_attr "mode" "V32QI")]) +- + (define_insn "lasx_xvstx" + [(set (mem:V32QI (plus:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "reg_or_0_operand" "rJ"))) +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 1f72f4922..c76f59d42 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2353,14 +2353,9 @@ loongarch_index_address_p (rtx addr, machine_mode mode ATTRIBUTE_UNUSED) + return true; + } + +-/* Return the number of instructions needed to load or store a value +- of mode MODE at address X. Return 0 if X isn't valid for MODE. +- Assume that multiword moves may need to be split into word moves +- if MIGHT_SPLIT_P, otherwise assume that a single load or store is +- enough. 
*/ +- +-int +-loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) ++static int ++loongarch_address_insns_1 (rtx x, machine_mode mode, bool might_split_p, ++ int reg_reg_cost) + { + struct loongarch_address_info addr; + int factor; +@@ -2395,7 +2390,7 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) + return factor; + + case ADDRESS_REG_REG: +- return factor * la_addr_reg_reg_cost; ++ return factor * reg_reg_cost; + + case ADDRESS_CONST_INT: + return lsx_p ? 0 : factor; +@@ -2410,6 +2405,18 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) + return 0; + } + ++/* Return the number of instructions needed to load or store a value ++ of mode MODE at address X. Return 0 if X isn't valid for MODE. ++ Assume that multiword moves may need to be split into word moves ++ if MIGHT_SPLIT_P, otherwise assume that a single load or store is ++ enough. */ ++ ++int ++loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) ++{ ++ return loongarch_address_insns_1 (x, mode, might_split_p, 1); ++} ++ + /* Return true if X fits within an unsigned field of BITS bits that is + shifted left SHIFT bits before being used. */ + +@@ -3736,6 +3743,17 @@ loongarch_set_reg_reg_cost (machine_mode mode) + } + } + ++/* Implement TARGET_ADDRESS_COST. */ ++ ++static int ++loongarch_address_cost (rtx addr, machine_mode mode, ++ addr_space_t as ATTRIBUTE_UNUSED, ++ bool speed ATTRIBUTE_UNUSED) ++{ ++ return loongarch_address_insns_1 (addr, mode, false, ++ la_addr_reg_reg_cost); ++} ++ + /* Implement TARGET_RTX_COSTS. 
*/ + + static bool +@@ -3804,7 +3822,7 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + *total = COSTS_N_INSNS (2); + return true; + } +- cost = loongarch_address_insns (addr, mode, true); ++ cost = loongarch_address_cost (addr, mode, true, speed); + if (cost > 0) + { + *total = COSTS_N_INSNS (cost + 1); +@@ -4391,16 +4409,6 @@ loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs) + vector_costs::finish_cost (scalar_costs); + } + +-/* Implement TARGET_ADDRESS_COST. */ +- +-static int +-loongarch_address_cost (rtx addr, machine_mode mode, +- addr_space_t as ATTRIBUTE_UNUSED, +- bool speed ATTRIBUTE_UNUSED) +-{ +- return loongarch_address_insns (addr, mode, false); +-} +- + /* Implement TARGET_INSN_COST. */ + + static int +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index e79d20d37..8cf7ad917 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -87,7 +87,6 @@ + UNSPEC_LSX_VSSRLRN + UNSPEC_LSX_VLDI + UNSPEC_LSX_VSHUF_B +- UNSPEC_LSX_VLDX + UNSPEC_LSX_VSTX + UNSPEC_LSX_VEXTL_QU_DU + UNSPEC_LSX_VSETEQZ_V +@@ -2920,18 +2919,6 @@ + [(set_attr "type" "simd_shf") + (set_attr "mode" "V16QI")]) + +-(define_insn "lsx_vldx" +- [(set (match_operand:V16QI 0 "register_operand" "=f") +- (unspec:V16QI [(match_operand:DI 1 "register_operand" "r") +- (match_operand:DI 2 "reg_or_0_operand" "rJ")] +- UNSPEC_LSX_VLDX))] +- "ISA_HAS_LSX" +-{ +- return "vldx\t%w0,%1,%z2"; +-} +- [(set_attr "type" "simd_load") +- (set_attr "mode" "V16QI")]) +- + (define_insn "lsx_vstx" + [(set (mem:V16QI (plus:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "reg_or_0_operand" "rJ"))) +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 64122480b..b49f1a7f4 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -181,6 +181,15 @@ + (set_attr "mode" "")]) + + ++;; REG + REG load ++ ++(define_mode_iterator QIVEC [(V16QI "ISA_HAS_LSX") (V32QI 
"ISA_HAS_LASX")]) ++(define_expand "_vldx" ++ [(set (match_operand:QIVEC 0 "register_operand" "=f") ++ (mem:QIVEC (plus:DI (match_operand:DI 1 "register_operand") ++ (match_operand:DI 2 "register_operand"))))] ++ "TARGET_64BIT") ++ + ;; + ;; FP vector rounding instructions + ;; +diff --git a/gcc/testsuite/gcc.target/loongarch/pr119084.c b/gcc/testsuite/gcc.target/loongarch/pr119084.c +new file mode 100644 +index 000000000..b59433038 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr119084.c +@@ -0,0 +1,24 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -mlsx" } */ ++/* { dg-require-effective-target loongarch_sx_hw } */ ++ ++typedef signed char V16QI __attribute__ ((vector_size (16))); ++static char x[128]; ++ ++__attribute__ ((noipa)) int ++noopt (int x) ++{ ++ return x; ++} ++ ++int ++main (void) ++{ ++ int t = noopt (32); ++ ++ x[32] = 1; ++ ++ V16QI y = __builtin_lsx_vldx (x, t); ++ if (y[0] != 1) ++ __builtin_trap (); ++} +-- +2.47.3 + diff --git a/0050-LoongArch-Fix-ICE-when-trying-to-recognize-bitwise-a.patch b/0050-LoongArch-Fix-ICE-when-trying-to-recognize-bitwise-a.patch new file mode 100644 index 0000000..7848fc4 --- /dev/null +++ b/0050-LoongArch-Fix-ICE-when-trying-to-recognize-bitwise-a.patch @@ -0,0 +1,112 @@ +From 52cb54f9cc6bedf7db15e2aca7e0a92e71e674be Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 7 Mar 2025 12:49:54 +0800 +Subject: [PATCH 50/59] LoongArch: Fix ICE when trying to recognize bitwise + + alsl.w pair [PR119127] + +When we call loongarch_reassoc_shift_bitwise for +_alsl_reversesi_extend, the mask is in DImode but we are trying +to operate it in SImode, causing an ICE. + +To fix the issue sign-extend the mask into the mode we want. And also +specially handle the case the mask is extended into -1 to avoid a +miss-optimization. + +gcc/ChangeLog: + + PR target/119127 + * config/loongarch/loongarch.cc + (loongarch_reassoc_shift_bitwise): Sign extend mask to mode, + specially handle the case it's extended to -1. 
+ * config/loongarch/loongarch.md + (loongarch_reassoc_shift_bitwise): Update the comment for the + special case. +--- + gcc/config/loongarch/loongarch.cc | 22 +++++++++++++------ + gcc/config/loongarch/loongarch.md | 6 ++--- + gcc/testsuite/gcc.target/loongarch/pr119127.c | 14 ++++++++++++ + 3 files changed, 31 insertions(+), 11 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr119127.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index c76f59d42..ecba3ebcb 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4565,8 +4565,22 @@ loongarch_reassoc_shift_bitwise (bool is_and, rtx shamt, rtx mask, + if (ctz_hwi (INTVAL (mask)) < INTVAL (shamt)) + return NULL_RTX; + ++ /* When trying alsl.w, deliberately ignore the high bits. */ ++ mask = gen_int_mode (UINTVAL (mask), mode); ++ + rtx new_mask = simplify_const_binary_operation (LSHIFTRT, mode, mask, + shamt); ++ ++ /* Do an arithmetic shift for checking ins_zero_bitmask_operand or -1: ++ ashiftrt (0xffffffff00000000, 2) is 0xffffffff60000000 which is an ++ ins_zero_bitmask_operand, but lshiftrt will produce ++ 0x3fffffff60000000. */ ++ rtx new_mask_1 = simplify_const_binary_operation (ASHIFTRT, mode, mask, ++ shamt); ++ ++ if (is_and && const_m1_operand (new_mask_1, mode)) ++ return new_mask_1; ++ + if (const_uns_arith_operand (new_mask, mode)) + return new_mask; + +@@ -4576,13 +4590,7 @@ loongarch_reassoc_shift_bitwise (bool is_and, rtx shamt, rtx mask, + if (low_bitmask_operand (new_mask, mode)) + return new_mask; + +- /* Do an arithmetic shift for checking ins_zero_bitmask_operand: +- ashiftrt (0xffffffff00000000, 2) is 0xffffffff60000000 which is an +- ins_zero_bitmask_operand, but lshiftrt will produce +- 0x3fffffff60000000. */ +- new_mask = simplify_const_binary_operation (ASHIFTRT, mode, mask, +- shamt); +- return ins_zero_bitmask_operand (new_mask, mode) ? 
new_mask : NULL_RTX; ++ return ins_zero_bitmask_operand (new_mask_1, mode) ? new_mask_1 : NULL_RTX; + } + + /* Implement TARGET_CONSTANT_ALIGNMENT. */ +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index cd2336f11..dacbc5ba0 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -3210,10 +3210,8 @@ + emit_insn (gen_di3 (operands[0], operands[1], operands[3])); + else + { +- /* Hmm would we really reach here? If we reach here we'd have +- a miss-optimization in the generic code (as it should have +- optimized this to alslsi3_extend_subreg). But let's be safe +- than sorry. */ ++ /* We can end up here with things like: ++ x:DI = sign_extend(a:SI + ((b:DI << 2) & 0xfffffffc)#0) */ + gcc_checking_assert (); + emit_move_insn (operands[0], operands[1]); + } +diff --git a/gcc/testsuite/gcc.target/loongarch/pr119127.c b/gcc/testsuite/gcc.target/loongarch/pr119127.c +new file mode 100644 +index 000000000..4e253beb0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr119127.c +@@ -0,0 +1,14 @@ ++/* PR target/119127: ICE caused by operating DImode const in SImode */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++ ++int x; ++struct Type { ++ unsigned SubclassData : 24; ++} y; ++ ++void ++test (void) ++{ ++ x = y.SubclassData * 37; ++} +-- +2.47.3 + diff --git a/0051-LoongArch-Don-t-use-C-17-feature-PR119238.patch b/0051-LoongArch-Don-t-use-C-17-feature-PR119238.patch new file mode 100644 index 0000000..7a30167 --- /dev/null +++ b/0051-LoongArch-Don-t-use-C-17-feature-PR119238.patch @@ -0,0 +1,49 @@ +From 14bd576458d220f7ff180d00843358214ec2c937 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Tue, 15 Jul 2025 01:31:00 +0000 +Subject: [PATCH 51/59] LoongArch: Don't use C++17 feature [PR119238] + +Structured binding is a C++17 feature but the GCC code base is in C++14. 
+ +gcc/ChangeLog: + + PR target/119238 + * config/loongarch/simd.md (dot_prod): + Stop using structured binding. +--- + gcc/config/loongarch/simd.md | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index b49f1a7f4..5186dfbf1 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -669,17 +669,19 @@ + (any_extend (const_int 0))] + "" + { +- auto [op0, op1, op2, op3] = operands; ++ rtx *op = operands; + +- if (op3 == CONST0_RTX (mode)) ++ if (op[3] == CONST0_RTX (mode)) + emit_insn ( +- gen__vmulwev__ (op0, op1, op2)); ++ gen__vmulwev__ (op[0], op[1], ++ op[2])); + else + emit_insn ( +- gen__vmaddwev__ (op0, op3, op1, +- op2)); ++ gen__vmaddwev__ (op[0], op[3], ++ op[1], op[2])); + + emit_insn ( +- gen__vmaddwod__ (op0, op0, op1, op2)); ++ gen__vmaddwod__ (op[0], op[0], ++ op[1], op[2])); + DONE; + }) +-- +2.47.3 + diff --git a/0052-LoongArch-Add-ABI-names-for-FPR.patch b/0052-LoongArch-Add-ABI-names-for-FPR.patch new file mode 100644 index 0000000..f300b9f --- /dev/null +++ b/0052-LoongArch-Add-ABI-names-for-FPR.patch @@ -0,0 +1,90 @@ +From 432ffd145be3b207b68a35636488d8353df97e84 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 16 Mar 2025 14:19:53 +0800 +Subject: [PATCH 52/59] LoongArch: Add ABI names for FPR + +We already allow the ABI names for GPR in inline asm clobber list, so +for consistency allow the ABI names for FPR as well. + +Reported-by: Yao Zi + +gcc/ChangeLog: + + * config/loongarch/loongarch.h (ADDITIONAL_REGISTER_NAMES): Add + fa0-fa7, ft0-ft15, and fs0-fs7. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/regname-float-abi.c: New test.
+--- + gcc/config/loongarch/loongarch.h | 32 +++++++++++++++++++ + .../gcc.target/loongarch/regname-float-abi.c | 14 ++++++++ + 2 files changed, 46 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/regname-float-abi.c + +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index c93df7ad9..10b5a0b0f 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -942,6 +942,38 @@ typedef struct { + { "s6", 29 + GP_REG_FIRST }, \ + { "s7", 30 + GP_REG_FIRST }, \ + { "s8", 31 + GP_REG_FIRST }, \ ++ { "fa0", 0 + FP_REG_FIRST }, \ ++ { "fa1", 1 + FP_REG_FIRST }, \ ++ { "fa2", 2 + FP_REG_FIRST }, \ ++ { "fa3", 3 + FP_REG_FIRST }, \ ++ { "fa4", 4 + FP_REG_FIRST }, \ ++ { "fa5", 5 + FP_REG_FIRST }, \ ++ { "fa6", 6 + FP_REG_FIRST }, \ ++ { "fa7", 7 + FP_REG_FIRST }, \ ++ { "ft0", 8 + FP_REG_FIRST }, \ ++ { "ft1", 9 + FP_REG_FIRST }, \ ++ { "ft2", 10 + FP_REG_FIRST }, \ ++ { "ft3", 11 + FP_REG_FIRST }, \ ++ { "ft4", 12 + FP_REG_FIRST }, \ ++ { "ft5", 13 + FP_REG_FIRST }, \ ++ { "ft6", 14 + FP_REG_FIRST }, \ ++ { "ft7", 15 + FP_REG_FIRST }, \ ++ { "ft8", 16 + FP_REG_FIRST }, \ ++ { "ft9", 17 + FP_REG_FIRST }, \ ++ { "ft10", 18 + FP_REG_FIRST }, \ ++ { "ft11", 19 + FP_REG_FIRST }, \ ++ { "ft12", 20 + FP_REG_FIRST }, \ ++ { "ft13", 21 + FP_REG_FIRST }, \ ++ { "ft14", 22 + FP_REG_FIRST }, \ ++ { "ft15", 23 + FP_REG_FIRST }, \ ++ { "fs0", 24 + FP_REG_FIRST }, \ ++ { "fs1", 25 + FP_REG_FIRST }, \ ++ { "fs2", 26 + FP_REG_FIRST }, \ ++ { "fs3", 27 + FP_REG_FIRST }, \ ++ { "fs4", 28 + FP_REG_FIRST }, \ ++ { "fs5", 29 + FP_REG_FIRST }, \ ++ { "fs6", 30 + FP_REG_FIRST }, \ ++ { "fs7", 31 + FP_REG_FIRST }, \ + { "v0", 4 + GP_REG_FIRST }, \ + { "v1", 5 + GP_REG_FIRST }, \ + { "vr0", 0 + FP_REG_FIRST }, \ +diff --git a/gcc/testsuite/gcc.target/loongarch/regname-float-abi.c b/gcc/testsuite/gcc.target/loongarch/regname-float-abi.c +new file mode 100644 +index 000000000..2224304fc +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/loongarch/regname-float-abi.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-Wno-pedantic -std=gnu90 -mfpu=64" } */ ++ ++register double fs0 asm("fs0"); /* { dg-note "conflicts with 'fs0'" } */ ++register double f24 asm("$f24"); /* { dg-warning "register of 'f24' used for multiple global register variables" } */ ++ ++void ++test (void) ++{ ++ asm("" ::: "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7", ++ "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7", ++ "ft8", "ft9", "ft10", "ft11", "ft12", "ft13", "ft14", "ft15", ++ "fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7"); ++} +-- +2.47.3 + diff --git a/0053-LoongArch-Support-Q-suffix-for-__float128.patch b/0053-LoongArch-Support-Q-suffix-for-__float128.patch new file mode 100644 index 0000000..bfedf72 --- /dev/null +++ b/0053-LoongArch-Support-Q-suffix-for-__float128.patch @@ -0,0 +1,76 @@ +From cb47302fa0f4e61ab8d2fcc357fd9eba6c7ea6d9 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Sat, 22 Mar 2025 14:37:01 +0800 +Subject: [PATCH 53/59] LoongArch: Support Q suffix for __float128. + +In r14-3635 supports `__float128`, but does not support the 'q/Q' suffix. + + PR target/119408 + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_c_mode_for_suffix): New. + (TARGET_C_MODE_FOR_SUFFIX): Define. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/pr119408.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 13 +++++++++++++ + gcc/testsuite/gcc.target/loongarch/pr119408.c | 12 ++++++++++++ + 2 files changed, 25 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr119408.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index ecba3ebcb..c90fc726f 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -11167,6 +11167,16 @@ loongarch_asm_code_end (void) + #undef DUMP_FEATURE + } + ++/* Target hook for c_mode_for_suffix. 
*/ ++static machine_mode ++loongarch_c_mode_for_suffix (char suffix) ++{ ++ if (suffix == 'q') ++ return TFmode; ++ ++ return VOIDmode; ++} ++ + /* Initialize the GCC target structure. */ + #undef TARGET_ASM_ALIGNED_HI_OP + #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" +@@ -11435,6 +11445,9 @@ loongarch_asm_code_end (void) + #undef TARGET_OPTION_VALID_ATTRIBUTE_P + #define TARGET_OPTION_VALID_ATTRIBUTE_P loongarch_option_valid_attribute_p + ++#undef TARGET_C_MODE_FOR_SUFFIX ++#define TARGET_C_MODE_FOR_SUFFIX loongarch_c_mode_for_suffix ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + #include "gt-loongarch.h" +diff --git a/gcc/testsuite/gcc.target/loongarch/pr119408.c b/gcc/testsuite/gcc.target/loongarch/pr119408.c +new file mode 100644 +index 000000000..f46399aa0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr119408.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -Wno-pedantic" } */ ++ ++__float128 a; ++__float128 b; ++void ++test (void) ++{ ++ a = 1.11111111Q; ++ b = 1.434345q; ++} ++ +-- +2.47.3 + diff --git a/0054-LoongArch-Set-default-alignment-for-functions-jumps-.patch b/0054-LoongArch-Set-default-alignment-for-functions-jumps-.patch new file mode 100644 index 0000000..6be2073 --- /dev/null +++ b/0054-LoongArch-Set-default-alignment-for-functions-jumps-.patch @@ -0,0 +1,91 @@ +From 4b04d8ed3d17f52862798b7883225ddffb4a446e Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Thu, 27 Mar 2025 11:27:40 +0800 +Subject: [PATCH 54/59] LoongArch: Set default alignment for functions jumps + loops and labels. + +Based on r15-7624, a set of align combinations with better performance +was tested through spec2006. + +LA464: -falign-loops=8 -falign-functions=32 -falign-jumps=32 -falign-labels=8 +LA664: -falign-loops=16 -falign-functions=16 -falign-jumps=32 -falign-labels=8 + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.cc + (la464_align): Add settings for labels. + (la664_align): Likewise. 
+ * config/loongarch/loongarch-opts.cc + (loongarch_target_option_override): Likewise. + * config/loongarch/loongarch-tune.h + (struct loongarch_align): Implement the function `label_`. +--- + gcc/config/loongarch/loongarch-def.cc | 4 ++-- + gcc/config/loongarch/loongarch-opts.cc | 3 +++ + gcc/config/loongarch/loongarch-tune.h | 9 ++++++++- + 3 files changed, 13 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 7f9fb6a37..ba2ce8404 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -107,12 +107,12 @@ array_tune loongarch_cpu_cache = + + static inline loongarch_align la464_align () + { +- return loongarch_align ().function_ ("32").loop_ ("16").jump_ ("16"); ++ return loongarch_align ().function_ ("32").loop_ ("8").jump_ ("32").label_ ("8"); + } + + static inline loongarch_align la664_align () + { +- return loongarch_align ().function_ ("8").loop_ ("8").jump_ ("32"); ++ return loongarch_align ().function_ ("16").loop_ ("16").jump_ ("32").label_ ("8"); + } + + array_tune loongarch_cpu_align = +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 0d9eb58b7..ef877ae16 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -965,6 +965,9 @@ loongarch_target_option_override (struct loongarch_target *target, + + if (opts->x_flag_align_jumps && !opts->x_str_align_jumps) + opts->x_str_align_jumps = loongarch_cpu_align[target->cpu_tune].jump; ++ ++ if (opts->x_flag_align_labels && !opts->x_str_align_labels) ++ opts->x_str_align_labels = loongarch_cpu_align[target->cpu_tune].label; + } + + /* Set up parameters to be used in prefetching algorithm. 
*/ +diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h +index b9434305e..2fff0564a 100644 +--- a/gcc/config/loongarch/loongarch-tune.h ++++ b/gcc/config/loongarch/loongarch-tune.h +@@ -177,8 +177,9 @@ struct loongarch_align { + const char *function; /* default value for -falign-functions */ + const char *loop; /* default value for -falign-loops */ + const char *jump; /* default value for -falign-jumps */ ++ const char *label; /* default value for -falign-labels */ + +- loongarch_align () : function (nullptr), loop (nullptr), jump (nullptr) {} ++ loongarch_align () : function (nullptr), loop (nullptr), jump (nullptr), label (nullptr) {} + + loongarch_align function_ (const char *_function) + { +@@ -197,6 +198,12 @@ struct loongarch_align { + jump = _jump; + return *this; + } ++ ++ loongarch_align label_ (const char *_label) ++ { ++ label = _label; ++ return *this; ++ } + }; + + #endif /* LOONGARCH_TUNE_H */ +-- +2.47.3 + diff --git a/0055-LoongArch-Make-gen-evolution.awk-compatible-with-Fre.patch b/0055-LoongArch-Make-gen-evolution.awk-compatible-with-Fre.patch new file mode 100644 index 0000000..1564e58 --- /dev/null +++ b/0055-LoongArch-Make-gen-evolution.awk-compatible-with-Fre.patch @@ -0,0 +1,43 @@ +From 28ee5e9952d8938da628f3336a54a58e5550d722 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Wed, 2 Apr 2025 10:41:18 +0800 +Subject: [PATCH 55/59] LoongArch: Make gen-evolution.awk compatible with + FreeBSD awk + +Avoid using gensub that FreeBSD awk lacks, use gsub and split those each +of gawk, mawk, and FreeBSD awk provides. + +Reported-by: mpysw@vip.163.com +Link: https://man.freebsd.org/cgi/man.cgi?query=awk + +gcc/ChangeLog: + + * config/loongarch/genopts/gen-evolution.awk: Avoid using gensub + that FreeBSD awk lacks. 
+--- + gcc/config/loongarch/genopts/gen-evolution.awk | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/gen-evolution.awk b/gcc/config/loongarch/genopts/gen-evolution.awk +index 1c8004e41..a2ecd9a8b 100644 +--- a/gcc/config/loongarch/genopts/gen-evolution.awk ++++ b/gcc/config/loongarch/genopts/gen-evolution.awk +@@ -33,10 +33,12 @@ BEGIN { + { + cpucfg_word[NR] = $1 + cpucfg_bit_in_word[NR] = $2 +- name[NR] = gensub(/-/, "_", "g", $3) ++ name[NR] = $3 ++ gsub("-", "_", name[NR]) + name_capitalized[NR] = toupper(name[NR]) +- isa_version_major[NR] = gensub(/^([1-9][0-9]*)\.([0-9]+)$/, "\\1", 1, $4) +- isa_version_minor[NR] = gensub(/^([1-9][0-9]*)\.([0-9]+)$/, "\\2", 1, $4) ++ split($4, isa_ver, "\\.") ++ isa_version_major[NR] = isa_ver[1] ++ isa_version_minor[NR] = isa_ver[2] + + $1 = $2 = $3 = $4 = "" + sub (/^\s*/, "") +-- +2.47.3 + diff --git a/0056-LoongArch-Fix-awk-sed-usage-for-compatibility.patch b/0056-LoongArch-Fix-awk-sed-usage-for-compatibility.patch new file mode 100644 index 0000000..dce3888 --- /dev/null +++ b/0056-LoongArch-Fix-awk-sed-usage-for-compatibility.patch @@ -0,0 +1,130 @@ +From d96503f38296483f53b0c61257cd572f1f861285 Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Mon, 7 Apr 2025 10:31:46 +0800 +Subject: [PATCH 56/59] LoongArch: Fix awk / sed usage for compatibility + +Tested with nawk, mawk, and gawk. + +gcc/ChangeLog: + + * config/loongarch/genopts/gen-evolution.awk: remove + usage of "asort". + * config/loongarch/genopts/genstr.sh: replace sed with awk. 
+--- + .../loongarch/genopts/gen-evolution.awk | 12 +++- + gcc/config/loongarch/genopts/genstr.sh | 57 ++++++++++--------- + 2 files changed, 40 insertions(+), 29 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/gen-evolution.awk b/gcc/config/loongarch/genopts/gen-evolution.awk +index a2ecd9a8b..97f7b0a33 100644 +--- a/gcc/config/loongarch/genopts/gen-evolution.awk ++++ b/gcc/config/loongarch/genopts/gen-evolution.awk +@@ -101,10 +101,18 @@ function gen_cpucfg_useful_idx() + idx_list[j++] = i+0 + delete idx_bucket + +- asort (idx_list) ++ for (i = 1; i < j; i++) { ++ t = i ++ for (k = i + 1; k < j; k++) ++ t = idx_list[k] < idx_list[t] ? k : t ++ ++ k = idx_list[t] ++ idx_list[t] = idx_list[i] ++ idx_list[i] = k ++ } + + print "static constexpr int cpucfg_useful_idx[] = {" +- for (i in idx_list) ++ for (i = 1; i < j; i++) + printf(" %d,\n", idx_list[i]) + print "};" + +diff --git a/gcc/config/loongarch/genopts/genstr.sh b/gcc/config/loongarch/genopts/genstr.sh +index 3e86c8152..4e3d2e3e5 100755 +--- a/gcc/config/loongarch/genopts/genstr.sh ++++ b/gcc/config/loongarch/genopts/genstr.sh +@@ -51,18 +51,18 @@ along with GCC; see the file COPYING3. If not see + #define LOONGARCH_STR_H + EOF + +- sed -e '/^$/n' -e 's@#.*$@@' -e '/^$/d' \ +- -e 's@^\([^ \t]\+\)[ \t]*\([^ \t]*\)@#define \1 "\2"@' \ +- loongarch-strings ++ awk '/^#.*$/ { next } /^$/ { print; next } ++ { printf ("#define %s \"%s\"\n", $1, $2) }' \ ++ loongarch-strings + + echo + +- # Generate the strings from isa-evolution.in. +- awk '{ +- a=$3 +- gsub(/-/, "_", a) +- print("#define OPTSTR_"toupper(a)"\t\""$3"\"") +- }' isa-evolution.in ++ # Generate the strings from isa-evolution.in. ++ awk '{ ++ a=$3 ++ gsub(/-/, "_", a) ++ print("#define OPTSTR_"toupper(a)"\t\""$3"\"") ++ }' isa-evolution.in + + echo + echo "#endif /* LOONGARCH_STR_H */" +@@ -73,18 +73,8 @@ EOF + # according to the key-value pairs defined in loongarch-strings. 
+ + gen_options() { +- +- sed -e '/^$/n' -e 's@#.*$@@' -e '/^$/d' \ +- -e 's@^\([^ \t]\+\)[ \t]*\([^ \t]*\)@\1="\2"@' \ +- loongarch-strings | { \ +- +- # read the definitions +- while read -r line; do +- eval "$line" +- done +- +- # print a header +- cat << EOF ++ # print a header ++ cat << EOF + ; Generated by "genstr" from the template "loongarch.opt.in" + ; and definitions from "loongarch-strings" and "isa-evolution.in". + ; +@@ -95,12 +85,25 @@ gen_options() { + ; + EOF + +- # make the substitutions +- sed -e 's@"@\\"@g' -e 's/@@\([^@]\+\)@@/${\1}/g' loongarch.opt.in | \ +- while read -r line; do +- eval "echo \"$line\"" +- done +- } ++ # Generate loongarch.opt. ++ awk 'BEGIN { ++ delete strtab ++ while (getline < "loongarch-strings" > 0) { ++ if ($0 ~ /^#.*$/ || $0 ~ /^$/) continue ++ strtab[$1] = $2 ++ } ++ } ++ { ++ n = split($0, tmp, "@@") ++ for (i = 2; i <= n; i += 2) ++ tmp[i] = strtab[tmp[i]] ++ ++ for (i = 1; i <= n; i++) ++ printf("%s", tmp[i]) ++ printf ("\n") ++ ++ }' loongarch.opt.in ++ + + # Generate the strings from isa-evolution.in. + awk '{ +-- +2.47.3 + diff --git a/0057-LoongArch-Change-dg-do-what-default-save-and-restore.patch b/0057-LoongArch-Change-dg-do-what-default-save-and-restore.patch new file mode 100644 index 0000000..6512f09 --- /dev/null +++ b/0057-LoongArch-Change-dg-do-what-default-save-and-restore.patch @@ -0,0 +1,57 @@ +From 5b120eec684f98f6fd690431fdbd1d484ae472ae Mon Sep 17 00:00:00 2001 +From: Xing Li +Date: Wed, 16 Apr 2025 10:29:57 +0800 +Subject: [PATCH 57/59] LoongArch: Change {dg-do-what-default} save and restore + logical. + +The set of {dg-do-what-default} to 'run' may lead some test hang +during make check. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/loongarch-vector.exp: Change + {dg-do-what-default} save and restore logical. 
+--- + .../gcc.target/loongarch/vector/loongarch-vector.exp | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp +index d53bee52a..b43fd7af7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp ++++ b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp +@@ -35,7 +35,7 @@ dg-init + + # If the target hardware supports LSX, the default action is "run", otherwise + # just "compile". +-global dg-do-what-default ++set saved-dg-do-what-default ${dg-do-what-default} + if {[check_effective_target_loongarch_sx_hw]} then { + set dg-do-what-default run + } else { +@@ -45,6 +45,7 @@ if {[check_effective_target_loongarch_sx_hw]} then { + #Main loop. + dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lsx/*.\[cS\]]] \ + " -mlsx" $DEFAULT_CFLAGS ++set dg-do-what-default ${saved-dg-do-what-default} + + dg-finish + +@@ -52,7 +53,7 @@ dg-init + # If the target hardware supports LASX, the default action is "run", otherwise + # just "compile". + +-global dg-do-what-default ++set saved-dg-do-what-default ${dg-do-what-default} + if {[check_effective_target_loongarch_asx_hw]} then { + set dg-do-what-default run + } else { +@@ -61,5 +62,6 @@ if {[check_effective_target_loongarch_asx_hw]} then { + + dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lasx/*.\[cS\]]] \ + " -mlasx" $DEFAULT_CFLAGS ++set dg-do-what-default ${saved-dg-do-what-default} + # All done. 
+ dg-finish +-- +2.47.3 + diff --git a/0058-LoongArch-Prevent-subreg-of-subreg-in-CRC.patch b/0058-LoongArch-Prevent-subreg-of-subreg-in-CRC.patch new file mode 100644 index 0000000..6cadafb --- /dev/null +++ b/0058-LoongArch-Prevent-subreg-of-subreg-in-CRC.patch @@ -0,0 +1,68 @@ +From ead103f80490a6c244600136d1da2e738b36d71e Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Wed, 2 Jul 2025 15:28:33 +0800 +Subject: [PATCH 58/59] LoongArch: Prevent subreg of subreg in CRC + +The register_operand predicate can match subreg, then we'd have a subreg +of subreg and it's invalid. Use lowpart_subreg to avoid the nested + subreg. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (crc_combine): Avoid nested + subreg. + +gcc/testsuite/ChangeLog: + + * gcc.c-torture/compile/pr120708.c: New test. +--- + gcc/config/loongarch/loongarch.md | 3 ++- + .../gcc.c-torture/compile/pr120708.c | 20 +++++++++++++++++++ + 2 files changed, 22 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr120708.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index dacbc5ba0..ea7e97a47 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -4594,9 +4594,10 @@ + "&& true" + [(set (match_dup 3) (match_dup 2)) + (set (match_dup 0) +- (unspec:SI [(match_dup 3) (subreg:SI (match_dup 1) 0)] CRC))] ++ (unspec:SI [(match_dup 3) (match_dup 1)] CRC))] + { + operands[3] = gen_reg_rtx (mode); ++ operands[1] = lowpart_subreg (SImode, operands[1], DImode); + }) + + ;; With normal or medium code models, if the only use of a pc-relative +diff --git a/gcc/testsuite/gcc.c-torture/compile/pr120708.c b/gcc/testsuite/gcc.c-torture/compile/pr120708.c +new file mode 100644 +index 000000000..9b37e608d +--- /dev/null ++++ b/gcc/testsuite/gcc.c-torture/compile/pr120708.c +@@ -0,0 +1,20 @@ ++typedef __UINT8_TYPE__ uint8_t; ++typedef __UINT32_TYPE__ uint32_t; ++ ++typedef struct ++{ ++ uint32_t dword[2]; ++ 
uint8_t byte[8]; ++} reg64_t; ++reg64_t TestF20F_opgd, TestF20F_oped; ++ ++void ++TestF20F () ++{ ++ TestF20F_opgd.dword[0] ^= TestF20F_oped.byte[0]; ++ for (int i = 0; i < 8; i++) ++ if (TestF20F_opgd.dword[0] & 1) ++ TestF20F_opgd.dword[0] = TestF20F_opgd.dword[0] >> 1 ^ (uint32_t)2197175160UL; ++ else ++ TestF20F_opgd.dword[0] = TestF20F_opgd.dword[0] >> 1; ++} +-- +2.47.3 + diff --git a/0059-LoongArch-Fix-errors-due-to-version-differences.patch b/0059-LoongArch-Fix-errors-due-to-version-differences.patch new file mode 100644 index 0000000..f774f63 --- /dev/null +++ b/0059-LoongArch-Fix-errors-due-to-version-differences.patch @@ -0,0 +1,538 @@ +From 943a9c7517bd711232620c533faf816eb6964a9e Mon Sep 17 00:00:00 2001 +From: Peng Fan +Date: Thu, 17 Jul 2025 08:40:35 +0000 +Subject: [PATCH 59/59] LoongArch: Fix errors due to version differences + +Signed-off-by: Peng Fan +--- + gcc/config/loongarch/loongarch-c.cc | 29 ++-- + gcc/config/loongarch/loongarch.md | 158 +----------------- + gcc/config/loongarch/simd.md | 60 ++----- + gcc/testsuite/g++.target/loongarch/crc-scan.C | 13 -- + gcc/testsuite/g++.target/loongarch/crc.C | 120 ------------- + gcc/testsuite/gcc.dg/pr104612.c | 28 ++-- + 6 files changed, 50 insertions(+), 358 deletions(-) + delete mode 100644 gcc/testsuite/g++.target/loongarch/crc-scan.C + delete mode 100644 gcc/testsuite/g++.target/loongarch/crc.C + +diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc +index 781a573c9..6086815e3 100644 +--- a/gcc/config/loongarch/loongarch-c.cc ++++ b/gcc/config/loongarch/loongarch-c.cc +@@ -144,25 +144,24 @@ loongarch_update_cpp_builtins (cpp_reader *pfile) + + /* ISA evolution features */ + int max_v_major = 1, max_v_minor = 0; +- ++ + for (int i = 0; i < N_EVO_FEATURES; i++) + { + builtin_undef (la_evo_macro_name[i]); +- ++ + if (la_target.isa.evolution & la_evo_feature_masks[i] +- && (la_evo_feature_masks[i] != OPTION_MASK_ISA_FRECIPE +- || TARGET_HARD_FLOAT)) +- { +- 
builtin_define (la_evo_macro_name[i]); +- +- max_v_major = major > max_v_major ? major : max_v_major; +- int major = la_evo_version_major[i], +- minor = la_evo_version_minor[i]; +- +- max_v_major = major > max_v_major ? major : max_v_major; +- max_v_minor = major == max_v_major +- ? (minor > max_v_minor ? minor : max_v_minor) : max_v_minor; +- } ++ && (la_evo_feature_masks[i] != OPTION_MASK_ISA_FRECIPE ++ || TARGET_HARD_FLOAT)) ++ { ++ builtin_define (la_evo_macro_name[i]); ++ ++ int major = la_evo_version_major[i], ++ minor = la_evo_version_minor[i]; ++ ++ max_v_major = major > max_v_major ? major : max_v_major; ++ max_v_minor = major == max_v_major ++ ? (minor > max_v_minor ? minor : max_v_minor) : max_v_minor; ++ } + } + + /* Find the minimum ISA version required to run the target program. */ +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index ea7e97a47..f5181093a 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -493,7 +493,9 @@ + + ;; If we know the operands does not have overlapping bits, use this + ;; instead of just ior to cover more cases. +-(define_code_iterator any_or_plus [any_or plus]) ++;; gcc-12 not support this writing ++;; (define_code_iterator any_or_plus [any_or plus]) ++(define_code_iterator any_or_plus [ior xor plus]) + + ;; This code iterator allows unsigned and signed division to be generated + ;; from the same template. +@@ -1356,8 +1358,8 @@ + machine_mode lsx_mode + = mode == SFmode ? 
V4SFmode : V2DFmode; + rtx tmp = gen_reg_rtx (lsx_mode); +- rtx op1 = force_lowpart_subreg (lsx_mode, operands[1], mode); +- rtx op2 = force_lowpart_subreg (lsx_mode, operands[2], mode); ++ rtx op1 = lowpart_subreg (lsx_mode, operands[1], mode); ++ rtx op2 = lowpart_subreg (lsx_mode, operands[2], mode); + emit_insn (gen_xorsign3 (lsx_mode, tmp, op1, op2)); + emit_move_insn (operands[0], + lowpart_subreg (mode, tmp, lsx_mode)); +@@ -2311,7 +2313,7 @@ + (match_operand:SI 1 "move_operand" "r,Yd,w,rJ,*r*J,m,*f,*f"))] + "(register_operand (operands[0], SImode) + || reg_or_0_operand (operands[1], SImode))" +- { return loongarch_output_move (operands[0], operands[1]); } ++ { return loongarch_output_move (operands); } + "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO + (operands[0]))" + [(const_int 0)] +@@ -4336,57 +4338,6 @@ + [(set_attr "type" "unknown") + (set_attr "mode" "DI")]) + +-(define_insn "@rbit" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (bitreverse:GPR (match_operand:GPR 1 "register_operand" "r")))] +- "" +- "bitrev.\t%0,%1" +- [(set_attr "type" "unknown") +- (set_attr "mode" "")]) +- +-(define_insn "rbitsi_extended" +- [(set (match_operand:DI 0 "register_operand" "=r") +- (sign_extend:DI +- (bitreverse:SI (match_operand:SI 1 "register_operand" "r"))))] +- "TARGET_64BIT" +- "bitrev.w\t%0,%1" +- [(set_attr "type" "unknown") +- (set_attr "mode" "SI")]) +- +-;; If we don't care high bits, bitrev.4b can reverse bits of values in +-;; QImode. +-(define_insn "rbitqi" +- [(set (match_operand:QI 0 "register_operand" "=r") +- (bitreverse:QI (match_operand:QI 1 "register_operand" "r")))] +- "" +- "bitrev.4b\t%0,%1" +- [(set_attr "type" "unknown") +- (set_attr "mode" "SI")]) +- +-;; For HImode it's a little complicated... +-(define_expand "rbithi" +- [(match_operand:HI 0 "register_operand") +- (match_operand:HI 1 "register_operand")] +- "" +- { +- rtx t = gen_reg_rtx (word_mode); +- +- /* Oh, using paradoxical subreg. 
I learnt the trick from RISC-V, +- hoping we won't be blown up altogether one day. */ +- emit_insn (gen_rbit(word_mode, t, +- gen_lowpart (word_mode, operands[1]))); +- t = expand_simple_binop (word_mode, LSHIFTRT, t, +- GEN_INT (GET_MODE_BITSIZE (word_mode) - 16), +- NULL_RTX, false, OPTAB_DIRECT); +- +- t = gen_lowpart (HImode, t); +- SUBREG_PROMOTED_VAR_P (t) = 1; +- SUBREG_PROMOTED_SET (t, SRP_UNSIGNED); +- emit_move_insn (operands[0], t); +- +- DONE; +- }) +- + (define_insn "@stack_tie" + [(set (mem:BLK (scratch)) + (unspec:BLK [(match_operand:X 0 "register_operand" "r") +@@ -4517,103 +4468,6 @@ + [(set_attr "type" "unknown") + (set_attr "mode" "")]) + +-(define_expand "crc_revsi4" +- [(match_operand:SI 0 "register_operand") ; new_chksum +- (match_operand:SI 1 "register_operand") ; old_chksum +- (match_operand:SUBDI 2 "reg_or_0_operand") ; msg +- (match_operand 3 "const_int_operand")] ; poly +- "" +- { +- unsigned HOST_WIDE_INT poly = UINTVAL (operands[3]); +- rtx msg = operands[2]; +- rtx (*crc_insn)(rtx, rtx, rtx) = nullptr; +- +- /* TODO: Review this when adding LA32 support. If we're going to +- support CRC instructions on LA32 we'll need a "-mcrc" switch as +- they are optional on LA32. */ +- +- if (TARGET_64BIT) +- { +- if (poly == reflect_hwi (0xedb88320u, 32)) +- crc_insn = gen_loongarch_crc_w__w; +- else if (poly == reflect_hwi (0x82f63b78u, 32)) +- crc_insn = gen_loongarch_crcc_w__w; +- } +- +- if (crc_insn) +- { +- /* We cannot make crc_insn to accept const0_rtx easily: +- it's not possible to figure out the mode of const0_rtx so we'd +- have to separate both UNSPEC_CRC and UNSPEC_CRCC to 4 different +- UNSPECs. Instead just hack it around here. */ +- if (msg == const0_rtx) +- msg = gen_rtx_REG (mode, 0); +- +- emit_insn (crc_insn (operands[0], msg, operands[1])); +- } +- else +- { +- /* No CRC instruction is suitable, use the generic table-based +- implementation but optimize bit reversion. 
*/ +- auto rbit = [](rtx *r) +- { +- /* Well, this is ugly. The problem is +- expand_reversed_crc_table_based only accepts one helper +- for reversing data elements and CRC states. */ +- auto mode = GET_MODE (*r); +- auto rbit = (mode == mode ? gen_rbit : gen_rbitsi); +- rtx out = gen_reg_rtx (mode); +- +- emit_insn (rbit (out, *r)); +- *r = out; +- }; +- expand_reversed_crc_table_based (operands[0], operands[1], +- msg, operands[3], mode, +- rbit); +- } +- DONE; +- }) +- +-(define_insn_and_split "*crc_combine" +- [(set (match_operand:SI 0 "register_operand" "=r,r") +- (unspec:SI +- [(reg:SUBDI 0) +- (subreg:SI +- (xor:DI +- (match_operand:DI 1 "register_operand" "r,r") +- ; Our LOAD_EXTEND_OP makes this same as sign_extend +- ; if SUBDI is SI, or zero_extend if SUBDI is QI or HI. +- ; For the former the high bits in rk are ignored by +- ; crc.w.w.w anyway, for the latter the zero extension is +- ; necessary for the correctness of this transformation. +- (subreg:DI +- (match_operand:SUBDI 2 "memory_operand" "m,k") 0)) 0)] +- CRC))] +- "TARGET_64BIT && loongarch_pre_reload_split ()" +- "#" +- "&& true" +- [(set (match_dup 3) (match_dup 2)) +- (set (match_dup 0) +- (unspec:SI [(match_dup 3) (match_dup 1)] CRC))] +- { +- operands[3] = gen_reg_rtx (mode); +- operands[1] = lowpart_subreg (SImode, operands[1], DImode); +- }) +- +-;; With normal or medium code models, if the only use of a pc-relative +-;; address is for loading or storing a value, then relying on linker +-;; relaxation is not better than emitting the machine instruction directly. +-;; Even if the la.local pseudo op can be relaxed, we get: +-;; +-;; pcaddi $t0, %pcrel_20(x) +-;; ld.d $t0, $t0, 0 +-;; +-;; There are still two instructions, same as using the machine instructions +-;; and explicit relocs: +-;; +-;; pcalau12i $t0, %pc_hi20(x) +-;; ld.d $t0, $t0, %pc_lo12(x) +-;; + ;; And if the pseudo op cannot be relaxed, we'll get a worse result (with + ;; 3 instructions). 
+ (define_insn_and_rewrite "simple_load" +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 5186dfbf1..5042a46e4 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -30,13 +30,21 @@ + (define_mode_iterator FLASX [V4DF V8SF]) + + ;; All modes supported by LSX +-(define_mode_iterator LSX [ILSX FLSX]) ++;; (define_mode_iterator LSX [ILSX FLSX]) ++;; gcc-12 not supporting this writing ++(define_mode_iterator LSX [V2DI V4SI V8HI V16QI V2DF V4SF]) + + ;; ALL modes supported by LASX +-(define_mode_iterator LASX [ILASX FLASX]) ++;; (define_mode_iterator LASX [ILASX FLASX]) ++;; gcc-12 not supporting this writing ++(define_mode_iterator LASX [V4DI V8SI V16HI V32QI V4DF V8SF]) + + ;; All vector modes available +-(define_mode_iterator ALLVEC [(LSX "ISA_HAS_LSX") (LASX "ISA_HAS_LASX")]) ++;; (define_mode_iterator ALLVEC [(LSX "ISA_HAS_LSX") (LASX "ISA_HAS_LASX")]) ++;; gcc-12 not supporting this writing ++(define_mode_iterator ALLVEC [ ++(V2DI "ISA_HAS_LSX") (V4SI "ISA_HAS_LSX") (V8HI "ISA_HAS_LSX") (V16QI "ISA_HAS_LSX") (V2DF "ISA_HAS_LSX") (V4SF "ISA_HAS_LSX") ++(V4DI "ISA_HAS_LASX") (V8SI "ISA_HAS_LASX") (V16HI "ISA_HAS_LASX") (V32QI "ISA_HAS_LASX") (V4DF "ISA_HAS_LASX") (V8SF "ISA_HAS_LASX")]) + + ;; All integer modes available + ;; (define_mode_iterator IVEC [(ILSX "ISA_HAS_LSX") (ILASX "ISA_HAS_LASX")]) +@@ -144,7 +152,11 @@ + + ;; Some immediate values in V1TI or V2TI may be stored in LSX or LASX + ;; registers, thus we need to allow moving them for reload. 
+-(define_mode_iterator ALLVEC_TI [ALLVEC ++;; (define_mode_iterator ALLVEC_TI [ALLVEC ++;; gcc-12 not supporting this writing ++(define_mode_iterator ALLVEC_TI [ ++ (V2DI "ISA_HAS_LSX") (V4SI "ISA_HAS_LSX") (V8HI "ISA_HAS_LSX") (V16QI "ISA_HAS_LSX") (V2DF "ISA_HAS_LSX") (V4SF "ISA_HAS_LSX") ++ (V4DI "ISA_HAS_LASX") (V8SI "ISA_HAS_LASX") (V16HI "ISA_HAS_LASX") (V32QI "ISA_HAS_LASX") (V4DF "ISA_HAS_LASX") (V8SF "ISA_HAS_LASX") + (V1TI "ISA_HAS_LSX") + (V2TI "ISA_HAS_LASX")]) + +@@ -549,21 +561,6 @@ + (define_int_attr ev_od [(0 "ev") (1 "od")]) + (define_int_attr even_odd [(0 "even") (1 "odd")]) + +-(define_expand "vec_widen_mult__" +- [(match_operand: 0 "register_operand" "=f") +- (match_operand:IVEC 1 "register_operand" " f") +- (match_operand:IVEC 2 "register_operand" " f") +- (any_extend (const_int 0)) +- (const_int zero_one)] +- "" +-{ +- emit_insn ( +- gen__vmulw__ (operands[0], +- operands[1], +- operands[2])); +- DONE; +-}) +- + ;; FP negation. + (define_insn "neg2" + [(set (match_operand:FVEC 0 "register_operand" "=f") +@@ -660,28 +657,3 @@ + + ; The LoongArch ASX Instructions. 
+ (include "lasx.md") +- +-(define_expand "dot_prod" +- [(match_operand: 0 "register_operand" "=f,f") +- (match_operand:IVEC 1 "register_operand" " f,f") +- (match_operand:IVEC 2 "register_operand" " f,f") +- (match_operand: 3 "reg_or_0_operand" " 0,YG") +- (any_extend (const_int 0))] +- "" +-{ +- rtx *op = operands; +- +- if (op[3] == CONST0_RTX (mode)) +- emit_insn ( +- gen__vmulwev__ (op[0], op[1], +- op[2])); +- else +- emit_insn ( +- gen__vmaddwev__ (op[0], op[3], +- op[1], op[2])); +- +- emit_insn ( +- gen__vmaddwod__ (op[0], op[0], +- op[1], op[2])); +- DONE; +-}) +diff --git a/gcc/testsuite/g++.target/loongarch/crc-scan.C b/gcc/testsuite/g++.target/loongarch/crc-scan.C +deleted file mode 100644 +index 971580f0d..000000000 +--- a/gcc/testsuite/g++.target/loongarch/crc-scan.C ++++ /dev/null +@@ -1,13 +0,0 @@ +-/* { dg-do compile } */ +-/* { dg-options "-O2 -march=loongarch64" } */ +- +-#include "crc.C" +- +-/* { dg-final { scan-assembler-times "crc\\.w\\.b\\.w" 2 } } */ +-/* { dg-final { scan-assembler-times "crc\\.w\\.h\\.w" 2 } } */ +-/* { dg-final { scan-assembler-times "crc\\.w\\.w\\.w" 2 } } */ +-/* { dg-final { scan-assembler-times "crcc\\.w\\.b\\.w" 2 } } */ +-/* { dg-final { scan-assembler-times "crcc\\.w\\.h\\.w" 2 } } */ +-/* { dg-final { scan-assembler-times "crcc\\.w\\.w\\.w" 2 } } */ +-/* { dg-final { scan-assembler-not "crc\\.w\\.\[bhw\]\\.w\t\\\$r\[0-9\]+,\\\$r0" } } */ +-/* { dg-final { scan-assembler-not "crcc\\.w\\.\[bhw\]\\.w\t\\\$r\[0-9\]+,\\\$r0" } } */ +diff --git a/gcc/testsuite/g++.target/loongarch/crc.C b/gcc/testsuite/g++.target/loongarch/crc.C +deleted file mode 100644 +index 16df4a1ae..000000000 +--- a/gcc/testsuite/g++.target/loongarch/crc.C ++++ /dev/null +@@ -1,120 +0,0 @@ +-/* { dg-do run } */ +-/* { dg-options "-O2" } */ +- +-typedef __UINT8_TYPE__ uint8_t; +-typedef __UINT16_TYPE__ uint16_t; +-typedef __UINT32_TYPE__ uint32_t; +-typedef __UINT64_TYPE__ uint64_t; +-typedef __SIZE_TYPE__ size_t; +- +-template +-__attribute__ 
((always_inline)) inline uint32_t +-crc32_impl (const T *data, size_t len) +-{ +- uint32_t ret = 0xffffffffu; +- for (size_t k = 0; k < len; k++) +- { +- ret ^= data[k]; +- for (int i = 0; i < 8 * sizeof (T); i++) +- if (ret & 1) +- ret = (ret >> 1) ^ poly; +- else +- ret >>= 1; +- } +- return ret; +-} +- +-template +-__attribute__ ((noipa, optimize (0))) uint32_t +-crc32_ref (const T *data, size_t len) +-{ +- return crc32_impl (data, len); +-} +- +-template +-__attribute__ ((noipa)) uint32_t +-crc32_opt (const T *data, size_t len) +-{ +- return crc32_impl (data, len); +-} +- +-template +-__attribute__ ((noipa)) uint32_t +-crc32_alt (const T *data, size_t len) +-{ +- uint32_t ret = 0xffffffffu; +- for (size_t k = 0; k < len; k++) +- { +- T x = data[k]; +- for (int i = 0; i < 8 * sizeof (T); i++) +- { +- if ((ret & 1) ^ (x & 1)) +- ret = (ret >> 1) ^ poly; +- else +- ret >>= 1; +- x >>= 1; +- } +- } +- return ret; +-} +- +-union test_data_t +-{ +- uint8_t u8[1024]; +- uint16_t u16[512]; +- uint32_t u32[256]; +- +- operator const uint8_t * () const { return u8; } +- operator const uint16_t * () const { return u16; } +- operator const uint32_t * () const { return u32; } +- +- constexpr +- test_data_t () +- : u8{} +- { +- } +-}; +- +-/* Generate test data at compile time with minstd_rand0 algorithm. 
*/ +-constexpr test_data_t +-gen (uint64_t seed) +-{ +- uint64_t state = seed; +- test_data_t ret; +- for (int i = 0; i < sizeof (ret); i++) +- { +- state = state * 16807 % 2147483647; +- ret.u8[i] = (uint8_t)state; +- } +- return ret; +-} +- +-constexpr union test_data_t test_data = gen (0xdeadbeef); +- +-void +-assert_eq (uint32_t x, uint32_t y) +-{ +- if (x != y) +- __builtin_trap (); +-} +- +-template +-void +-test_crc32 () +-{ +- constexpr size_t len = sizeof (test_data) / sizeof (T); +- uint32_t ref = crc32_ref (test_data, len); +- assert_eq (ref, crc32_opt (test_data, len)); +- assert_eq (ref, crc32_alt (test_data, len)); +-} +- +-int +-main (void) +-{ +- test_crc32 (); +- test_crc32 (); +- test_crc32 (); +- test_crc32 (); +- test_crc32 (); +- test_crc32 (); +-} +diff --git a/gcc/testsuite/gcc.dg/pr104612.c b/gcc/testsuite/gcc.dg/pr104612.c +index 7d055ed87..ea0147b96 100644 +--- a/gcc/testsuite/gcc.dg/pr104612.c ++++ b/gcc/testsuite/gcc.dg/pr104612.c +@@ -5,20 +5,20 @@ + + struct V { float x, y; }; + +-struct V +-foo (struct V v) +-{ +- struct V ret; +- ret.x = __builtin_copysignf (1.0e+0, v.x); +- ret.y = __builtin_copysignf (1.0e+0, v.y); +- return ret; +-} +- +-float +-bar (struct V v) +-{ +- return __builtin_copysignf (v.x, v.y); +-} ++// struct V ++// foo (struct V v) ++// { ++// struct V ret; ++// ret.x = __builtin_copysignf (1.0e+0, v.x); ++// ret.y = __builtin_copysignf (1.0e+0, v.y); ++// return ret; ++// } ++// ++// float ++// bar (struct V v) ++// { ++// return __builtin_copysignf (v.x, v.y); ++// } + + float + baz (struct V v) +-- +2.47.3 + diff --git a/gcc.spec b/gcc.spec index 0d8a830..067ebf8 100644 --- a/gcc.spec +++ b/gcc.spec @@ -1,4 +1,4 @@ -%define anolis_release 13 +%define anolis_release 14 %global DATE 20221121 %global gitrev b3f5a0d53b84ed27cf00cfa2b9c3e2c78935c07d @@ -458,6 +458,66 @@ Patch3320: 0190-tree-optimization-110702-avoid-zero-based-memory-ref.patch Patch3321: 0191-LoongArch-Change-OSDIR-for-distribution.patch Patch3322: 
Fix-indentation-and-numbering-errors.diff Patch3323: 0001-LoongArch-Fix-sanitizer.patch +#-- +Patch3324:0001-libffi-LoongArch-Fix-soft-float-builds-of-libffi.patch +Patch3325:0002-Always-set-SECTION_RELRO-for-or-.data.rel.ro-.local-.patch +Patch3326:0003-LoongArch-testsuite-Add-O-for-jump-table-annotate.c.patch +Patch3327:0004-LoongArch-Make-__builtin_lsx_vorn_v-and-__builtin_la.patch +Patch3328:0005-LoongArch-Fix-clerical-errors-in-lasx_xvreplgr2vr_-a.patch +Patch3329:0006-LoongArch-Remove-redundant-code.patch +Patch3330:0007-LoongArch-testsuite-Fix-loongarch-vect-frint-scalar..patch +Patch3331:0008-LoongArch-testsuite-Fix-l-a-sx-andn-iorn.c.patch +Patch3332:0009-LoongArch-Mask-shift-offset-when-emit-xv-v-srl-sll-s.patch +Patch3333:0010-LoongArch-Remove-QHSD-and-use-QHWD-instead.patch +Patch3334:0011-LoongArch-Add-bit-reverse-operations.patch +Patch3335:0012-LoongArch-Add-CRC-expander-to-generate-faster-CRC.patch +Patch3336:0013-LoongArch-Combine-xor-and-crc-instructions.patch +Patch3337:0014-LoongArch-Add-crc-tests.patch +Patch3338:0015-LoongArch-Implement-TARGET_IRA_CHANGE_PSEUDO_ALLOCNO.patch +Patch3339:0016-LoongArch-Support-immediate_operand-for-vec_cmp.patch +Patch3340:0017-LoongArch-Implement-vector-cbranch-optab-for-LSX-and.patch +Patch3341:0018-LoongArch-Remove-useless-UNSPECs-and-define_mode_att.patch +Patch3342:0019-LoongArch-Fix-selector-error-in-lasx_xvexth_h-w-d-pa.patch +Patch3343:0020-LoongArch-Fix-bugs-in-insn-patterns-lasx_xvrepl128ve.patch +Patch3344:0021-LoongArch-Add-some-vector-pack-unpack-patterns.patch +Patch3345:0022-LoongArch-Add-standard-patterns-uabd-and-sabd.patch +Patch3346:0023-LoongArch-Optimize-for-conditional-move-operations.patch +Patch3347:0024-LoongArch-Optimize-initializing-fp-resgister-to-zero.patch +Patch3348:0025-LoongArch-Opitmize-the-cost-of-vec_construct.patch +Patch3349:0026-LoongArch-Generate-the-final-immediate-for-lu12i.w-l.patch +Patch3350:0027-LoongArch-Add-alsl.wu.patch 
+Patch3351:0028-LoongArch-Fix-cost-model-for-alsl.patch +Patch3352:0029-LoongArch-Simplify-using-bstr-ins-pick-instructions-.patch +Patch3353:0030-LoongArch-Improve-reassociation-for-bitwise-operatio.patch +Patch3354:0031-LoongArch-Implement-target-attribute.patch +Patch3355:0032-LoongArch-Implement-target-pragma.patch +Patch3356:0033-LoongArch-Fix-wrong-code-with-optab-_alsl_reversesi_.patch +Patch3357:0034-LoongArch-Fix-invalid-subregs-in-xorsign-PR118501.patch +Patch3358:0035-LoongArch-Fix-ICE-caused-by-illegal-calls-to-builtin.patch +Patch3359:0036-LoongArch-Correct-the-mode-for-mask-eq-ne-z.patch +Patch3360:0037-LoongArch-Move-the-function-loongarch_register_pragm.patch +Patch3361:0038-LoongArch-Split-the-function-loongarch_cpu_cpp_built.patch +Patch3362:0039-LoongArch-After-setting-the-compilation-options-upda.patch +Patch3363:0040-LoongArch-When-mfpu-none-__loongarch_frecipe-shouldn.patch +Patch3364:0041-LoongArch-Adjust-the-cost-of-ADDRESS_REG_REG.patch +Patch3365:0042-LoongArch-Fix-the-issue-of-function-jump-out-of-rang.patch +Patch3366:0043-LoongArch-Accept-ADD-IOR-or-XOR-when-combining-objec.patch +Patch3367:0044-LoongArch-Try-harder-using-vrepli-instructions-to-ma.patch +Patch3368:0045-LoongArch-Allow-moving-TImode-vectors.patch +Patch3369:0046-LoongArch-Implement-vec_widen_mult_-even-odd-_-for-L.patch +Patch3370:0047-LoongArch-Implement-su-dot_prod-for-LSX-and-LASX-mod.patch +Patch3371:0048-LoongArch-Use-normal-RTL-pattern-instead-of-UNSPEC-f.patch +Patch3372:0049-LoongArch-Fix-incorrect-reorder-of-__lsx_vldx-and-__.patch +Patch3373:0050-LoongArch-Fix-ICE-when-trying-to-recognize-bitwise-a.patch +Patch3374:0051-LoongArch-Don-t-use-C-17-feature-PR119238.patch +Patch3375:0052-LoongArch-Add-ABI-names-for-FPR.patch +Patch3376:0053-LoongArch-Support-Q-suffix-for-__float128.patch +Patch3377:0054-LoongArch-Set-default-alignment-for-functions-jumps-.patch +Patch3378:0055-LoongArch-Make-gen-evolution.awk-compatible-with-Fre.patch 
+Patch3379:0056-LoongArch-Fix-awk-sed-usage-for-compatibility.patch +Patch3380:0057-LoongArch-Change-dg-do-what-default-save-and-restore.patch +Patch3381:0058-LoongArch-Prevent-subreg-of-subreg-in-CRC.patch +Patch3382:0059-LoongArch-Fix-errors-due-to-version-differences.patch # Part 5000 ~ 5999 Patch5001: HYGON-0001-arch-support-for-hygon.patch @@ -1335,6 +1395,65 @@ The %{name}-doc package contains documentation files for %{name}. %patch3321 -p1 %patch3322 -p1 %patch3323 -p1 +%patch3324 -p1 +%patch3325 -p1 +%patch3326 -p1 +%patch3327 -p1 +%patch3328 -p1 +%patch3329 -p1 +%patch3330 -p1 +%patch3331 -p1 +%patch3332 -p1 +%patch3333 -p1 +%patch3334 -p1 +%patch3335 -p1 +%patch3336 -p1 +%patch3337 -p1 +%patch3338 -p1 +%patch3339 -p1 +%patch3340 -p1 +%patch3341 -p1 +%patch3342 -p1 +%patch3343 -p1 +%patch3344 -p1 +%patch3345 -p1 +%patch3346 -p1 +%patch3347 -p1 +%patch3348 -p1 +%patch3349 -p1 +%patch3350 -p1 +%patch3351 -p1 +%patch3352 -p1 +%patch3353 -p1 +%patch3354 -p1 +%patch3355 -p1 +%patch3356 -p1 +%patch3357 -p1 +%patch3358 -p1 +%patch3359 -p1 +%patch3360 -p1 +%patch3361 -p1 +%patch3362 -p1 +%patch3363 -p1 +%patch3364 -p1 +%patch3365 -p1 +%patch3366 -p1 +%patch3367 -p1 +%patch3368 -p1 +%patch3369 -p1 +%patch3370 -p1 +%patch3371 -p1 +%patch3372 -p1 +%patch3373 -p1 +%patch3374 -p1 +%patch3375 -p1 +%patch3376 -p1 +%patch3377 -p1 +%patch3378 -p1 +%patch3379 -p1 +%patch3380 -p1 +%patch3381 -p1 +%patch3382 -p1 %endif %ifarch x86_64 %patch5001 -p1 @@ -2854,6 +2973,9 @@ end %changelog +* Mon Aug 4 2025 Peng Fan - 12.3.0-14 +- LoongArch: common sync from upstream. + * Sat Jun 7 2025 Peng Fan - 12.3.0-13 - Fix sanitizer and gcc_except_table -- Gitee