From 99da985c5a32dd1eb2b02050a7e2f6606c4e5338 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Thu, 31 Oct 2024 10:33:46 +0800 Subject: [PATCH] LoongArch: Sync to upstream Signed-off-by: Peng Fan --- ...ement-multilib-build-option-handling.patch | 464 ++ ...whether-binutils-supports-the-relax-.patch | 192 + 0003-Modify-gas-uleb128-support-test.patch | 115 + ...Optimizations-of-vector-construction.patch | 1310 +++++ ...e-UNSPEC_FCOPYSIGN-with-copysign-RTL.patch | 51 + ...-makefile-dependency-for-loongarch-h.patch | 71 + ...ble-vect.exp-for-LoongArch.-PR111424.patch | 65 + ...-macro-definition-ASM_OUTPUT_ALIGN_W.patch | 48 + ...c_initv32qiv16qi-template-to-avoid-I.patch | 105 + ...mp.caf.s-instead-of-movgr2cf-for-zer.patch | 35 + ...Implement-avg-and-sad-standard-names.patch | 389 ++ ...h-Implement-vec_widen-standard-names.patch | 403 ++ ...ent-the-new-vector-cost-model-framew.patch | 354 ++ ...ngArch-Define-macro-CLEAR_INSN_CACHE.patch | 34 + ...d-enum-style-mexplicit-relocs-option.patch | 233 + ...plicit-relocs-for-GOT-access-when-me.patch | 212 + ...plicit-relocs-for-TLS-access-with-me.patch | 146 + ...plicit-relocs-for-addresses-only-use.patch | 245 + ...ent-__builtin_thread_pointer-for-TLS.patch | 84 + ...rint-releated-comments-in-lsxintrin..patch | 189 + ...-vcond_mask_mn-expanders-for-SF-DF-m.patch | 418 ++ ...-HAVE_AS_TLS-to-0-if-it-s-undefined-.patch | 34 + ...struction-name-typo-in-lsx_vreplgr2v.patch | 30 + ...mplify_gen_subreg-instead-of-gen_rtx.patch | 116 + ...ze-single-used-address-with-mexplici.patch | 116 + ...e-relaxation-if-the-assembler-don-t-.patch | 305 ++ ...-redundant-barrier-instructions-befo.patch | 391 ++ ...an-assembler-times-of-lasx-lsx-test-.patch | 161 + ...se-cost-of-vector-aligned-store-load.patch | 45 + ...plement-C-LT-Z_DEFINED_VALUE_AT_ZERO.patch | 58 + ...-vectorized-copysign-x-1-expansion-e.patch | 197 + ...de-generation-support-for-call36-fun.patch | 561 +++ ...ent-atomic-operations-using-LoongArc.patch | 362 ++ 
..._load-and-atomic_store-are-implement.patch | 140 + ...s-Add-infrastructure-to-generate-cod.patch | 615 +++ ...olution-features-of-base-ISA-revisio.patch | 148 + ...he-advantage-of-mdiv32-if-it-s-enabl.patch | 156 + ...-Don-t-emit-dbar-0x700-if-mld-seq-sa.patch | 61 + ...ne-grained-control-for-LAM_BH-and-LA.patch | 208 + ...xplict-relocs-none-mcmodel-medium-pr.patch | 50 + ...LoongArch-Modify-MUSL_DYNAMIC_LINKER.patch | 43 + ...bgcc-build-failure-when-libc-is-not-.patch | 85 + ...ze-LSX-vector-shuffle-on-floating-po.patch | 148 + ...ze-the-loading-of-immediate-numbers-.patch | 112 + ...ntime-error-in-a-gcc-build-with-with.patch | 30 + ...age-of-LSX-and-LASX-frint-ftint-inst.patch | 1295 +++++ ...andard-pattern-name-and-RTX-code-for.patch | 268 + ...andard-pattern-name-and-RTX-code-for.patch | 285 ++ ...LoongArch-Remove-lrint_allow_inexact.patch | 42 + ...X-for-scalar-FP-rounding-with-explic.patch | 150 + ...-duplicate-definition-of-CLZ_DEFINED.patch | 49 + ...vectorized-hardware-inspection-for-t.patch | 4375 +++++++++++++++++ ...rate-optimization-of-scalar-signed-u.patch | 148 + ...ze-vector-constant-extract-even-odd-.patch | 163 + ...trinsic-function-descriptions-for-LS.patch | 1697 +++++++ ...-loongarch-def-from-C-to-C-to-make-i.patch | 925 ++++ ...-the-definition-of-ISA_BASE_LA64V110.patch | 261 + 0058-LoongArch-Add-support-for-xorsign.patch | 412 ++ ...pport-for-LoongArch-V1.1-approximate.patch | 730 +++ ...andard-pattern-name-for-xvfrsqrt-vfr.patch | 257 + ...ne-pattern-for-xvfrecip-vfrecip-inst.patch | 135 + ...tions-mrecip-and-mrecip-with-ffast-m.patch | 1096 +++++ ...ized-loop-unrolling-is-disable-for-d.patch | 83 + ...x-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch | 130 + ...E-and-use-simplify_gen_subreg-instea.patch | 318 ++ ...h_return-epilogue-for-normal-returns.patch | 236 + ...mcmodel-extreme-and-model-attribute-.patch | 180 + ...ongArch-Fix-warnings-building-libgcc.patch | 79 + ...ite-Remove-XFAIL-in-vect-ftint-no-in.patch | 30 + 
...e-rtl.h-for-COSTS_N_INSNS-instead-of.patch | 44 + ...gArch-Fix-instruction-costs-PR112936.patch | 165 + 0072-LoongArch-Add-alslsi3_extend.patch | 53 + ...LoongArch-Add-support-for-D-frontend.patch | 224 + ...iber-context-switch-code-for-LoongAr.patch | 156 + ...ch-Fix-FP-vector-comparsons-PR113034.patch | 866 ++++ ...rce_reg-instead-of-gen_reg_rtx-emit_.patch | 190 + ...LoongArch-Clean-up-vec_init-expander.patch | 83 + ...correct-code-generation-for-sad-patt.patch | 78 + ...-the-check-type-of-the-vector-builti.patch | 68 + ...end.texi-Fix-typos-in-LSX-intrinsics.patch | 250 + ...iltin-function-prototypes-for-LASX-i.patch | 60 + ...m-modifiers-to-the-LSX-and-LASX-dire.patch | 92 + ...ent-FCCmode-reload-and-cstore-ANYF-m.patch | 392 ++ ...gn_extend-pattern-for-32-bit-rotate-.patch | 69 + ...bug-in-bstrins_-mode-_for_ior_mask-t.patch | 37 + ...sn-output-of-vec_concat-templates-fo.patch | 132 + ...E-when-passing-two-same-vector-argum.patch | 232 + ...-left-rotate-to-right-rotate-with-ne.patch | 253 + ...finite-secondary-reloading-of-FCCmod.patch | 104 + ...e-mexplicit-relocs-auto-simple-used-.patch | 305 ++ ...e-format-of-bstrins_-mode-_for_ior_m.patch | 33 + ...LoongArch-Added-TLS-Le-Relax-support.patch | 280 ++ ...de-fmin-fmax-RTL-pattern-for-vectors.patch | 112 + ...constant-vector-permuatation-impleme.patch | 1484 ++++++ ...uite-Fix-FAIL-in-lasx-xvstelm.c-file.patch | 34 + ...ite-Modify-the-test-behavior-of-the-.patch | 47 + ...ite-Delete-the-default-run-behavior-.patch | 31 + ...ite-Added-additional-vectorization-m.patch | 157 + ...ite-Give-up-the-detection-of-the-gcc.patch | 80 + ...the-problem-of-incorrect-judgment-of.patch | 206 + ...e-lasx_xvpermi_q_-LASX-mode-insn-pat.patch | 150 + ...ent-vec_init-M-N-where-N-is-a-LSX-ve.patch | 253 + ...-ISA-evolution-switches-along-with-o.patch | 533 ++ ...e-ISA_BASE_LA64V100-to-ISA_BASE_LA64.patch | 220 + 0105-LoongArch-Use-enums-for-constants.patch | 181 + ...Simplify-mexplicit-reloc-definitions.patch | 124 + 
...ite-Add-loongarch-support-to-slp-21..patch | 35 + ...zed-some-of-the-symbolic-expansion-i.patch | 228 + ...ngArch-Implement-option-save-restore.patch | 467 ++ ...ant-sign-extension-elimination-optim.patch | 234 + ...ant-sign-extension-elimination-optim.patch | 56 + ...-the-u-attribute-to-the-mem-to-which.patch | 64 + ...ite-Fix-fail-in-gen-vect-2-25-.c-fil.patch | 51 + ...ove-constraint-z-from-movsi_internal.patch | 43 + ...d-attribute-descriptions-defined-in-.patch | 47 + ...e-explicit-reloc-for-TLS-LD-GD-with-.patch | 70 + ...ite-Disable-stack-protector-for-got-.patch | 35 + ...e-TLS-type-symbols-from-generating-n.patch | 65 + ...Arch-Remove-vec_concatz-mode-pattern.patch | 75 + ...ze-implementation-of-single-precisio.patch | 107 + ...-Define-LOGICAL_OP_NON_SHORT_CIRCUIT.patch | 71 + ...vec_selects-of-bottom-elements-into-.patch | 84 + ...-the-address-calculation-logic-for-o.patch | 112 + ...e-template-got_load_tls_-ld-gd-le-ie.patch | 214 + ...e-macro-implementation-of-mcmodel-ex.patch | 453 ++ ...-explicit-reloc-for-extreme-TLS-GD-L.patch | 126 + ...support-for-loading-__get_tls_addr-s.patch | 72 + ...split-the-instructions-containing-re.patch | 514 ++ ...-cost-of-vector_stmt-that-match-mult.patch | 173 + ...correct-return-type-for-frecipe-frsq.patch | 113 + 0131-LoongArch-Fix-an-ODR-violation.patch | 60 + ...ite-Fix-gcc.dg-vect-vect-reduc-mul_-.patch | 359 ++ ...out-of-bounds-access-in-loongarch_sy.patch | 72 + ...rch-Fix-wrong-LSX-FP-vector-negation.patch | 122 + ...ong-return-value-type-of-__iocsrrd_h.patch | 30 + ...-redundant-symbol-type-conversions-i.patch | 337 ++ ...hecking-whether-the-assembler-suppor.patch | 54 + ...falsely-claim-gold-supported-in-topl.patch | 49 + ...-Deduplicate-crc-instruction-defines.patch | 56 + ...-unneeded-sign-extension-after-crc-c.patch | 70 + ...ongArch-Allow-s9-as-a-register-alias.patch | 45 + ...ite-Rewrite-x-vfcmp-d-f-.c-to-avoid-.patch | 1117 +++++ ...b-instead-of-lib64-as-the-library-se.patch | 80 + 
...ite-Fix-problems-with-incorrect-resu.patch | 551 +++ ...an-issue-with-the-implementation-of-.patch | 130 + ...ite-Add-compilation-options-to-the-r.patch | 30 + ..._LARCH_RELAX-for-TLS-IE-with-non-ext.patch | 137 + ...-unused-and-incorrect-sge-u-_-X-mode.patch | 57 + ...-masking-process-for-operand-3-of-xv.patch | 85 + ...3-.-functions-returning-large-aggreg.patch | 48 + ...ch-Remove-unused-useless-definitions.patch | 123 + ...-loongarch_expand_vec_cmp-s-return-t.patch | 110 + ...e-UNITS_PER_FP_REG-and-UNITS_PER_FPR.patch | 104 + 0154-LoongArch-Fix-a-typo-PR-114407.patch | 30 + ...test-case-for-negating-FP-vectors-co.patch | 68 + ...scriptions-of-the-compilation-option.patch | 83 + ...loongarch_option_override_internal-i.patch | 800 +++ ...ngArch-Regenerate-loongarch.opt.urls.patch | 117 + ...Arch-Add-support-for-TLS-descriptors.patch | 724 +++ ...-LoongArch-Fix-missing-plugin-header.patch | 32 + 0161-LoongArch-Remove-unused-code.patch | 344 ++ ...fault-alignment-for-functions-jumps-.patch | 135 + 0163-LoongArch-Enable-switchable-target.patch | 281 ++ 0164-LoongArch-Define-ISA-versions.patch | 1016 ++++ ...ne-builtin-macros-for-ISA-evolutions.patch | 678 +++ ...nstraints-for-bit-string-operation-d.patch | 120 + ...REGNO-with-REG_P-in-loongarch_expand.patch | 67 + ...de-size-comparision-in-loongarch_exp.patch | 36 + ...gArch-Use-bstrins-for-value-1u-const.patch | 135 + ...gArch-Tweak-IOR-rtx_cost-for-bstrins.patch | 158 + ...dup-and-sort-the-comment-in-loongarc.patch | 44 + ...plicit-relocs-extreme-tls-desc.c-tes.patch | 45 + ...-loongarch_insn_cost-and-set-the-cos.patch | 70 + ...-is-not-allowed-to-be-stored-in-the-.patch | 73 + 0175-LoongArch-Remove-unreachable-codes.patch | 249 + ...ze-the-code-related-to-split-move-an.patch | 413 ++ ...-some-SImode-operations-through-si3_.patch | 364 ++ ...ins_zero_bitmask_operand-and-remove-.patch | 123 + ...h-Rework-bswap-hi-si-di-2-definition.patch | 224 + ...-testsuite-fix-dg-do-preprocess-typo.patch | 26 + 
...-gawk-extension-from-a-generator-scr.patch | 47 + ...iorn-and-andn-standard-pattern-names.patch | 226 + 0183-LoongArch-Drop-vcond-u-expanders.patch | 127 + ...e-ashr-lshr-and-ashl-RTL-pattern-for.patch | 220 + ...ent-scalar-isinf-isnormal-and-isfini.patch | 203 + ...ch-Add-support-to-annotate-tablejump.patch | 155 + 0187-LoongArch-Fix-up-r15-4130.patch | 32 + ...Update-build-scripts-for-LoongArch64.patch | 304 ++ 0189-LoongArch-fix-building-errors.patch | 273 + ...n-110702-avoid-zero-based-memory-ref.patch | 119 + ...ngArch-Change-OSDIR-for-distribution.patch | 25 + gcc.spec | 395 +- 192 files changed, 46369 insertions(+), 4 deletions(-) create mode 100644 0001-LoongArch-Reimplement-multilib-build-option-handling.patch create mode 100644 0002-LoongArch-Check-whether-binutils-supports-the-relax-.patch create mode 100644 0003-Modify-gas-uleb128-support-test.patch create mode 100644 0004-LoongArch-Optimizations-of-vector-construction.patch create mode 100644 0005-LoongArch-Replace-UNSPEC_FCOPYSIGN-with-copysign-RTL.patch create mode 100644 0006-LoongArch-Adjust-makefile-dependency-for-loongarch-h.patch create mode 100644 0007-LoongArch-Enable-vect.exp-for-LoongArch.-PR111424.patch create mode 100644 0008-LoongArch-Delete-macro-definition-ASM_OUTPUT_ALIGN_W.patch create mode 100644 0009-LoongArch-Fix-vec_initv32qiv16qi-template-to-avoid-I.patch create mode 100644 0010-LoongArch-Use-fcmp.caf.s-instead-of-movgr2cf-for-zer.patch create mode 100644 0011-LoongArch-Implement-avg-and-sad-standard-names.patch create mode 100644 0012-LoongArch-Implement-vec_widen-standard-names.patch create mode 100644 0013-LoongArch-Implement-the-new-vector-cost-model-framew.patch create mode 100644 0014-LoongArch-Define-macro-CLEAR_INSN_CACHE.patch create mode 100644 0015-LoongArch-Add-enum-style-mexplicit-relocs-option.patch create mode 100644 0016-LoongArch-Use-explicit-relocs-for-GOT-access-when-me.patch create mode 100644 0017-LoongArch-Use-explicit-relocs-for-TLS-access-with-me.patch 
create mode 100644 0018-LoongArch-Use-explicit-relocs-for-addresses-only-use.patch create mode 100644 0019-LoongArch-Implement-__builtin_thread_pointer-for-TLS.patch create mode 100644 0020-LoongArch-Fix-vfrint-releated-comments-in-lsxintrin..patch create mode 100644 0021-LoongArch-Enable-vcond_mask_mn-expanders-for-SF-DF-m.patch create mode 100644 0022-LoongArch-Define-HAVE_AS_TLS-to-0-if-it-s-undefined-.patch create mode 100644 0023-LoongArch-Fix-instruction-name-typo-in-lsx_vreplgr2v.patch create mode 100644 0024-LoongArch-Use-simplify_gen_subreg-instead-of-gen_rtx.patch create mode 100644 0025-LoongArch-Optimize-single-used-address-with-mexplici.patch create mode 100644 0026-LoongArch-Disable-relaxation-if-the-assembler-don-t-.patch create mode 100644 0027-LoongArch-Remove-redundant-barrier-instructions-befo.patch create mode 100644 0028-LoongArch-Fix-scan-assembler-times-of-lasx-lsx-test-.patch create mode 100644 0029-LoongArch-Increase-cost-of-vector-aligned-store-load.patch create mode 100644 0030-LoongArch-Implement-C-LT-Z_DEFINED_VALUE_AT_ZERO.patch create mode 100644 0031-LoongArch-Handle-vectorized-copysign-x-1-expansion-e.patch create mode 100644 0032-LoongArch-Add-code-generation-support-for-call36-fun.patch create mode 100644 0033-LoongArch-Implement-atomic-operations-using-LoongArc.patch create mode 100644 0034-LoongArch-atomic_load-and-atomic_store-are-implement.patch create mode 100644 0035-LoongArch-genopts-Add-infrastructure-to-generate-cod.patch create mode 100644 0036-LoongArch-Add-evolution-features-of-base-ISA-revisio.patch create mode 100644 0037-LoongArch-Take-the-advantage-of-mdiv32-if-it-s-enabl.patch create mode 100644 0038-LoongArch-Don-t-emit-dbar-0x700-if-mld-seq-sa.patch create mode 100644 0039-LoongArch-Add-fine-grained-control-for-LAM_BH-and-LA.patch create mode 100644 0040-LoongArch-Fix-mexplict-relocs-none-mcmodel-medium-pr.patch create mode 100644 0041-LoongArch-Modify-MUSL_DYNAMIC_LINKER.patch create mode 100644 
0042-LoongArch-Fix-libgcc-build-failure-when-libc-is-not-.patch create mode 100644 0043-LoongArch-Optimize-LSX-vector-shuffle-on-floating-po.patch create mode 100644 0044-LoongArch-Optimize-the-loading-of-immediate-numbers-.patch create mode 100644 0045-LoongArch-Fix-runtime-error-in-a-gcc-build-with-with.patch create mode 100644 0046-LoongArch-Fix-usage-of-LSX-and-LASX-frint-ftint-inst.patch create mode 100644 0047-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch create mode 100644 0048-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch create mode 100644 0049-LoongArch-Remove-lrint_allow_inexact.patch create mode 100644 0050-LoongArch-Use-LSX-for-scalar-FP-rounding-with-explic.patch create mode 100644 0051-LoongArch-Remove-duplicate-definition-of-CLZ_DEFINED.patch create mode 100644 0052-LoongArch-Added-vectorized-hardware-inspection-for-t.patch create mode 100644 0053-LoongArch-Accelerate-optimization-of-scalar-signed-u.patch create mode 100644 0054-LoongArch-Optimize-vector-constant-extract-even-odd-.patch create mode 100644 0055-LoongArch-Add-intrinsic-function-descriptions-for-LS.patch create mode 100644 0056-LoongArch-Switch-loongarch-def-from-C-to-C-to-make-i.patch create mode 100644 0057-LoongArch-Remove-the-definition-of-ISA_BASE_LA64V110.patch create mode 100644 0058-LoongArch-Add-support-for-xorsign.patch create mode 100644 0059-LoongArch-Add-support-for-LoongArch-V1.1-approximate.patch create mode 100644 0060-LoongArch-Use-standard-pattern-name-for-xvfrsqrt-vfr.patch create mode 100644 0061-LoongArch-Redefine-pattern-for-xvfrecip-vfrecip-inst.patch create mode 100644 0062-LoongArch-New-options-mrecip-and-mrecip-with-ffast-m.patch create mode 100644 0063-LoongArch-Vectorized-loop-unrolling-is-disable-for-d.patch create mode 100644 0064-LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch create mode 100644 0065-LoongArch-Fix-ICE-and-use-simplify_gen_subreg-instea.patch create mode 100644 
0066-LoongArch-Fix-eh_return-epilogue-for-normal-returns.patch create mode 100644 0067-LoongArch-Allow-mcmodel-extreme-and-model-attribute-.patch create mode 100644 0068-LoongArch-Fix-warnings-building-libgcc.patch create mode 100644 0069-LoongArch-testsuite-Remove-XFAIL-in-vect-ftint-no-in.patch create mode 100644 0070-LoongArch-Include-rtl.h-for-COSTS_N_INSNS-instead-of.patch create mode 100644 0071-LoongArch-Fix-instruction-costs-PR112936.patch create mode 100644 0072-LoongArch-Add-alslsi3_extend.patch create mode 100644 0073-LoongArch-Add-support-for-D-frontend.patch create mode 100644 0074-libruntime-Add-fiber-context-switch-code-for-LoongAr.patch create mode 100644 0075-LoongArch-Fix-FP-vector-comparsons-PR113034.patch create mode 100644 0076-LoongArch-Use-force_reg-instead-of-gen_reg_rtx-emit_.patch create mode 100644 0077-LoongArch-Clean-up-vec_init-expander.patch create mode 100644 0078-LoongArch-Fix-incorrect-code-generation-for-sad-patt.patch create mode 100644 0079-LoongArch-Modify-the-check-type-of-the-vector-builti.patch create mode 100644 0080-LoongArch-extend.texi-Fix-typos-in-LSX-intrinsics.patch create mode 100644 0081-LoongArch-Fix-builtin-function-prototypes-for-LASX-i.patch create mode 100644 0082-LoongArch-Add-asm-modifiers-to-the-LSX-and-LASX-dire.patch create mode 100644 0083-LoongArch-Implement-FCCmode-reload-and-cstore-ANYF-m.patch create mode 100644 0084-LoongArch-Add-sign_extend-pattern-for-32-bit-rotate-.patch create mode 100644 0085-LoongArch-Fixed-bug-in-bstrins_-mode-_for_ior_mask-t.patch create mode 100644 0086-LoongArch-Fix-insn-output-of-vec_concat-templates-fo.patch create mode 100644 0087-LoongArch-Fix-ICE-when-passing-two-same-vector-argum.patch create mode 100644 0088-LoongArch-Expand-left-rotate-to-right-rotate-with-ne.patch create mode 100644 0089-LoongArch-Fix-infinite-secondary-reloading-of-FCCmod.patch create mode 100644 0090-LoongArch-Replace-mexplicit-relocs-auto-simple-used-.patch create mode 100644 
0091-LoongArch-Fix-the-format-of-bstrins_-mode-_for_ior_m.patch create mode 100644 0092-LoongArch-Added-TLS-Le-Relax-support.patch create mode 100644 0093-LoongArch-Provide-fmin-fmax-RTL-pattern-for-vectors.patch create mode 100644 0094-LoongArch-Merge-constant-vector-permuatation-impleme.patch create mode 100644 0095-LoongArch-testsuite-Fix-FAIL-in-lasx-xvstelm.c-file.patch create mode 100644 0096-LoongArch-testsuite-Modify-the-test-behavior-of-the-.patch create mode 100644 0097-LoongArch-testsuite-Delete-the-default-run-behavior-.patch create mode 100644 0098-LoongArch-testsuite-Added-additional-vectorization-m.patch create mode 100644 0099-LoongArch-testsuite-Give-up-the-detection-of-the-gcc.patch create mode 100644 0100-LoongArch-Fixed-the-problem-of-incorrect-judgment-of.patch create mode 100644 0101-LoongArch-Improve-lasx_xvpermi_q_-LASX-mode-insn-pat.patch create mode 100644 0102-LoongArch-Implement-vec_init-M-N-where-N-is-a-LSX-ve.patch create mode 100644 0103-LoongArch-Handle-ISA-evolution-switches-along-with-o.patch create mode 100644 0104-LoongArch-Rename-ISA_BASE_LA64V100-to-ISA_BASE_LA64.patch create mode 100644 0105-LoongArch-Use-enums-for-constants.patch create mode 100644 0106-LoongArch-Simplify-mexplicit-reloc-definitions.patch create mode 100644 0107-LoongArch-testsuite-Add-loongarch-support-to-slp-21..patch create mode 100644 0108-LoongArch-Optimized-some-of-the-symbolic-expansion-i.patch create mode 100644 0109-LoongArch-Implement-option-save-restore.patch create mode 100644 0110-LoongArch-Redundant-sign-extension-elimination-optim.patch create mode 100644 0111-LoongArch-Redundant-sign-extension-elimination-optim.patch create mode 100644 0112-LoongArch-Assign-the-u-attribute-to-the-mem-to-which.patch create mode 100644 0113-LoongArch-testsuite-Fix-fail-in-gen-vect-2-25-.c-fil.patch create mode 100644 0114-LoongArch-Remove-constraint-z-from-movsi_internal.patch create mode 100644 0115-LoongArch-doc-Add-attribute-descriptions-defined-in-.patch 
create mode 100644 0116-LoongArch-Disable-explicit-reloc-for-TLS-LD-GD-with-.patch create mode 100644 0117-LoongArch-testsuite-Disable-stack-protector-for-got-.patch create mode 100644 0118-LoongArch-Disable-TLS-type-symbols-from-generating-n.patch create mode 100644 0119-LoongArch-Remove-vec_concatz-mode-pattern.patch create mode 100644 0120-LoongArch-Optimize-implementation-of-single-precisio.patch create mode 100644 0121-LoongArch-Define-LOGICAL_OP_NON_SHORT_CIRCUIT.patch create mode 100644 0122-LoongArch-Split-vec_selects-of-bottom-elements-into-.patch create mode 100644 0123-LoongArch-Modify-the-address-calculation-logic-for-o.patch create mode 100644 0124-LoongArch-Merge-template-got_load_tls_-ld-gd-le-ie.patch create mode 100644 0125-LoongArch-Add-the-macro-implementation-of-mcmodel-ex.patch create mode 100644 0126-LoongArch-Enable-explicit-reloc-for-extreme-TLS-GD-L.patch create mode 100644 0127-LoongArch-Added-support-for-loading-__get_tls_addr-s.patch create mode 100644 0128-LoongArch-Don-t-split-the-instructions-containing-re.patch create mode 100644 0129-LoongArch-Adjust-cost-of-vector_stmt-that-match-mult.patch create mode 100644 0130-LoongArch-Fix-incorrect-return-type-for-frecipe-frsq.patch create mode 100644 0131-LoongArch-Fix-an-ODR-violation.patch create mode 100644 0132-LoongArch-testsuite-Fix-gcc.dg-vect-vect-reduc-mul_-.patch create mode 100644 0133-LoongArch-Avoid-out-of-bounds-access-in-loongarch_sy.patch create mode 100644 0134-LoongArch-Fix-wrong-LSX-FP-vector-negation.patch create mode 100644 0135-LoongArch-Fix-wrong-return-value-type-of-__iocsrrd_h.patch create mode 100644 0136-LoongArch-Remove-redundant-symbol-type-conversions-i.patch create mode 100644 0137-LoongArch-When-checking-whether-the-assembler-suppor.patch create mode 100644 0138-LoongArch-Don-t-falsely-claim-gold-supported-in-topl.patch create mode 100644 0139-LoongArch-NFC-Deduplicate-crc-instruction-defines.patch create mode 100644 
0140-LoongArch-Remove-unneeded-sign-extension-after-crc-c.patch create mode 100644 0141-LoongArch-Allow-s9-as-a-register-alias.patch create mode 100644 0142-LoongArch-testsuite-Rewrite-x-vfcmp-d-f-.c-to-avoid-.patch create mode 100644 0143-LoongArch-Use-lib-instead-of-lib64-as-the-library-se.patch create mode 100644 0144-LoongArch-testsuite-Fix-problems-with-incorrect-resu.patch create mode 100644 0145-LoongArch-Fixed-an-issue-with-the-implementation-of-.patch create mode 100644 0146-LoongArch-testsuite-Add-compilation-options-to-the-r.patch create mode 100644 0147-LoongArch-Emit-R_LARCH_RELAX-for-TLS-IE-with-non-ext.patch create mode 100644 0148-LoongArch-Remove-unused-and-incorrect-sge-u-_-X-mode.patch create mode 100644 0149-LoongArch-Remove-masking-process-for-operand-3-of-xv.patch create mode 100644 0150-LoongArch-Fix-C23-.-functions-returning-large-aggreg.patch create mode 100644 0151-LoongArch-Remove-unused-useless-definitions.patch create mode 100644 0152-LoongArch-Change-loongarch_expand_vec_cmp-s-return-t.patch create mode 100644 0153-LoongArch-Combine-UNITS_PER_FP_REG-and-UNITS_PER_FPR.patch create mode 100644 0154-LoongArch-Fix-a-typo-PR-114407.patch create mode 100644 0155-testsuite-Add-a-test-case-for-negating-FP-vectors-co.patch create mode 100644 0156-LoongArch-Add-descriptions-of-the-compilation-option.patch create mode 100644 0157-LoongArch-Split-loongarch_option_override_internal-i.patch create mode 100644 0158-LoongArch-Regenerate-loongarch.opt.urls.patch create mode 100644 0159-LoongArch-Add-support-for-TLS-descriptors.patch create mode 100644 0160-LoongArch-Fix-missing-plugin-header.patch create mode 100644 0161-LoongArch-Remove-unused-code.patch create mode 100644 0162-LoongArch-Set-default-alignment-for-functions-jumps-.patch create mode 100644 0163-LoongArch-Enable-switchable-target.patch create mode 100644 0164-LoongArch-Define-ISA-versions.patch create mode 100644 0165-LoongArch-Define-builtin-macros-for-ISA-evolutions.patch create mode 
100644 0166-LoongArch-Add-constraints-for-bit-string-operation-d.patch create mode 100644 0167-LoongArch-Guard-REGNO-with-REG_P-in-loongarch_expand.patch create mode 100644 0168-LoongArch-Fix-mode-size-comparision-in-loongarch_exp.patch create mode 100644 0169-LoongArch-Use-bstrins-for-value-1u-const.patch create mode 100644 0170-LoongArch-Tweak-IOR-rtx_cost-for-bstrins.patch create mode 100644 0171-LoongArch-NFC-Dedup-and-sort-the-comment-in-loongarc.patch create mode 100644 0172-LoongArch-Fix-explicit-relocs-extreme-tls-desc.c-tes.patch create mode 100644 0173-LoongArch-Define-loongarch_insn_cost-and-set-the-cos.patch create mode 100644 0174-LoongArch-TFmode-is-not-allowed-to-be-stored-in-the-.patch create mode 100644 0175-LoongArch-Remove-unreachable-codes.patch create mode 100644 0176-LoongArch-Organize-the-code-related-to-split-move-an.patch create mode 100644 0177-LoongArch-Expand-some-SImode-operations-through-si3_.patch create mode 100644 0178-LoongArch-Relax-ins_zero_bitmask_operand-and-remove-.patch create mode 100644 0179-LoongArch-Rework-bswap-hi-si-di-2-definition.patch create mode 100644 0180-testsuite-fix-dg-do-preprocess-typo.patch create mode 100644 0181-LoongArch-Remove-gawk-extension-from-a-generator-scr.patch create mode 100644 0182-LoongArch-Use-iorn-and-andn-standard-pattern-names.patch create mode 100644 0183-LoongArch-Drop-vcond-u-expanders.patch create mode 100644 0184-LoongArch-Provide-ashr-lshr-and-ashl-RTL-pattern-for.patch create mode 100644 0185-LoongArch-Implement-scalar-isinf-isnormal-and-isfini.patch create mode 100644 0186-LoongArch-Add-support-to-annotate-tablejump.patch create mode 100644 0187-LoongArch-Fix-up-r15-4130.patch create mode 100644 0188-libphobos-Update-build-scripts-for-LoongArch64.patch create mode 100644 0189-LoongArch-fix-building-errors.patch create mode 100644 0190-tree-optimization-110702-avoid-zero-based-memory-ref.patch create mode 100644 0191-LoongArch-Change-OSDIR-for-distribution.patch diff --git 
a/0001-LoongArch-Reimplement-multilib-build-option-handling.patch b/0001-LoongArch-Reimplement-multilib-build-option-handling.patch new file mode 100644 index 0000000..c3d8f9f --- /dev/null +++ b/0001-LoongArch-Reimplement-multilib-build-option-handling.patch @@ -0,0 +1,464 @@ +From d394a9ac68674b40e0d2b436c09e23dd29d8b5d0 Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Wed, 13 Sep 2023 17:52:14 +0800 +Subject: [PATCH 001/188] LoongArch: Reimplement multilib build option + handling. + +Library build options from --with-multilib-list used to be processed with +*self_spec, which missed the driver's initial canonicalization. This +caused limitations on CFLAGS override and the use of driver-only options +like -m[no]-lsx. + +The problem is solved by promoting the injection rules of --with-multilib-list +options to the first element of DRIVER_SELF_SPECS, to make them execute before +the canonialization. The library-build options are also hard-coded in +the driver and can be used conveniently by the builders of other non-gcc +libraries via the use of -fmultiflags. + +Bootstrapped and tested on loongarch64-linux-gnu. + +ChangeLog: + + * config-ml.in: Remove unneeded loongarch clause. + * configure.ac: Register custom makefile fragments mt-loongarch-* + for loongarch targets. + * configure: Regenerate. + +config/ChangeLog: + + * mt-loongarch-mlib: New file. Pass -fmultiflags when building + target libraries (FLAGS_FOR_TARGET). + * mt-loongarch-elf: New file. + * mt-loongarch-gnu: New file. + +gcc/ChangeLog: + + * config.gcc: Pass the default ABI via TM_MULTILIB_CONFIG. + * config/loongarch/loongarch-driver.h: Invoke MLIB_SELF_SPECS + before the driver canonicalization routines. + * config/loongarch/loongarch.h: Move definitions of CC1_SPEC etc. + to loongarch-driver.h + * config/loongarch/t-linux: Move multilib-related definitions to + t-multilib. + * config/loongarch/t-multilib: New file. Inject library build + options obtained from --with-multilib-list. 
+ * config/loongarch/t-loongarch: Same. +--- + config-ml.in | 10 ---- + config/mt-loongarch-elf | 1 + + config/mt-loongarch-gnu | 2 + + config/mt-loongarch-mlib | 1 + + configure | 6 +++ + configure.ac | 6 +++ + gcc/config.gcc | 6 +-- + gcc/config/loongarch/loongarch-driver.h | 42 +++++++++++++++ + gcc/config/loongarch/loongarch.h | 50 ------------------ + gcc/config/loongarch/t-linux | 66 +++--------------------- + gcc/config/loongarch/t-loongarch | 2 +- + gcc/config/loongarch/t-multilib | 68 +++++++++++++++++++++++++ + 12 files changed, 137 insertions(+), 123 deletions(-) + create mode 100644 config/mt-loongarch-elf + create mode 100644 config/mt-loongarch-gnu + create mode 100644 config/mt-loongarch-mlib + create mode 100644 gcc/config/loongarch/t-multilib + +diff --git a/config-ml.in b/config-ml.in +index ad0db0781..68854a4f1 100644 +--- a/config-ml.in ++++ b/config-ml.in +@@ -301,16 +301,6 @@ arm-*-*) + done + fi + ;; +-loongarch*-*) +- old_multidirs="${multidirs}" +- multidirs="" +- for x in ${old_multidirs}; do +- case "$x" in +- `${CC-gcc} --print-multi-directory`) : ;; +- *) multidirs="${multidirs} ${x}" ;; +- esac +- done +- ;; + m68*-*-*) + if [ x$enable_softfloat = xno ] + then +diff --git a/config/mt-loongarch-elf b/config/mt-loongarch-elf +new file mode 100644 +index 000000000..bbf29bb57 +--- /dev/null ++++ b/config/mt-loongarch-elf +@@ -0,0 +1 @@ ++include $(srcdir)/config/mt-loongarch-mlib +diff --git a/config/mt-loongarch-gnu b/config/mt-loongarch-gnu +new file mode 100644 +index 000000000..dfefb44ed +--- /dev/null ++++ b/config/mt-loongarch-gnu +@@ -0,0 +1,2 @@ ++include $(srcdir)/config/mt-gnu ++include $(srcdir)/config/mt-loongarch-mlib +diff --git a/config/mt-loongarch-mlib b/config/mt-loongarch-mlib +new file mode 100644 +index 000000000..4cfe568f1 +--- /dev/null ++++ b/config/mt-loongarch-mlib +@@ -0,0 +1 @@ ++FLAGS_FOR_TARGET += -fmultiflags +diff --git a/configure b/configure +index aff62c464..81b4a3cec 100755 +--- a/configure ++++ 
b/configure +@@ -9548,6 +9548,12 @@ case "${target}" in + spu-*-*) + target_makefile_frag="config/mt-spu" + ;; ++ loongarch*-*linux* | loongarch*-*gnu*) ++ target_makefile_frag="config/mt-loongarch-gnu" ++ ;; ++ loongarch*-*elf*) ++ target_makefile_frag="config/mt-loongarch-elf" ++ ;; + mips*-sde-elf* | mips*-mti-elf* | mips*-img-elf*) + target_makefile_frag="config/mt-sde" + ;; +diff --git a/configure.ac b/configure.ac +index f310d75ca..9f8dbd319 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -2729,6 +2729,12 @@ case "${target}" in + spu-*-*) + target_makefile_frag="config/mt-spu" + ;; ++ loongarch*-*linux* | loongarch*-*gnu*) ++ target_makefile_frag="config/mt-loongarch-gnu" ++ ;; ++ loongarch*-*elf*) ++ target_makefile_frag="config/mt-loongarch-elf" ++ ;; + mips*-sde-elf* | mips*-mti-elf* | mips*-img-elf*) + target_makefile_frag="config/mt-sde" + ;; +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 3f870e966..e34a5fbb9 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -2510,7 +2510,7 @@ loongarch*-*-linux*) + tm_file="elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}" + tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h" + extra_options="${extra_options} linux-android.opt" +- tmake_file="${tmake_file} loongarch/t-linux" ++ tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux" + gnu_ld=yes + gas=yes + +@@ -2522,7 +2522,7 @@ loongarch*-*-linux*) + loongarch*-*-elf*) + tm_file="elfos.h newlib-stdint.h ${tm_file}" + tm_file="${tm_file} loongarch/elf.h loongarch/linux.h" +- tmake_file="${tmake_file} loongarch/t-linux" ++ tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux" + gnu_ld=yes + gas=yes + +@@ -5241,7 +5241,7 @@ case "${target}" in + loongarch_multilib_list_sane=no + + # This one goes to TM_MULTILIB_CONFIG, for use in t-linux. +- loongarch_multilib_list_make="" ++ loongarch_multilib_list_make="${abi_base}," + + # This one goes to tm_defines, for use in loongarch-driver.c. 
+ loongarch_multilib_list_c="" +diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h +index 6cfe0efb5..e7d083677 100644 +--- a/gcc/config/loongarch/loongarch-driver.h ++++ b/gcc/config/loongarch/loongarch-driver.h +@@ -23,6 +23,39 @@ along with GCC; see the file COPYING3. If not see + + #include "loongarch-str.h" + ++#ifndef SUBTARGET_CPP_SPEC ++#define SUBTARGET_CPP_SPEC "" ++#endif ++ ++#ifndef SUBTARGET_CC1_SPEC ++#define SUBTARGET_CC1_SPEC "" ++#endif ++ ++#ifndef SUBTARGET_ASM_SPEC ++#define SUBTARGET_ASM_SPEC "" ++#endif ++ ++#define EXTRA_SPECS \ ++ {"early_self_spec", ""}, \ ++ {"subtarget_cc1_spec", SUBTARGET_CC1_SPEC}, \ ++ {"subtarget_cpp_spec", SUBTARGET_CPP_SPEC}, \ ++ {"subtarget_asm_spec", SUBTARGET_ASM_SPEC}, ++ ++ ++#undef CPP_SPEC ++#define CPP_SPEC \ ++ "%(subtarget_cpp_spec)" ++ ++#undef CC1_SPEC ++#define CC1_SPEC \ ++ "%{G*} %{,ada:-gnatea %{mabi=*} -gnatez} " \ ++ "%(subtarget_cc1_spec)" ++ ++#undef ASM_SPEC ++#define ASM_SPEC \ ++ "%{mabi=*} %(subtarget_asm_spec)" ++ ++ + extern const char* + la_driver_init (int argc, const char **argv); + +@@ -45,7 +78,16 @@ driver_get_normalized_m_opts (int argc, const char **argv); + #define LA_SET_PARM_SPEC(NAME) \ + " %{m" OPTSTR_##NAME "=*: %:set_m_parm(" OPTSTR_##NAME " %*)}" \ + ++/* For MLIB_SELF_SPECS. */ ++#include "loongarch-multilib.h" ++ ++#ifndef MLIB_SELF_SPECS ++#define MLIB_SELF_SPECS "" ++#endif ++ + #define DRIVER_HANDLE_MACHINE_OPTIONS \ ++ " %(early_self_spec)", \ ++ MLIB_SELF_SPECS \ + " %:driver_init()" \ + " %{c|S|E|nostdlib: %:set_no_link()}" \ + " %{nostartfiles: %{nodefaultlibs: %:set_no_link()}}" \ +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index c7e91a06d..a443a6427 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -64,56 +64,6 @@ along with GCC; see the file COPYING3. 
If not see + #define NM_FLAGS "-Bn" + #endif + +-/* SUBTARGET_ASM_SPEC is always passed to the assembler. It may be +- overridden by subtargets. */ +- +-#ifndef SUBTARGET_ASM_SPEC +-#define SUBTARGET_ASM_SPEC "" +-#endif +- +-#undef ASM_SPEC +-#define ASM_SPEC "%{mabi=*} %{subtarget_asm_spec}" +- +-/* Extra switches sometimes passed to the linker. */ +- +-#ifndef LINK_SPEC +-#define LINK_SPEC "" +-#endif /* LINK_SPEC defined */ +- +-/* Specs for the compiler proper. */ +- +-/* CC1_SPEC is the set of arguments to pass to the compiler proper. */ +- +-#undef CC1_SPEC +-#define CC1_SPEC "%{,ada:-gnatea} %{m*} \ +-%{G*} \ +-%(subtarget_cc1_spec) %{,ada:-gnatez}" +- +-/* Preprocessor specs. */ +- +-/* SUBTARGET_CPP_SPEC is passed to the preprocessor. It may be +- overridden by subtargets. */ +-#ifndef SUBTARGET_CPP_SPEC +-#define SUBTARGET_CPP_SPEC "" +-#endif +- +-#define CPP_SPEC "%(subtarget_cpp_spec)" +- +-/* This macro defines names of additional specifications to put in the specs +- that can be used in various specifications like CC1_SPEC. Its definition +- is an initializer with a subgrouping for each command option. +- +- Each subgrouping contains a string constant, that defines the +- specification name, and a string constant that used by the GCC driver +- program. +- +- Do not define this macro if it does not need to do anything. */ +- +-#define EXTRA_SPECS \ +- {"subtarget_cc1_spec", SUBTARGET_CC1_SPEC}, \ +- {"subtarget_cpp_spec", SUBTARGET_CPP_SPEC}, \ +- {"subtarget_asm_spec", SUBTARGET_ASM_SPEC}, +- + /* Registers may have a prefix which can be ignored when matching + user asm and register definitions. */ + #ifndef REGISTER_PREFIX +diff --git a/gcc/config/loongarch/t-linux b/gcc/config/loongarch/t-linux +index 62a870b66..7cd7cde25 100644 +--- a/gcc/config/loongarch/t-linux ++++ b/gcc/config/loongarch/t-linux +@@ -16,68 +16,16 @@ + # along with GCC; see the file COPYING3. If not see + # . 
+ +-# Multilib +-MULTILIB_OPTIONS = mabi=lp64d/mabi=lp64f/mabi=lp64s +-MULTILIB_DIRNAMES = base/lp64d base/lp64f base/lp64s +- +-# The GCC driver always gets all abi-related options on the command line. +-# (see loongarch-driver.c:driver_get_normalized_m_opts) +-comma=, +-MULTILIB_REQUIRED = $(foreach mlib,$(subst $(comma), ,$(TM_MULTILIB_CONFIG)),\ +- $(firstword $(subst /, ,$(mlib)))) +- +-SPECS = specs.install +- +-# temporary self_spec when building libraries (e.g. libgcc) +-gen_mlib_spec = $(if $(word 2,$1),\ +- %{$(firstword $1):$(patsubst %,-%,$(wordlist 2,$(words $1),$1))}) +- +-# clean up the result of DRIVER_SELF_SPEC to avoid conflict +-lib_build_self_spec = % $@ +- +-# Do some preparation before regression tests: +-# remove lib-build-specs / make symlinks for the toplevel multilib variant +- +-LA_DEFAULT_MULTISUBDIR = $(shell $(GCC_FOR_TARGET) --print-multi-dir) +-.PHONY: remove-lib-specs +-check check-host check-target $(CHECK_TARGETS) $(lang_checks): remove-lib-specs +-remove-lib-specs: +- -mv -f specs.install specs 2>/dev/null +- -mv $(LA_DEFAULT_MULTISUBDIR)/* ./ +- -mkdir -p ../$(target_noncanonical)/`dirname $(LA_DEFAULT_MULTISUBDIR)` +- -$(LN_S) .. ../$(target_noncanonical)/$(LA_DEFAULT_MULTISUBDIR) +- +-# Multiarch +-ifneq ($(call if_multiarch,yes),yes) +- # Define LA_DISABLE_MULTIARCH if multiarch is disabled. +- tm_defines += LA_DISABLE_MULTIARCH +-else +- # Only define MULTIARCH_DIRNAME when multiarch is enabled, +- # or it would always introduce ${target} into the search path. +- MULTIARCH_DIRNAME = $(LA_MULTIARCH_TRIPLET) +-endif ++MULTIOSDIR_lp64d := ../lib64$(call if_multiarch,:loongarch64-linux-gnu) ++MULTIOSDIR_lp64f := ../lib64/f32$(call if_multiarch,:loongarch64-linux-gnuf32) ++MULTIOSDIR_lp64s := ../lib64/sf$(call if_multiarch,:loongarch64-linux-gnusf) + + # Don't define MULTILIB_OSDIRNAMES if multilib is disabled. 
+ ifeq ($(filter LA_DISABLE_MULTILIB,$(tm_defines)),) + +- MULTILIB_OSDIRNAMES = \ +- mabi.lp64d=../lib64$\ +- $(call if_multiarch,:loongarch64-linux-gnu) +- +- MULTILIB_OSDIRNAMES += \ +- mabi.lp64f=../lib64/f32$\ +- $(call if_multiarch,:loongarch64-linux-gnuf32) +- +- MULTILIB_OSDIRNAMES += \ +- mabi.lp64s=../lib64/sf$\ +- $(call if_multiarch,:loongarch64-linux-gnusf) ++ MULTILIB_OSDIRNAMES = .=$(MULTIOSDIR_$(mlib_default)) ++ MULTILIB_OSDIRNAMES += mabi.lp64d=$(MULTIOSDIR_lp64d) ++ MULTILIB_OSDIRNAMES += mabi.lp64f=$(MULTIOSDIR_lp64f) ++ MULTILIB_OSDIRNAMES += mabi.lp64s=$(MULTIOSDIR_lp64s) + + endif +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index e73f4f437..28cfb49df 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -16,7 +16,7 @@ + # along with GCC; see the file COPYING3. If not see + # . + +-TM_H += $(srcdir)/config/loongarch/loongarch-driver.h ++TM_H += loongarch-multilib.h $(srcdir)/config/loongarch/loongarch-driver.h + OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \ + $(srcdir)/config/loongarch/loongarch-tune.h + +diff --git a/gcc/config/loongarch/t-multilib b/gcc/config/loongarch/t-multilib +new file mode 100644 +index 000000000..bf6c18298 +--- /dev/null ++++ b/gcc/config/loongarch/t-multilib +@@ -0,0 +1,68 @@ ++# Copyright (C) 2023 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. 
++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . ++ ++# Helper definitions ++comma=, ++null := ++space := $(null) # ++exclude_1st = $(wordlist 2,$(words $1),$1) ++ ++# Common definitions ++mlib_all := lp64d lp64f lp64s ++$(foreach i,$(mlib_all),$(eval MULTISUBDIR_$i := base/$i)) ++ ++mlib_default := $(firstword $(subst $(comma), ,$(TM_MULTILIB_CONFIG))) ++mlib_all := $(filter-out $(mlib_default),$(mlib_all)) ++ ++MULTILIB_OPTIONS := $(subst $(space),/,$(foreach i,$(mlib_all),mabi=$(i))) ++MULTILIB_DIRNAMES := $(foreach i,$(mlib_all),$(MULTISUBDIR_$(i))) ++ ++# Customize builds with --with-multilib-list ++MULTILIB_REQUIRED := $(foreach i,$(call exclude_1st,\ ++ $(subst $(comma), ,$(TM_MULTILIB_CONFIG))),\ ++ $(firstword $(subst /, ,$(i)))) ++ ++## spec rules for building libraries, triggered by -fmultiflags ++gen_mlib_spec = $(if $(word 2,$1),\ ++ %{$(firstword $1):$(patsubst %,-%,$(call exclude_1st,$1)})) ++ ++lib_build_spec = $(foreach mlib,\ ++ $(call exclude_1st,$(subst $(comma), ,$(TM_MULTILIB_CONFIG))),\ ++ $(call gen_mlib_spec,$(subst /, ,$(mlib)))) ++ ++default_mlib_spec := %{fmultiflags:%{!mabi=*:-mabi=$(mlib_default)}} ++lib_build_spec := %{fmultiflags:$(lib_build_spec)} ++ ++ifneq ($(TM_MULTILIB_CONFIG),) ++loongarch-multilib.h: ++ @echo "#define MLIB_SELF_SPECS" \ ++ "\"$(default_mlib_spec)\"," \ ++ "\"$(lib_build_spec)\"," > $@ ++else ++loongarch-multilib.h: ; @touch $@ ++endif ++ ++# Multiarch ++ifneq ($(call if_multiarch,yes),yes) ++ # Define LA_DISABLE_MULTIARCH if multiarch is disabled. ++ tm_defines += LA_DISABLE_MULTIARCH ++else ++ # Only define MULTIARCH_DIRNAME when multiarch is enabled, ++ # or it would always introduce ${target} into the search path. 
++ MULTIARCH_DIRNAME = $(LA_MULTIARCH_TRIPLET) ++endif +-- +2.43.0 + diff --git a/0002-LoongArch-Check-whether-binutils-supports-the-relax-.patch b/0002-LoongArch-Check-whether-binutils-supports-the-relax-.patch new file mode 100644 index 0000000..9281f74 --- /dev/null +++ b/0002-LoongArch-Check-whether-binutils-supports-the-relax-.patch @@ -0,0 +1,192 @@ +From 13c33536900709bf1f33171d5ae2b2af97789601 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Fri, 15 Sep 2023 10:22:49 +0800 +Subject: [PATCH 002/188] LoongArch: Check whether binutils supports the relax + function. If supported, explicit relocs are turned off by default. + +gcc/ChangeLog: + + * config.in: Regenerate. + * config/loongarch/genopts/loongarch.opt.in: Add compilation option + mrelax. And set the initial value of explicit-relocs according to the + detection status. + * config/loongarch/gnu-user.h: When compiling with -mno-relax, pass the + --no-relax option to the linker. + * config/loongarch/loongarch-driver.h (ASM_SPEC): When compiling with + -mno-relax, pass the -mno-relax option to the assembler. + * config/loongarch/loongarch-opts.h (HAVE_AS_MRELAX_OPTION): Define macro. + * config/loongarch/loongarch.opt: Regenerate. + * configure: Regenerate. + * configure.ac: Add detection of support for binutils relax function. +--- + gcc/config.in | 6 ++++ + gcc/config/loongarch/genopts/loongarch.opt.in | 7 ++++- + gcc/config/loongarch/gnu-user.h | 3 +- + gcc/config/loongarch/loongarch-driver.h | 2 +- + gcc/config/loongarch/loongarch-opts.h | 4 +++ + gcc/config/loongarch/loongarch.opt | 7 ++++- + gcc/configure | 31 +++++++++++++++++++ + gcc/configure.ac | 4 +++ + 8 files changed, 60 insertions(+), 4 deletions(-) + +diff --git a/gcc/config.in b/gcc/config.in +index 0dff36199..0c55e67e7 100644 +--- a/gcc/config.in ++++ b/gcc/config.in +@@ -637,6 +637,12 @@ + #endif + + ++/* Define if your assembler supports -mrelax option. 
*/ ++#ifndef USED_FOR_TARGET ++#undef HAVE_AS_MRELAX_OPTION ++#endif ++ ++ + /* Define if your assembler supports .mspabi_attribute. */ + #ifndef USED_FOR_TARGET + #undef HAVE_AS_MSPABI_ATTRIBUTE +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 2ef1b1e3b..f18733c24 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -181,7 +181,7 @@ Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init + -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. + + mexplicit-relocs +-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS) ++Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION) + Use %reloc() assembly operators. + + ; The code model option names for -mcmodel. +@@ -214,3 +214,8 @@ Specify the code model. + mdirect-extern-access + Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0) + Avoid using the GOT to access external symbols. ++ ++mrelax ++Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION) ++Take advantage of linker relaxations to reduce the number of instructions ++required to materialize symbol addresses. +diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h +index 44e4f2575..60ef75601 100644 +--- a/gcc/config/loongarch/gnu-user.h ++++ b/gcc/config/loongarch/gnu-user.h +@@ -48,7 +48,8 @@ along with GCC; see the file COPYING3. If not see + "%{!shared: %{static} " \ + "%{!static: %{!static-pie: %{rdynamic:-export-dynamic} " \ + "-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} " \ +- "%{static-pie: -static -pie --no-dynamic-linker -z text}}" ++ "%{static-pie: -static -pie --no-dynamic-linker -z text}}" \ ++ "%{mno-relax: --no-relax}" + + + /* Similar to standard Linux, but adding -ffast-math support. 
*/ +diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h +index e7d083677..59fa3263d 100644 +--- a/gcc/config/loongarch/loongarch-driver.h ++++ b/gcc/config/loongarch/loongarch-driver.h +@@ -53,7 +53,7 @@ along with GCC; see the file COPYING3. If not see + + #undef ASM_SPEC + #define ASM_SPEC \ +- "%{mabi=*} %(subtarget_asm_spec)" ++ "%{mabi=*} %{mno-relax} %(subtarget_asm_spec)" + + + extern const char* +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 624e246bb..f2b59abe6 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -99,4 +99,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + #define HAVE_AS_EXPLICIT_RELOCS 0 + #endif + ++#ifndef HAVE_AS_MRELAX_OPTION ++#define HAVE_AS_MRELAX_OPTION 0 ++#endif ++ + #endif /* LOONGARCH_OPTS_H */ +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index f2d21c9f3..78f2baf3a 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -188,7 +188,7 @@ Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init + -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. + + mexplicit-relocs +-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS) ++Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION) + Use %reloc() assembly operators. + + ; The code model option names for -mcmodel. +@@ -221,3 +221,8 @@ Specify the code model. + mdirect-extern-access + Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0) + Avoid using the GOT to access external symbols. ++ ++mrelax ++Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION) ++Take advantage of linker relaxations to reduce the number of instructions ++required to materialize symbol addresses. 
+diff --git a/gcc/configure b/gcc/configure +index 2a5d3aaf3..8ae8a924a 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -28830,6 +28830,37 @@ if test $gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support = yes; then + + $as_echo "#define HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT 1" >>confdefs.h + ++fi ++ ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -mrelax option" >&5 ++$as_echo_n "checking assembler for -mrelax option... " >&6; } ++if ${gcc_cv_as_loongarch_relax+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ gcc_cv_as_loongarch_relax=no ++ if test x$gcc_cv_as != x; then ++ $as_echo '.text' > conftest.s ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -mrelax -o conftest.o conftest.s >&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; } ++ then ++ gcc_cv_as_loongarch_relax=yes ++ else ++ echo "configure: failed program was" >&5 ++ cat conftest.s >&5 ++ fi ++ rm -f conftest.o conftest.s ++ fi ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_relax" >&5 ++$as_echo "$gcc_cv_as_loongarch_relax" >&6; } ++if test $gcc_cv_as_loongarch_relax = yes; then ++ ++$as_echo "#define HAVE_AS_MRELAX_OPTION 1" >>confdefs.h ++ + fi + + ;; +diff --git a/gcc/configure.ac b/gcc/configure.ac +index ba2bf1ffc..f7161e66e 100644 +--- a/gcc/configure.ac ++++ b/gcc/configure.ac +@@ -5322,6 +5322,10 @@ x: + .cfi_endproc],, + [AC_DEFINE(HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT, 1, + [Define if your assembler supports eh_frame pcrel encoding.])]) ++ gcc_GAS_CHECK_FEATURE([-mrelax option], gcc_cv_as_loongarch_relax, ++ [-mrelax], [.text],, ++ [AC_DEFINE(HAVE_AS_MRELAX_OPTION, 1, ++ [Define if your assembler supports -mrelax option.])]) + ;; + s390*-*-*) + gcc_GAS_CHECK_FEATURE([.gnu_attribute support], +-- +2.43.0 + diff --git a/0003-Modify-gas-uleb128-support-test.patch 
b/0003-Modify-gas-uleb128-support-test.patch new file mode 100644 index 0000000..4151ed7 --- /dev/null +++ b/0003-Modify-gas-uleb128-support-test.patch @@ -0,0 +1,115 @@ +From 38c338555e64da83fd35c608a1a89d738e1ca356 Mon Sep 17 00:00:00 2001 +From: mengqinggang +Date: Fri, 15 Sep 2023 12:04:04 +0800 +Subject: [PATCH 003/188] Modify gas uleb128 support test + +Some assemblers (GNU as for LoongArch) generates relocations for leb128 +symbol arithmetic for relaxation, we need to disable relaxation probing +leb128 support then. + +gcc/ChangeLog: + + * configure: Regenerate. + * configure.ac: Checking assembler for -mno-relax support. + Disable relaxation when probing leb128 support. + +co-authored-by: Xi Ruoyao +--- + gcc/configure | 42 +++++++++++++++++++++++++++++++++++++++++- + gcc/configure.ac | 17 ++++++++++++++++- + 2 files changed, 57 insertions(+), 2 deletions(-) + +diff --git a/gcc/configure b/gcc/configure +index 8ae8a924a..430d44dc3 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -24441,6 +24441,46 @@ _ACEOF + + + ++# Some assemblers (GNU as for LoongArch) generates relocations for ++# leb128 symbol arithmetic for relaxation, we need to disable relaxation ++# probing leb128 support then. ++case $target in ++ loongarch*-*-*) ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -mno-relax support" >&5 ++$as_echo_n "checking assembler for -mno-relax support... " >&6; } ++if ${gcc_cv_as_mno_relax+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ gcc_cv_as_mno_relax=no ++ if test x$gcc_cv_as != x; then ++ $as_echo '.text' > conftest.s ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -mno-relax -o conftest.o conftest.s >&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 ++ test $ac_status = 0; }; } ++ then ++ gcc_cv_as_mno_relax=yes ++ else ++ echo "configure: failed program was" >&5 ++ cat conftest.s >&5 ++ fi ++ rm -f conftest.o conftest.s ++ fi ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_mno_relax" >&5 ++$as_echo "$gcc_cv_as_mno_relax" >&6; } ++if test $gcc_cv_as_mno_relax = yes; then ++ check_leb128_asflags=-mno-relax ++fi ++ ++ ;; ++ *) ++ check_leb128_asflags= ++ ;; ++esac ++ + # Check if we have .[us]leb128, and support symbol arithmetic with it. + # Older versions of GAS and some non-GNU assemblers, have a bugs handling + # these directives, even when they appear to accept them. +@@ -24459,7 +24499,7 @@ L1: + L2: + .uleb128 0x8000000000000000 + ' > conftest.s +- if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags $check_leb128_asflags -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? +diff --git a/gcc/configure.ac b/gcc/configure.ac +index f7161e66e..4b24db190 100644 +--- a/gcc/configure.ac ++++ b/gcc/configure.ac +@@ -3185,10 +3185,25 @@ AC_MSG_RESULT($gcc_cv_ld_ro_rw_mix) + + gcc_AC_INITFINI_ARRAY + ++# Some assemblers (GNU as for LoongArch) generates relocations for ++# leb128 symbol arithmetic for relaxation, we need to disable relaxation ++# probing leb128 support then. ++case $target in ++ loongarch*-*-*) ++ gcc_GAS_CHECK_FEATURE([-mno-relax support], ++ gcc_cv_as_mno_relax,[-mno-relax],[.text],, ++ [check_leb128_asflags=-mno-relax]) ++ ;; ++ *) ++ check_leb128_asflags= ++ ;; ++esac ++ + # Check if we have .[us]leb128, and support symbol arithmetic with it. + # Older versions of GAS and some non-GNU assemblers, have a bugs handling + # these directives, even when they appear to accept them. 
+-gcc_GAS_CHECK_FEATURE([.sleb128 and .uleb128], gcc_cv_as_leb128,, ++gcc_GAS_CHECK_FEATURE([.sleb128 and .uleb128], gcc_cv_as_leb128, ++[$check_leb128_asflags], + [ .data + .uleb128 L2 - L1 + L1: +-- +2.43.0 + diff --git a/0004-LoongArch-Optimizations-of-vector-construction.patch b/0004-LoongArch-Optimizations-of-vector-construction.patch new file mode 100644 index 0000000..6a2c022 --- /dev/null +++ b/0004-LoongArch-Optimizations-of-vector-construction.patch @@ -0,0 +1,1310 @@ +From b74895b8b723a64bc136c4c560661abed81e013a Mon Sep 17 00:00:00 2001 +From: Guo Jie +Date: Thu, 21 Sep 2023 09:19:18 +0800 +Subject: [PATCH 004/188] LoongArch: Optimizations of vector construction. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (lasx_vecinit_merge_): New + pattern for vector construction. + (vec_set_internal): Ditto. + (lasx_xvinsgr2vr__internal): Ditto. + (lasx_xvilvl__internal): Ditto. + * config/loongarch/loongarch.cc (loongarch_expand_vector_init): + Optimized the implementation of vector construction. + (loongarch_expand_vector_init_same): New function. + * config/loongarch/lsx.md (lsx_vilvl__internal): New + pattern for vector construction. + (lsx_vreplvei_mirror_): New pattern for vector + construction. + (vec_concatv2df): Ditto. + (vec_concatv4sf): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-vec-construct-opt.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-vec-construct-opt.c: New test. 
+--- + gcc/config/loongarch/lasx.md | 69 ++ + gcc/config/loongarch/loongarch.cc | 716 +++++++++--------- + gcc/config/loongarch/lsx.md | 134 ++++ + .../vector/lasx/lasx-vec-construct-opt.c | 102 +++ + .../vector/lsx/lsx-vec-construct-opt.c | 85 +++ + 5 files changed, 732 insertions(+), 374 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-construct-opt.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vec-construct-opt.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 8111c8bb7..2bc5d47ed 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -186,6 +186,9 @@ + UNSPEC_LASX_XVLDI + UNSPEC_LASX_XVLDX + UNSPEC_LASX_XVSTX ++ UNSPEC_LASX_VECINIT_MERGE ++ UNSPEC_LASX_VEC_SET_INTERNAL ++ UNSPEC_LASX_XVILVL_INTERNAL + ]) + + ;; All vector modes with 256 bits. +@@ -255,6 +258,15 @@ + [(V8SF "V4SF") + (V4DF "V2DF")]) + ++;; The attribute gives half int/float modes for vector modes. ++(define_mode_attr VHMODE256_ALL ++ [(V32QI "V16QI") ++ (V16HI "V8HI") ++ (V8SI "V4SI") ++ (V4DI "V2DI") ++ (V8SF "V4SF") ++ (V4DF "V2DF")]) ++ + ;; The attribute gives double modes for vector modes in LASX. + (define_mode_attr VDMODE256 + [(V8SI "V4DI") +@@ -312,6 +324,11 @@ + (V4DI "v4df") + (V8SI "v8sf")]) + ++;; This attribute gives V32QI mode and V16HI mode with half size. ++(define_mode_attr mode256_i_half ++ [(V32QI "v16qi") ++ (V16HI "v8hi")]) ++ + ;; This attribute gives suffix for LASX instructions. HOW? + (define_mode_attr lasxfmt + [(V4DF "d") +@@ -756,6 +773,20 @@ + [(set_attr "type" "simd_splat") + (set_attr "mode" "")]) + ++;; Only for loongarch_expand_vector_init in loongarch.cc. ++;; Support a LSX-mode input op2. 
++(define_insn "lasx_vecinit_merge_" ++ [(set (match_operand:LASX 0 "register_operand" "=f") ++ (unspec:LASX ++ [(match_operand:LASX 1 "register_operand" "0") ++ (match_operand: 2 "register_operand" "f") ++ (match_operand 3 "const_uimm8_operand")] ++ UNSPEC_LASX_VECINIT_MERGE))] ++ "ISA_HAS_LASX" ++ "xvpermi.q\t%u0,%u2,%3" ++ [(set_attr "type" "simd_splat") ++ (set_attr "mode" "")]) ++ + (define_insn "lasx_xvpickve2gr_d" + [(set (match_operand:DI 0 "register_operand" "=r") + (any_extend:DI +@@ -779,6 +810,33 @@ + DONE; + }) + ++;; Only for loongarch_expand_vector_init in loongarch.cc. ++;; Simulate missing instructions xvinsgr2vr.b and xvinsgr2vr.h. ++(define_expand "vec_set_internal" ++ [(match_operand:ILASX_HB 0 "register_operand") ++ (match_operand: 1 "reg_or_0_operand") ++ (match_operand 2 "const__operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx index = GEN_INT (1 << INTVAL (operands[2])); ++ emit_insn (gen_lasx_xvinsgr2vr__internal ++ (operands[0], operands[1], operands[0], index)); ++ DONE; ++}) ++ ++(define_insn "lasx_xvinsgr2vr__internal" ++ [(set (match_operand:ILASX_HB 0 "register_operand" "=f") ++ (unspec:ILASX_HB [(match_operand: 1 "reg_or_0_operand" "rJ") ++ (match_operand:ILASX_HB 2 "register_operand" "0") ++ (match_operand 3 "const__operand" "")] ++ UNSPEC_LASX_VEC_SET_INTERNAL))] ++ "ISA_HAS_LASX" ++{ ++ return "vinsgr2vr.\t%w0,%z1,%y3"; ++} ++ [(set_attr "type" "simd_insert") ++ (set_attr "mode" "")]) ++ + (define_expand "vec_set" + [(match_operand:FLASX 0 "register_operand") + (match_operand: 1 "reg_or_0_operand") +@@ -1567,6 +1625,17 @@ + [(set_attr "type" "simd_flog2") + (set_attr "mode" "")]) + ++;; Only for loongarch_expand_vector_init in loongarch.cc. ++;; Merge two scalar floating-point op1 and op2 into a LASX op0. 
++(define_insn "lasx_xvilvl__internal" ++ [(set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX [(match_operand: 1 "register_operand" "f") ++ (match_operand: 2 "register_operand" "f")] ++ UNSPEC_LASX_XVILVL_INTERNAL))] ++ "ISA_HAS_LASX" ++ "xvilvl.\t%u0,%u2,%u1" ++ [(set_attr "type" "simd_permute") ++ (set_attr "mode" "")]) + + (define_insn "smax3" + [(set (match_operand:FLASX 0 "register_operand" "=f") +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index f2e796a6b..760b12268 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -10193,300 +10193,344 @@ loongarch_expand_vector_group_init (rtx target, rtx vals) + ops[1]))); + } + ++/* Expand initialization of a vector which has all same elements. */ ++ + void +-loongarch_expand_vector_init (rtx target, rtx vals) ++loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + { + machine_mode vmode = GET_MODE (target); + machine_mode imode = GET_MODE_INNER (vmode); +- unsigned i, nelt = GET_MODE_NUNITS (vmode); +- unsigned nvar = 0; +- bool all_same = true; +- rtx x; ++ rtx same = XVECEXP (vals, 0, 0); ++ rtx temp, temp2; + +- for (i = 0; i < nelt; ++i) ++ if (CONST_INT_P (same) && nvar == 0 ++ && loongarch_signed_immediate_p (INTVAL (same), 10, 0)) ++ { ++ switch (vmode) ++ { ++ case E_V32QImode: ++ case E_V16HImode: ++ case E_V8SImode: ++ case E_V4DImode: ++ case E_V16QImode: ++ case E_V8HImode: ++ case E_V4SImode: ++ case E_V2DImode: ++ temp = gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0)); ++ emit_move_insn (target, temp); ++ return; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ temp = gen_reg_rtx (imode); ++ if (imode == GET_MODE (same)) ++ temp2 = same; ++ else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) + { +- x = XVECEXP (vals, 0, i); +- if (!loongarch_constant_elt_p (x)) +- nvar++; +- if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) +- all_same = false; ++ if (GET_CODE (same) == MEM) ++ { ++ rtx 
reg_tmp = gen_reg_rtx (GET_MODE (same)); ++ loongarch_emit_move (reg_tmp, same); ++ temp2 = simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0); ++ } ++ else ++ temp2 = simplify_gen_subreg (imode, same, GET_MODE (same), 0); + } +- +- if (ISA_HAS_LASX && GET_MODE_SIZE (vmode) == 32) ++ else + { +- if (all_same) ++ if (GET_CODE (same) == MEM) + { +- rtx same = XVECEXP (vals, 0, 0); +- rtx temp, temp2; ++ rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); ++ loongarch_emit_move (reg_tmp, same); ++ temp2 = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp)); ++ } ++ else ++ temp2 = lowpart_subreg (imode, same, GET_MODE (same)); ++ } ++ emit_move_insn (temp, temp2); + +- if (CONST_INT_P (same) && nvar == 0 +- && loongarch_signed_immediate_p (INTVAL (same), 10, 0)) +- { +- switch (vmode) +- { +- case E_V32QImode: +- case E_V16HImode: +- case E_V8SImode: +- case E_V4DImode: +- temp = gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0)); +- emit_move_insn (target, temp); +- return; ++ switch (vmode) ++ { ++ case E_V32QImode: ++ case E_V16HImode: ++ case E_V8SImode: ++ case E_V4DImode: ++ case E_V16QImode: ++ case E_V8HImode: ++ case E_V4SImode: ++ case E_V2DImode: ++ loongarch_emit_move (target, gen_rtx_VEC_DUPLICATE (vmode, temp)); ++ break; + +- default: +- gcc_unreachable (); +- } +- } ++ case E_V8SFmode: ++ emit_insn (gen_lasx_xvreplve0_w_f_scalar (target, temp)); ++ break; + +- temp = gen_reg_rtx (imode); +- if (imode == GET_MODE (same)) +- temp2 = same; +- else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) +- { +- if (GET_CODE (same) == MEM) +- { +- rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); +- loongarch_emit_move (reg_tmp, same); +- temp2 = simplify_gen_subreg (imode, reg_tmp, +- GET_MODE (reg_tmp), 0); +- } +- else +- temp2 = simplify_gen_subreg (imode, same, +- GET_MODE (same), 0); +- } +- else +- { +- if (GET_CODE (same) == MEM) +- { +- rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); +- loongarch_emit_move (reg_tmp, same); +- temp2 = lowpart_subreg (imode, reg_tmp, +- 
GET_MODE (reg_tmp)); +- } +- else +- temp2 = lowpart_subreg (imode, same, GET_MODE (same)); +- } +- emit_move_insn (temp, temp2); ++ case E_V4DFmode: ++ emit_insn (gen_lasx_xvreplve0_d_f_scalar (target, temp)); ++ break; + +- switch (vmode) +- { +- case E_V32QImode: +- case E_V16HImode: +- case E_V8SImode: +- case E_V4DImode: +- loongarch_emit_move (target, +- gen_rtx_VEC_DUPLICATE (vmode, temp)); +- break; ++ case E_V4SFmode: ++ emit_insn (gen_lsx_vreplvei_w_f_scalar (target, temp)); ++ break; + +- case E_V8SFmode: +- emit_insn (gen_lasx_xvreplve0_w_f_scalar (target, temp)); +- break; ++ case E_V2DFmode: ++ emit_insn (gen_lsx_vreplvei_d_f_scalar (target, temp)); ++ break; + +- case E_V4DFmode: +- emit_insn (gen_lasx_xvreplve0_d_f_scalar (target, temp)); +- break; ++ default: ++ gcc_unreachable (); ++ } ++} + +- default: +- gcc_unreachable (); +- } +- } +- else +- { +- rtvec vec = shallow_copy_rtvec (XVEC (vals, 0)); ++/* Expand a vector initialization. */ + +- for (i = 0; i < nelt; ++i) +- RTVEC_ELT (vec, i) = CONST0_RTX (imode); ++void ++loongarch_expand_vector_init (rtx target, rtx vals) ++{ ++ machine_mode vmode = GET_MODE (target); ++ machine_mode imode = GET_MODE_INNER (vmode); ++ unsigned i, nelt = GET_MODE_NUNITS (vmode); ++ /* VALS is divided into high and low half-part. */ ++ /* Number of non constant elements in corresponding parts of VALS. */ ++ unsigned nvar = 0, hi_nvar = 0, lo_nvar = 0; ++ /* all_same : true if all elements of VALS are the same. ++ hi_same : true if all elements of the high half-part are the same. ++ lo_same : true if all elements of the low half-part are the same. ++ half_same : true if the high half-part is the same as the low one. */ ++ bool all_same = false, hi_same = true, lo_same = true, half_same = true; ++ rtx val[32], val_hi[32], val_lo[16]; ++ rtx x, op0, op1; ++ /* Copy one element of vals to per element of target vector. 
*/ ++ typedef rtx (*loongarch_vec_repl1_fn) (rtx, rtx); ++ /* Copy two elements of vals to target vector. */ ++ typedef rtx (*loongarch_vec_repl2_fn) (rtx, rtx, rtx); ++ /* Insert scalar operands into the specified position of the vector. */ ++ typedef rtx (*loongarch_vec_set_fn) (rtx, rtx, rtx); ++ /* Copy 64bit lowpart to highpart. */ ++ typedef rtx (*loongarch_vec_mirror_fn) (rtx, rtx, rtx); ++ /* Merge lowpart and highpart into target. */ ++ typedef rtx (*loongarch_vec_merge_fn) (rtx, rtx, rtx, rtx); ++ ++ loongarch_vec_repl1_fn loongarch_vec_repl1_128 = NULL, ++ loongarch_vec_repl1_256 = NULL; ++ loongarch_vec_repl2_fn loongarch_vec_repl2_128 = NULL, ++ loongarch_vec_repl2_256 = NULL; ++ loongarch_vec_set_fn loongarch_vec_set128 = NULL, loongarch_vec_set256 = NULL; ++ loongarch_vec_mirror_fn loongarch_vec_mirror = NULL; ++ loongarch_vec_merge_fn loongarch_lasx_vecinit_merge = NULL; ++ machine_mode half_mode = VOIDmode; ++ ++ /* Check whether elements of each part are the same. */ ++ for (i = 0; i < nelt / 2; ++i) ++ { ++ val_hi[i] = val_hi[i + nelt / 2] = val[i + nelt / 2] ++ = XVECEXP (vals, 0, i + nelt / 2); ++ val_lo[i] = val[i] = XVECEXP (vals, 0, i); ++ if (!loongarch_constant_elt_p (val_hi[i])) ++ hi_nvar++; ++ if (!loongarch_constant_elt_p (val_lo[i])) ++ lo_nvar++; ++ if (i > 0 && !rtx_equal_p (val_hi[i], val_hi[0])) ++ hi_same = false; ++ if (i > 0 && !rtx_equal_p (val_lo[i], val_lo[0])) ++ lo_same = false; ++ if (!rtx_equal_p (val_hi[i], val_lo[i])) ++ half_same = false; ++ } ++ ++ /* If all elements are the same, set all_same true. */ ++ if (hi_same && lo_same && half_same) ++ all_same = true; ++ ++ nvar = hi_nvar + lo_nvar; + +- emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, vec)); ++ switch (vmode) ++ { ++ case E_V32QImode: ++ half_mode = E_V16QImode; ++ loongarch_vec_set256 = gen_vec_setv32qi_internal; ++ loongarch_vec_repl1_256 = gen_lasx_xvreplgr2vr_b; ++ loongarch_lasx_vecinit_merge ++ = half_same ? 
gen_lasx_xvpermi_q_v32qi : gen_lasx_vecinit_merge_v32qi; ++ /* FALLTHRU. */ ++ case E_V16QImode: ++ loongarch_vec_set128 = gen_vec_setv16qi; ++ loongarch_vec_repl1_128 = gen_lsx_vreplgr2vr_b; ++ loongarch_vec_mirror = gen_lsx_vreplvei_mirror_b; ++ break; + +- machine_mode half_mode = VOIDmode; +- rtx target_hi, target_lo; ++ case E_V16HImode: ++ half_mode = E_V8HImode; ++ loongarch_vec_set256 = gen_vec_setv16hi_internal; ++ loongarch_vec_repl1_256 = gen_lasx_xvreplgr2vr_h; ++ loongarch_lasx_vecinit_merge ++ = half_same ? gen_lasx_xvpermi_q_v16hi : gen_lasx_vecinit_merge_v16hi; ++ /* FALLTHRU. */ ++ case E_V8HImode: ++ loongarch_vec_set128 = gen_vec_setv8hi; ++ loongarch_vec_repl1_128 = gen_lsx_vreplgr2vr_h; ++ loongarch_vec_mirror = gen_lsx_vreplvei_mirror_h; ++ break; + +- switch (vmode) +- { +- case E_V32QImode: +- half_mode=E_V16QImode; +- target_hi = gen_reg_rtx (half_mode); +- target_lo = gen_reg_rtx (half_mode); +- for (i = 0; i < nelt/2; ++i) +- { +- rtx temp_hi = gen_reg_rtx (imode); +- rtx temp_lo = gen_reg_rtx (imode); +- emit_move_insn (temp_hi, XVECEXP (vals, 0, i+nelt/2)); +- emit_move_insn (temp_lo, XVECEXP (vals, 0, i)); +- if (i == 0) +- { +- emit_insn (gen_lsx_vreplvei_b_scalar (target_hi, +- temp_hi)); +- emit_insn (gen_lsx_vreplvei_b_scalar (target_lo, +- temp_lo)); +- } +- else +- { +- emit_insn (gen_vec_setv16qi (target_hi, temp_hi, +- GEN_INT (i))); +- emit_insn (gen_vec_setv16qi (target_lo, temp_lo, +- GEN_INT (i))); +- } +- } +- emit_insn (gen_rtx_SET (target, +- gen_rtx_VEC_CONCAT (vmode, target_hi, +- target_lo))); +- break; ++ case E_V8SImode: ++ half_mode = V4SImode; ++ loongarch_vec_set256 = gen_vec_setv8si; ++ loongarch_vec_repl1_256 = gen_lasx_xvreplgr2vr_w; ++ loongarch_lasx_vecinit_merge ++ = half_same ? gen_lasx_xvpermi_q_v8si : gen_lasx_vecinit_merge_v8si; ++ /* FALLTHRU. 
*/ ++ case E_V4SImode: ++ loongarch_vec_set128 = gen_vec_setv4si; ++ loongarch_vec_repl1_128 = gen_lsx_vreplgr2vr_w; ++ loongarch_vec_mirror = gen_lsx_vreplvei_mirror_w; ++ break; + +- case E_V16HImode: +- half_mode=E_V8HImode; +- target_hi = gen_reg_rtx (half_mode); +- target_lo = gen_reg_rtx (half_mode); +- for (i = 0; i < nelt/2; ++i) +- { +- rtx temp_hi = gen_reg_rtx (imode); +- rtx temp_lo = gen_reg_rtx (imode); +- emit_move_insn (temp_hi, XVECEXP (vals, 0, i+nelt/2)); +- emit_move_insn (temp_lo, XVECEXP (vals, 0, i)); +- if (i == 0) +- { +- emit_insn (gen_lsx_vreplvei_h_scalar (target_hi, +- temp_hi)); +- emit_insn (gen_lsx_vreplvei_h_scalar (target_lo, +- temp_lo)); +- } +- else +- { +- emit_insn (gen_vec_setv8hi (target_hi, temp_hi, +- GEN_INT (i))); +- emit_insn (gen_vec_setv8hi (target_lo, temp_lo, +- GEN_INT (i))); +- } +- } +- emit_insn (gen_rtx_SET (target, +- gen_rtx_VEC_CONCAT (vmode, target_hi, +- target_lo))); +- break; ++ case E_V4DImode: ++ half_mode = E_V2DImode; ++ loongarch_vec_set256 = gen_vec_setv4di; ++ loongarch_vec_repl1_256 = gen_lasx_xvreplgr2vr_d; ++ loongarch_lasx_vecinit_merge ++ = half_same ? gen_lasx_xvpermi_q_v4di : gen_lasx_vecinit_merge_v4di; ++ /* FALLTHRU. 
*/ ++ case E_V2DImode: ++ loongarch_vec_set128 = gen_vec_setv2di; ++ loongarch_vec_repl1_128 = gen_lsx_vreplgr2vr_d; ++ loongarch_vec_mirror = gen_lsx_vreplvei_mirror_d; ++ break; + +- case E_V8SImode: +- half_mode=V4SImode; +- target_hi = gen_reg_rtx (half_mode); +- target_lo = gen_reg_rtx (half_mode); +- for (i = 0; i < nelt/2; ++i) +- { +- rtx temp_hi = gen_reg_rtx (imode); +- rtx temp_lo = gen_reg_rtx (imode); +- emit_move_insn (temp_hi, XVECEXP (vals, 0, i+nelt/2)); +- emit_move_insn (temp_lo, XVECEXP (vals, 0, i)); +- if (i == 0) +- { +- emit_insn (gen_lsx_vreplvei_w_scalar (target_hi, +- temp_hi)); +- emit_insn (gen_lsx_vreplvei_w_scalar (target_lo, +- temp_lo)); +- } +- else +- { +- emit_insn (gen_vec_setv4si (target_hi, temp_hi, +- GEN_INT (i))); +- emit_insn (gen_vec_setv4si (target_lo, temp_lo, +- GEN_INT (i))); +- } +- } +- emit_insn (gen_rtx_SET (target, +- gen_rtx_VEC_CONCAT (vmode, target_hi, +- target_lo))); +- break; ++ case E_V8SFmode: ++ half_mode = E_V4SFmode; ++ loongarch_vec_set256 = gen_vec_setv8sf; ++ loongarch_vec_repl1_128 = gen_lsx_vreplvei_w_f_scalar; ++ loongarch_vec_repl2_256 = gen_lasx_xvilvl_w_f_internal; ++ loongarch_lasx_vecinit_merge ++ = half_same ? gen_lasx_xvpermi_q_v8sf : gen_lasx_vecinit_merge_v8sf; ++ /* FALLTHRU. 
*/ ++ case E_V4SFmode: ++ loongarch_vec_set128 = gen_vec_setv4sf; ++ loongarch_vec_repl2_128 = gen_lsx_vilvl_w_f_internal; ++ loongarch_vec_mirror = gen_lsx_vreplvei_mirror_w_f; ++ break; + +- case E_V4DImode: +- half_mode=E_V2DImode; +- target_hi = gen_reg_rtx (half_mode); +- target_lo = gen_reg_rtx (half_mode); +- for (i = 0; i < nelt/2; ++i) +- { +- rtx temp_hi = gen_reg_rtx (imode); +- rtx temp_lo = gen_reg_rtx (imode); +- emit_move_insn (temp_hi, XVECEXP (vals, 0, i+nelt/2)); +- emit_move_insn (temp_lo, XVECEXP (vals, 0, i)); +- if (i == 0) +- { +- emit_insn (gen_lsx_vreplvei_d_scalar (target_hi, +- temp_hi)); +- emit_insn (gen_lsx_vreplvei_d_scalar (target_lo, +- temp_lo)); +- } +- else +- { +- emit_insn (gen_vec_setv2di (target_hi, temp_hi, +- GEN_INT (i))); +- emit_insn (gen_vec_setv2di (target_lo, temp_lo, +- GEN_INT (i))); +- } +- } +- emit_insn (gen_rtx_SET (target, +- gen_rtx_VEC_CONCAT (vmode, target_hi, +- target_lo))); +- break; ++ case E_V4DFmode: ++ half_mode = E_V2DFmode; ++ loongarch_vec_set256 = gen_vec_setv4df; ++ loongarch_vec_repl1_128 = gen_lsx_vreplvei_d_f_scalar; ++ loongarch_vec_repl2_256 = gen_lasx_xvilvl_d_f_internal; ++ loongarch_lasx_vecinit_merge ++ = half_same ? gen_lasx_xvpermi_q_v4df : gen_lasx_vecinit_merge_v4df; ++ /* FALLTHRU. */ ++ case E_V2DFmode: ++ loongarch_vec_set128 = gen_vec_setv2df; ++ loongarch_vec_repl2_128 = gen_lsx_vilvl_d_f_internal; ++ loongarch_vec_mirror = gen_lsx_vreplvei_mirror_d_f; ++ break; + +- case E_V8SFmode: +- half_mode=E_V4SFmode; +- target_hi = gen_reg_rtx (half_mode); +- target_lo = gen_reg_rtx (half_mode); +- for (i = 0; i < nelt/2; ++i) ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (ISA_HAS_LASX && GET_MODE_SIZE (vmode) == 32) ++ { ++ /* If all elements are the same, just do a broadcost. */ ++ if (all_same) ++ loongarch_expand_vector_init_same (target, vals, nvar); ++ else ++ { ++ gcc_assert (nelt >= 4); ++ ++ rtx target_hi, target_lo; ++ /* Write elements of high half-part in target directly. 
*/ ++ target_hi = target; ++ target_lo = gen_reg_rtx (half_mode); ++ ++ /* If all elements of high half-part are the same, ++ just do a broadcost. Also applicable to low half-part. */ ++ if (hi_same) ++ { ++ rtx vtmp = gen_rtx_PARALLEL (vmode, gen_rtvec_v (nelt, val_hi)); ++ loongarch_expand_vector_init_same (target_hi, vtmp, hi_nvar); ++ } ++ if (lo_same) ++ { ++ rtx vtmp ++ = gen_rtx_PARALLEL (half_mode, gen_rtvec_v (nelt / 2, val_lo)); ++ loongarch_expand_vector_init_same (target_lo, vtmp, lo_nvar); ++ } ++ ++ for (i = 0; i < nelt / 2; ++i) ++ { ++ if (!hi_same) + { +- rtx temp_hi = gen_reg_rtx (imode); +- rtx temp_lo = gen_reg_rtx (imode); +- emit_move_insn (temp_hi, XVECEXP (vals, 0, i+nelt/2)); +- emit_move_insn (temp_lo, XVECEXP (vals, 0, i)); +- if (i == 0) ++ if (vmode == E_V8SFmode || vmode == E_V4DFmode) + { +- emit_insn (gen_lsx_vreplvei_w_f_scalar (target_hi, +- temp_hi)); +- emit_insn (gen_lsx_vreplvei_w_f_scalar (target_lo, +- temp_lo)); ++ /* Using xvilvl to load lowest 2 elements simultaneously ++ to reduce the number of instructions. */ ++ if (i == 1) ++ { ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, val_hi[0]); ++ op1 = gen_reg_rtx (imode); ++ emit_move_insn (op1, val_hi[1]); ++ emit_insn ( ++ loongarch_vec_repl2_256 (target_hi, op0, op1)); ++ } ++ else if (i > 1) ++ { ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, val_hi[i]); ++ emit_insn ( ++ loongarch_vec_set256 (target_hi, op0, GEN_INT (i))); ++ } + } + else + { +- emit_insn (gen_vec_setv4sf (target_hi, temp_hi, +- GEN_INT (i))); +- emit_insn (gen_vec_setv4sf (target_lo, temp_lo, +- GEN_INT (i))); ++ /* Assign the lowest element of val_hi to all elements ++ of target_hi. 
*/ ++ if (i == 0) ++ { ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, val_hi[0]); ++ emit_insn (loongarch_vec_repl1_256 (target_hi, op0)); ++ } ++ else if (!rtx_equal_p (val_hi[i], val_hi[0])) ++ { ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, val_hi[i]); ++ emit_insn ( ++ loongarch_vec_set256 (target_hi, op0, GEN_INT (i))); ++ } + } + } +- emit_insn (gen_rtx_SET (target, +- gen_rtx_VEC_CONCAT (vmode, target_hi, +- target_lo))); +- break; +- +- case E_V4DFmode: +- half_mode=E_V2DFmode; +- target_hi = gen_reg_rtx (half_mode); +- target_lo = gen_reg_rtx (half_mode); +- for (i = 0; i < nelt/2; ++i) ++ if (!lo_same && !half_same) + { +- rtx temp_hi = gen_reg_rtx (imode); +- rtx temp_lo = gen_reg_rtx (imode); +- emit_move_insn (temp_hi, XVECEXP (vals, 0, i+nelt/2)); +- emit_move_insn (temp_lo, XVECEXP (vals, 0, i)); ++ /* Assign the lowest element of val_lo to all elements ++ of target_lo. */ + if (i == 0) + { +- emit_insn (gen_lsx_vreplvei_d_f_scalar (target_hi, +- temp_hi)); +- emit_insn (gen_lsx_vreplvei_d_f_scalar (target_lo, +- temp_lo)); ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, val_lo[0]); ++ emit_insn (loongarch_vec_repl1_128 (target_lo, op0)); + } +- else ++ else if (!rtx_equal_p (val_lo[i], val_lo[0])) + { +- emit_insn (gen_vec_setv2df (target_hi, temp_hi, +- GEN_INT (i))); +- emit_insn (gen_vec_setv2df (target_lo, temp_lo, +- GEN_INT (i))); ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, val_lo[i]); ++ emit_insn ( ++ loongarch_vec_set128 (target_lo, op0, GEN_INT (i))); + } + } +- emit_insn (gen_rtx_SET (target, +- gen_rtx_VEC_CONCAT (vmode, target_hi, +- target_lo))); +- break; +- +- default: +- gcc_unreachable (); + } +- ++ if (half_same) ++ { ++ emit_insn (loongarch_lasx_vecinit_merge (target, target_hi, ++ target_hi, const0_rtx)); ++ return; ++ } ++ emit_insn (loongarch_lasx_vecinit_merge (target, target_hi, target_lo, ++ GEN_INT (0x20))); + } + return; + } +@@ -10494,130 +10538,54 @@ loongarch_expand_vector_init (rtx 
target, rtx vals) + if (ISA_HAS_LSX) + { + if (all_same) ++ loongarch_expand_vector_init_same (target, vals, nvar); ++ else + { +- rtx same = XVECEXP (vals, 0, 0); +- rtx temp, temp2; +- +- if (CONST_INT_P (same) && nvar == 0 +- && loongarch_signed_immediate_p (INTVAL (same), 10, 0)) +- { +- switch (vmode) +- { +- case E_V16QImode: +- case E_V8HImode: +- case E_V4SImode: +- case E_V2DImode: +- temp = gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0)); +- emit_move_insn (target, temp); +- return; +- +- default: +- gcc_unreachable (); +- } +- } +- temp = gen_reg_rtx (imode); +- if (imode == GET_MODE (same)) +- temp2 = same; +- else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) +- { +- if (GET_CODE (same) == MEM) +- { +- rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); +- loongarch_emit_move (reg_tmp, same); +- temp2 = simplify_gen_subreg (imode, reg_tmp, +- GET_MODE (reg_tmp), 0); +- } +- else +- temp2 = simplify_gen_subreg (imode, same, GET_MODE (same), 0); +- } +- else ++ for (i = 0; i < nelt; ++i) + { +- if (GET_CODE (same) == MEM) ++ if (vmode == E_V4SFmode || vmode == E_V2DFmode) + { +- rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); +- loongarch_emit_move (reg_tmp, same); +- temp2 = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp)); ++ /* Using vilvl to load lowest 2 elements simultaneously to ++ reduce the number of instructions. 
*/ ++ if (i == 1) ++ { ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, val[0]); ++ op1 = gen_reg_rtx (imode); ++ emit_move_insn (op1, val[1]); ++ emit_insn (loongarch_vec_repl2_128 (target, op0, op1)); ++ } ++ else if (i > 1) ++ { ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, val[i]); ++ emit_insn ( ++ loongarch_vec_set128 (target, op0, GEN_INT (i))); ++ } + } + else +- temp2 = lowpart_subreg (imode, same, GET_MODE (same)); +- } +- emit_move_insn (temp, temp2); +- +- switch (vmode) +- { +- case E_V16QImode: +- case E_V8HImode: +- case E_V4SImode: +- case E_V2DImode: +- loongarch_emit_move (target, gen_rtx_VEC_DUPLICATE (vmode, temp)); +- break; +- +- case E_V4SFmode: +- emit_insn (gen_lsx_vreplvei_w_f_scalar (target, temp)); +- break; +- +- case E_V2DFmode: +- emit_insn (gen_lsx_vreplvei_d_f_scalar (target, temp)); +- break; +- +- default: +- gcc_unreachable (); +- } +- } +- else +- { +- emit_move_insn (target, CONST0_RTX (vmode)); +- +- for (i = 0; i < nelt; ++i) +- { +- rtx temp = gen_reg_rtx (imode); +- emit_move_insn (temp, XVECEXP (vals, 0, i)); +- switch (vmode) + { +- case E_V16QImode: +- if (i == 0) +- emit_insn (gen_lsx_vreplvei_b_scalar (target, temp)); +- else +- emit_insn (gen_vec_setv16qi (target, temp, GEN_INT (i))); +- break; +- +- case E_V8HImode: +- if (i == 0) +- emit_insn (gen_lsx_vreplvei_h_scalar (target, temp)); +- else +- emit_insn (gen_vec_setv8hi (target, temp, GEN_INT (i))); +- break; +- +- case E_V4SImode: +- if (i == 0) +- emit_insn (gen_lsx_vreplvei_w_scalar (target, temp)); +- else +- emit_insn (gen_vec_setv4si (target, temp, GEN_INT (i))); +- break; +- +- case E_V2DImode: +- if (i == 0) +- emit_insn (gen_lsx_vreplvei_d_scalar (target, temp)); +- else +- emit_insn (gen_vec_setv2di (target, temp, GEN_INT (i))); +- break; +- +- case E_V4SFmode: +- if (i == 0) +- emit_insn (gen_lsx_vreplvei_w_f_scalar (target, temp)); +- else +- emit_insn (gen_vec_setv4sf (target, temp, GEN_INT (i))); +- break; +- +- case E_V2DFmode: ++ if 
(half_same && i == nelt / 2) ++ { ++ emit_insn ( ++ loongarch_vec_mirror (target, target, const0_rtx)); ++ return; ++ } ++ /* Assign the lowest element of val to all elements of ++ target. */ + if (i == 0) +- emit_insn (gen_lsx_vreplvei_d_f_scalar (target, temp)); +- else +- emit_insn (gen_vec_setv2df (target, temp, GEN_INT (i))); +- break; +- +- default: +- gcc_unreachable (); ++ { ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, val[0]); ++ emit_insn (loongarch_vec_repl1_128 (target, op0)); ++ } ++ else if (!rtx_equal_p (val[i], val[0])) ++ { ++ op0 = gen_reg_rtx (imode); ++ emit_move_insn (op0, val[i]); ++ emit_insn ( ++ loongarch_vec_set128 (target, op0, GEN_INT (i))); ++ } + } + } + } +@@ -10634,8 +10602,8 @@ loongarch_expand_vector_init (rtx target, rtx vals) + /* For two-part initialization, always use CONCAT. */ + if (nelt == 2) + { +- rtx op0 = force_reg (imode, XVECEXP (vals, 0, 0)); +- rtx op1 = force_reg (imode, XVECEXP (vals, 0, 1)); ++ rtx op0 = force_reg (imode, val[0]); ++ rtx op1 = force_reg (imode, val[1]); + x = gen_rtx_VEC_CONCAT (vmode, op0, op1); + emit_insn (gen_rtx_SET (target, x)); + return; +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index fb4d228ba..075f6ba56 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -176,6 +176,8 @@ + UNSPEC_LSX_VSSRARNI + UNSPEC_LSX_VSSRARNI2 + UNSPEC_LSX_VPERMI ++ UNSPEC_LSX_VILVL_INTERNAL ++ UNSPEC_LSX_VREPLVEI_MIRROR + ]) + + ;; This attribute gives suffix for integers in VHMODE. +@@ -1551,6 +1553,18 @@ + [(set_attr "type" "simd_flog2") + (set_attr "mode" "")]) + ++;; Only for loongarch_expand_vector_init in loongarch.cc. ++;; Merge two scalar floating-point op1 and op2 into a LSX op0. 
++(define_insn "lsx_vilvl__internal" ++ [(set (match_operand:FLSX 0 "register_operand" "=f") ++ (unspec:FLSX [(match_operand: 1 "register_operand" "f") ++ (match_operand: 2 "register_operand" "f")] ++ UNSPEC_LSX_VILVL_INTERNAL))] ++ "ISA_HAS_LSX" ++ "vilvl.\t%w0,%w2,%w1" ++ [(set_attr "type" "simd_permute") ++ (set_attr "mode" "")]) ++ + (define_insn "smax3" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (smax:FLSX (match_operand:FLSX 1 "register_operand" "f") +@@ -2289,6 +2303,16 @@ + [(set_attr "type" "simd_splat") + (set_attr "mode" "")]) + ++(define_insn "lsx_vreplvei_mirror_" ++ [(set (match_operand:LSX 0 "register_operand" "=f") ++ (unspec: LSX [(match_operand:LSX 1 "register_operand" "f") ++ (match_operand 2 "const__operand" "")] ++ UNSPEC_LSX_VREPLVEI_MIRROR))] ++ "ISA_HAS_LSX" ++ "vreplvei.d\t%w0,%w1,%2" ++ [(set_attr "type" "simd_splat") ++ (set_attr "mode" "")]) ++ + (define_insn "lsx_vreplvei_" + [(set (match_operand:LSX 0 "register_operand" "=f") + (vec_duplicate:LSX +@@ -2450,6 +2474,99 @@ + DONE; + }) + ++;; Implement vec_concatv2df by vilvl.d. ++(define_insn_and_split "vec_concatv2df" ++ [(set (match_operand:V2DF 0 "register_operand" "=f") ++ (vec_concat:V2DF ++ (match_operand:DF 1 "register_operand" "f") ++ (match_operand:DF 2 "register_operand" "f")))] ++ "ISA_HAS_LSX" ++ "" ++ "&& reload_completed" ++ [(const_int 0)] ++{ ++ emit_insn (gen_lsx_vilvl_d_f (operands[0], ++ gen_rtx_REG (V2DFmode, REGNO (operands[1])), ++ gen_rtx_REG (V2DFmode, REGNO (operands[2])))); ++ DONE; ++} ++ [(set_attr "mode" "V2DF")]) ++ ++;; Implement vec_concatv4sf. ++;; Optimize based on hardware register allocation of operands. 
++(define_insn_and_split "vec_concatv4sf" ++ [(set (match_operand:V4SF 0 "register_operand" "=f") ++ (vec_concat:V4SF ++ (vec_concat:V2SF ++ (match_operand:SF 1 "register_operand" "f") ++ (match_operand:SF 2 "register_operand" "f")) ++ (vec_concat:V2SF ++ (match_operand:SF 3 "register_operand" "f") ++ (match_operand:SF 4 "register_operand" "f"))))] ++ "ISA_HAS_LSX" ++ "" ++ "&& reload_completed" ++ [(const_int 0)] ++{ ++ operands[5] = GEN_INT (1); ++ operands[6] = GEN_INT (2); ++ operands[7] = GEN_INT (4); ++ operands[8] = GEN_INT (8); ++ ++ /* If all input are same, use vreplvei.w to broadcast. */ ++ if (REGNO (operands[1]) == REGNO (operands[2]) ++ && REGNO (operands[1]) == REGNO (operands[3]) ++ && REGNO (operands[1]) == REGNO (operands[4])) ++ { ++ emit_insn (gen_lsx_vreplvei_w_f_scalar (operands[0], operands[1])); ++ } ++ /* If op0 is equal to op3, use vreplvei.w to set each element of op0 as op3. ++ If other input is different from op3, use vextrins.w to insert. */ ++ else if (REGNO (operands[0]) == REGNO (operands[3])) ++ { ++ emit_insn (gen_lsx_vreplvei_w_f_scalar (operands[0], operands[3])); ++ if (REGNO (operands[1]) != REGNO (operands[3])) ++ emit_insn (gen_lsx_vextrins_w_f_scalar (operands[0], operands[1], ++ operands[0], operands[5])); ++ if (REGNO (operands[2]) != REGNO (operands[3])) ++ emit_insn (gen_lsx_vextrins_w_f_scalar (operands[0], operands[2], ++ operands[0], operands[6])); ++ if (REGNO (operands[4]) != REGNO (operands[3])) ++ emit_insn (gen_lsx_vextrins_w_f_scalar (operands[0], operands[4], ++ operands[0], operands[8])); ++ } ++ /* If op0 is equal to op4, use vreplvei.w to set each element of op0 as op4. ++ If other input is different from op4, use vextrins.w to insert. 
*/ ++ else if (REGNO (operands[0]) == REGNO (operands[4])) ++ { ++ emit_insn (gen_lsx_vreplvei_w_f_scalar (operands[0], operands[4])); ++ if (REGNO (operands[1]) != REGNO (operands[4])) ++ emit_insn (gen_lsx_vextrins_w_f_scalar (operands[0], operands[1], ++ operands[0], operands[5])); ++ if (REGNO (operands[2]) != REGNO (operands[4])) ++ emit_insn (gen_lsx_vextrins_w_f_scalar (operands[0], operands[2], ++ operands[0], operands[6])); ++ if (REGNO (operands[3]) != REGNO (operands[4])) ++ emit_insn (gen_lsx_vextrins_w_f_scalar (operands[0], operands[3], ++ operands[0], operands[7])); ++ } ++ /* Otherwise, use vilvl.w to merge op1 and op2 first. ++ If op3 is different from op1, use vextrins.w to insert. ++ If op4 is different from op2, use vextrins.w to insert. */ ++ else ++ { ++ emit_insn ( ++ gen_lsx_vilvl_w_f (operands[0], ++ gen_rtx_REG (V4SFmode, REGNO (operands[1])), ++ gen_rtx_REG (V4SFmode, REGNO (operands[2])))); ++ emit_insn (gen_lsx_vextrins_w_f_scalar (operands[0], operands[3], ++ operands[0], operands[7])); ++ emit_insn (gen_lsx_vextrins_w_f_scalar (operands[0], operands[4], ++ operands[0], operands[8])); ++ } ++ DONE; ++} ++ [(set_attr "mode" "V4SF")]) + + (define_insn "vandn3" + [(set (match_operand:LSX 0 "register_operand" "=f") +@@ -4465,3 +4582,20 @@ + "vpermi.w\t%w0,%w2,%3" + [(set_attr "type" "simd_bit") + (set_attr "mode" "V4SI")]) ++ ++;; Delete one of two instructions that exactly play the same role. 
++(define_peephole2 ++ [(set (match_operand:V2DI 0 "register_operand") ++ (vec_duplicate:V2DI (match_operand:DI 1 "register_operand"))) ++ (set (match_operand:V2DI 2 "register_operand") ++ (vec_merge:V2DI ++ (vec_duplicate:V2DI (match_operand:DI 3 "register_operand")) ++ (match_operand:V2DI 4 "register_operand") ++ (match_operand 5 "const_int_operand")))] ++ "operands[0] == operands[2] && ++ operands[1] == operands[3] && ++ operands[2] == operands[4] && ++ INTVAL (operands[5]) == 2" ++ [(set (match_dup 0) ++ (vec_duplicate:V2DI (match_dup 1)))] ++ "") +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-construct-opt.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-construct-opt.c +new file mode 100644 +index 000000000..487816a48 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-construct-opt.c +@@ -0,0 +1,102 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mlasx -O3" } */ ++ ++#include <lasxintrin.h> ++ ++extern long long *x_di; ++extern int *x_si; ++extern short int *x_hi; ++extern char *x_qi; ++extern double *y_df; ++extern float *y_sf; ++ ++/* Remove some unnecessary vinsgr2vr.d as the corresponding elements ++ have already been set. */ ++/* { dg-final { scan-assembler-not "v4i64:.*\tvinsgr2vr\\.d.*v4i64" } } */ ++/* { dg-final { scan-assembler-times "v4i64:.*\txvldrepl\\.d.*v4i64" 1 } } */ ++v4i64 ++vec_construct_v4i64 () ++{ ++ v4i64 res = ++ { x_di[0], x_di[0], x_di[1], x_di[1] } ++ ; ++ return res; ++} ++ ++/* Remove some unnecessary vinsgr2vr.w as the corresponding elements ++ have already been set. */ ++/* { dg-final { scan-assembler-not "v8i32:.*\tvinsgr2vr\\.w.*v8i32" } } */ ++/* { dg-final { scan-assembler-times "v8i32:.*\txvreplgr2vr\\.w.*v8i32" 1 } } */ ++v8i32 ++vec_construct_v8i32 () ++{ ++ v8i32 res = ++ { x_si[0], x_si[0], x_si[0], x_si[0], ++ x_si[0], x_si[2], x_si[0], x_si[0] } ++ ; ++ return res; ++} ++ ++/* Remove some unnecessary vinsgr2vr.h as the corresponding elements ++ have already been set. 
*/ ++/* { dg-final { scan-assembler-not "v16i16:.*\tvori\\.b.*v16i16" } } */ ++/* { dg-final { scan-assembler-times "v16i16:.*\txvreplgr2vr\\.h.*v16i1" 1 } } */ ++v16i16 ++vec_construct_v16i16 () ++{ ++ v16i16 res = ++ { x_hi[1], x_hi[2], x_hi[1], x_hi[1], ++ x_hi[1], x_hi[1], x_hi[1], x_hi[1], ++ x_hi[1], x_hi[1], x_hi[1], x_hi[1], ++ x_hi[1], x_hi[1], x_hi[1], x_hi[2] } ++ ; ++ return res; ++} ++ ++/* Remove some unnecessary vinsgr2vr.b as the corresponding elements ++ have already been set. */ ++/* { dg-final { scan-assembler-not "v32i8:.*\tvori\\.b.*v32i8" } } */ ++/* { dg-final { scan-assembler-times "v32i8:.*\txvreplgr2vr\\.b.*v32i8" 1 } } */ ++v32i8 ++vec_construct_v32i8 () ++{ ++ v32i8 res = ++ { x_qi[0], x_qi[0], x_qi[0], x_qi[0], ++ x_qi[0], x_qi[0], x_qi[0], x_qi[0], ++ x_qi[0], x_qi[0], x_qi[0], x_qi[0], ++ x_qi[0], x_qi[0], x_qi[0], x_qi[2], ++ x_qi[0], x_qi[0], x_qi[0], x_qi[0], ++ x_qi[0], x_qi[0], x_qi[0], x_qi[0], ++ x_qi[0], x_qi[0], x_qi[0], x_qi[0], ++ x_qi[0], x_qi[0], x_qi[0], x_qi[3] } ++ ; ++ return res; ++} ++ ++/* Set 2 elements of a vector simultaneously by vilvl.d ++ and reducing more vextrins.d. */ ++/* { dg-final { scan-assembler-not "v4f64:.*\tvori\\.b.*v4f64" } } */ ++/* { dg-final { scan-assembler-not "v4f64:.*\tvextrins\\.d.*v4f64" } } */ ++/* { dg-final { scan-assembler-times "v4f64:.*\tvilvl\\.d.*v4f64" 1 } } */ ++v4f64 ++vec_construct_v4f64 () ++{ ++ v4f64 res = ++ { y_df[0], y_df[2], y_df[0], y_df[0]} ++ ; ++ return res; ++} ++ ++/* Set 2 elements of a vector simultaneously by vilvl.w ++ and reducing more vextrins.w. 
*/ ++/* { dg-final { scan-assembler-not "v8f32:.*\tvextrins\\.w.*v8f32" } } */ ++/* { dg-final { scan-assembler-times "v8f32:.*\txvilvl\\.w.*v8f32" 1 } } */ ++v8f32 ++vec_construct_v8f32 () ++{ ++ v8f32 res = ++ { y_sf[2], y_sf[1], y_sf[2], y_sf[3], ++ y_sf[2], y_sf[1], y_sf[2], y_sf[3] } ++ ; ++ return res; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vec-construct-opt.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vec-construct-opt.c +new file mode 100644 +index 000000000..92da1c8af +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vec-construct-opt.c +@@ -0,0 +1,85 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mlsx -O3" } */ ++ ++#include <lsxintrin.h> ++ ++extern long long *x_di; ++extern int *x_si; ++extern short int *x_hi; ++extern char *x_qi; ++extern double *y_df; ++extern float *y_sf; ++ ++/* No change for V2DI mode. */ ++v2i64 ++vec_construct_v2i64 () ++{ ++ v2i64 res = ++ { x_di[1], x_di[0]} ++ ; ++ return res; ++} ++ ++/* Only load the lowest 2 elements and directly copy them to high half-part, ++ reducing more vinsgr2vr.w. */ ++/* { dg-final { scan-assembler-times "v4i32:.*\tvreplvei\\.d.*v4i32" 1 } } */ ++v4i32 ++vec_construct_v4i32 () ++{ ++ v4i32 res = ++ { x_si[0], x_si[1], x_si[0], x_si[1]} ++ ; ++ return res; ++} ++ ++/* Only load the lowest 4 elements and directly copy them to high half-part, ++ reducing more vinsgr2vr.h. */ ++/* { dg-final { scan-assembler-times "v8i16:.*\tvreplvei\\.d.*v8i16" 1 } } */ ++v8i16 ++vec_construct_v8i16 () ++{ ++ v8i16 res = ++ { x_hi[0], x_hi[0], x_hi[0], x_hi[1], ++ x_hi[0], x_hi[0], x_hi[0], x_hi[1] } ++ ; ++ return res; ++} ++ ++/* Only load the lowest 8 elements and directly copy them to high half-part, ++ reducing more vinsgr2vr.b. 
*/ ++/* { dg-final { scan-assembler-times "v16i8:.*\tvreplvei\\.d.*v16i8" 1 } } */ ++v16i8 ++vec_construct_v16i8 () ++{ ++ v16i8 res = ++ { x_qi[0], x_qi[1], x_qi[0], x_qi[2], ++ x_qi[0], x_qi[0], x_qi[0], x_qi[3], ++ x_qi[0], x_qi[1], x_qi[0], x_qi[2], ++ x_qi[0], x_qi[0], x_qi[0], x_qi[3] } ++ ; ++ return res; ++} ++ ++/* Set 2 elements of a vector simultaneously by vilvl.d. */ ++/* { dg-final { scan-assembler-not "v2f64:.*\tvextrins\\.d.*v2f64" } } */ ++/* { dg-final { scan-assembler-times "v2f64:.*\tvilvl\\.d.*v2f64" 1 } } */ ++v2f64 ++vec_construct_v2f64 () ++{ ++ v2f64 res = ++ { y_df[0], y_df[2] } ++ ; ++ return res; ++} ++ ++/* Set 2 elements of a vector simultaneously by vilvl.w ++ and reducing more vextrins.w. */ ++/* { dg-final { scan-assembler-times "v4f32:.*\tvilvl\\.w.*v4f32" 1 } } */ ++v4f32 ++vec_construct_v4f32 () ++{ ++ v4f32 res = ++ { y_sf[0], y_sf[1], y_sf[0], y_sf[0] } ++ ; ++ return res; ++} +-- +2.43.0 + diff --git a/0005-LoongArch-Replace-UNSPEC_FCOPYSIGN-with-copysign-RTL.patch b/0005-LoongArch-Replace-UNSPEC_FCOPYSIGN-with-copysign-RTL.patch new file mode 100644 index 0000000..1660b3b --- /dev/null +++ b/0005-LoongArch-Replace-UNSPEC_FCOPYSIGN-with-copysign-RTL.patch @@ -0,0 +1,51 @@ +From 9b2cbf361e38ea1ad672c2b8c8cf1dda4f6f7d72 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Mon, 2 Oct 2023 18:51:00 +0800 +Subject: [PATCH 005/188] LoongArch: Replace UNSPEC_FCOPYSIGN with copysign RTL + +When I added copysign support for LoongArch (r13-3702), we did not have +a copysign RTL insn, so I had to use UNSPEC to represent the copysign +instruction. Now the copysign RTX code has been added in r14-1586, so +this patch removes those UNSPECs, and it uses the native RTL copysign +insn. + +Inspired by rs6000 patch "Cleanup: Replace UNSPEC_COPYSIGN with copysign +RTL" [1] from Michael Meissner. + +[1]: https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631701.html + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (UNSPEC_FCOPYSIGN): Delete. 
+ (copysign<mode>3): Use copysign RTL instead of UNSPEC. +--- + gcc/config/loongarch/loongarch.md | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 63ff32e75..73e2cbe0b 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -37,7 +37,6 @@ + UNSPEC_FCLASS + UNSPEC_FMAX + UNSPEC_FMIN +- UNSPEC_FCOPYSIGN + UNSPEC_FTINT + UNSPEC_FTINTRM + UNSPEC_FTINTRP +@@ -1129,9 +1128,8 @@ + + (define_insn "copysign<mode>3" + [(set (match_operand:ANYF 0 "register_operand" "=f") +- (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f") +- (match_operand:ANYF 2 "register_operand" "f")] +- UNSPEC_FCOPYSIGN))] ++ (copysign:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "fcopysign.<fmt>\t%0,%1,%2" + [(set_attr "type" "fcopysign") +-- +2.43.0 + diff --git a/0006-LoongArch-Adjust-makefile-dependency-for-loongarch-h.patch b/0006-LoongArch-Adjust-makefile-dependency-for-loongarch-h.patch new file mode 100644 index 0000000..045fe6d --- /dev/null +++ b/0006-LoongArch-Adjust-makefile-dependency-for-loongarch-h.patch @@ -0,0 +1,71 @@ +From 746109cb61d6f3db4c25a9a107f30996c17f11db Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Wed, 11 Oct 2023 17:59:53 +0800 +Subject: [PATCH 006/188] LoongArch: Adjust makefile dependency for loongarch + headers. + +gcc/ChangeLog: + + * config.gcc: Add loongarch-driver.h to tm_files. + * config/loongarch/loongarch.h: Do not include loongarch-driver.h. + * config/loongarch/t-loongarch: Append loongarch-multilib.h to $(GTM_H) + instead of $(TM_H) for building generator programs. 
+--- + gcc/config.gcc | 4 ++-- + gcc/config/loongarch/loongarch.h | 3 --- + gcc/config/loongarch/t-loongarch | 3 ++- + 3 files changed, 4 insertions(+), 6 deletions(-) + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index e34a5fbb9..11ab620d0 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -2508,7 +2508,7 @@ riscv*-*-freebsd*) + + loongarch*-*-linux*) + tm_file="elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}" +- tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h" ++ tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h loongarch/loongarch-driver.h" + extra_options="${extra_options} linux-android.opt" + tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux" + gnu_ld=yes +@@ -2521,7 +2521,7 @@ loongarch*-*-linux*) + + loongarch*-*-elf*) + tm_file="elfos.h newlib-stdint.h ${tm_file}" +- tm_file="${tm_file} loongarch/elf.h loongarch/linux.h" ++ tm_file="${tm_file} loongarch/elf.h loongarch/linux.h loongarch/loongarch-driver.h" + tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux" + gnu_ld=yes + gas=yes +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index a443a6427..a2dc4ba8c 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -49,9 +49,6 @@ along with GCC; see the file COPYING3. If not see + + #define TARGET_LIBGCC_SDATA_SECTION ".sdata" + +-/* Driver native functions for SPEC processing in the GCC driver. */ +-#include "loongarch-driver.h" +- + /* This definition replaces the formerly used 'm' constraint with a + different constraint letter in order to avoid changing semantics of + the 'm' constraint when accepting new address formats in +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index 28cfb49df..12734c37b 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -16,7 +16,8 @@ + # along with GCC; see the file COPYING3. If not see + # . 
+ +-TM_H += loongarch-multilib.h $(srcdir)/config/loongarch/loongarch-driver.h ++ ++GTM_H += loongarch-multilib.h + OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \ + $(srcdir)/config/loongarch/loongarch-tune.h + +-- +2.43.0 + diff --git a/0007-LoongArch-Enable-vect.exp-for-LoongArch.-PR111424.patch b/0007-LoongArch-Enable-vect.exp-for-LoongArch.-PR111424.patch new file mode 100644 index 0000000..6cc7264 --- /dev/null +++ b/0007-LoongArch-Enable-vect.exp-for-LoongArch.-PR111424.patch @@ -0,0 +1,65 @@ +From b75f00086e863ac7e9e1ee37f8107b199cf62550 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan +Date: Fri, 25 Oct 2024 00:58:01 +0000 +Subject: [PATCH 007/188] LoongArch: Enable vect.exp for LoongArch. [PR111424] + +gcc/testsuite/ChangeLog: + + PR target/111424 + * lib/target-supports.exp: Enable vect.exp for LoongArch. +--- + gcc/testsuite/lib/target-supports.exp | 31 +++++++++++++++++++++++++++ + 1 file changed, 31 insertions(+) + +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index 192e0aded..bbe145c1c 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -10535,6 +10535,13 @@ proc check_vect_support_and_set_flags { } { + } + } elseif [istarget amdgcn-*-*] { + set dg-do-what-default run ++ } elseif [istarget loongarch*-*-*] { ++ lappend DEFAULT_VECTCFLAGS "-mdouble-float" "-mlasx" ++ if [check_effective_target_loongarch_asx_hw] { ++ set dg-do-what-default run ++ } else { ++ set dg-do-what-default compile ++ } + } else { + return 0 + } +@@ -10542,6 +10549,30 @@ proc check_vect_support_and_set_flags { } { + return 1 + } + ++proc check_effective_target_loongarch_sx_hw { } { ++ return [check_runtime loongarch_sx_hw { ++ #include <lsxintrin.h> ++ int main (void) ++ { ++ __m128i a, b, c; ++ c = __lsx_vand_v (a, b); ++ return 0; ++ } ++ } "-mlsx"] ++} ++ ++proc check_effective_target_loongarch_asx_hw { } { ++ return [check_runtime loongarch_asx_hw { ++ #include <lasxintrin.h> ++ 
int main (void) ++ 
{ ++ __m256i a, b, c; ++ c = __lasx_xvand_v (a, b); ++ return 0; ++ } ++ } "-mlasx"] ++} ++ + # Return 1 if the target does *not* require strict alignment. + + proc check_effective_target_non_strict_align {} { +-- +2.43.0 + diff --git a/0008-LoongArch-Delete-macro-definition-ASM_OUTPUT_ALIGN_W.patch b/0008-LoongArch-Delete-macro-definition-ASM_OUTPUT_ALIGN_W.patch new file mode 100644 index 0000000..837cdfd --- /dev/null +++ b/0008-LoongArch-Delete-macro-definition-ASM_OUTPUT_ALIGN_W.patch @@ -0,0 +1,48 @@ +From 3829ad1963a92526201b42233d2bb4facf7ba8d4 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Fri, 15 Sep 2023 11:56:01 +0800 +Subject: [PATCH 008/188] LoongArch: Delete macro definition + ASM_OUTPUT_ALIGN_WITH_NOP. + +There are two reasons for removing this macro definition: +1. The default in the assembler is to use the nop instruction for filling. +2. For assembly directives: .align [abs-expr[, abs-expr[, abs-expr]]] + The third expression it is the maximum number of bytes that should be + skipped by this alignment directive. + Therefore, it will affect the display of the specified alignment rules + and affect the operating efficiency. + +This modification relies on binutils commit 1fb3cdd87ec61715a5684925fb6d6a6cf53bb97c. +(Since the assembler will add nop based on the .align information when doing relax, +it will cause the conditional branch to go out of bounds during the assembly process. +This submission of binutils solves this problem.) + +gcc/ChangeLog: + + * config/loongarch/loongarch.h (ASM_OUTPUT_ALIGN_WITH_NOP): + Delete. 
+ +Co-authored-by: Chenghua Xu +--- + gcc/config/loongarch/loongarch.h | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index a2dc4ba8c..572b538be 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -1058,11 +1058,6 @@ typedef struct { + + #define ASM_OUTPUT_ALIGN(STREAM, LOG) fprintf (STREAM, "\t.align\t%d\n", (LOG)) + +-/* "nop" instruction 54525952 (andi $r0,$r0,0) is +- used for padding. */ +-#define ASM_OUTPUT_ALIGN_WITH_NOP(STREAM, LOG) \ +- fprintf (STREAM, "\t.align\t%d,54525952,4\n", (LOG)) +- + /* This is how to output an assembler line to advance the location + counter by SIZE bytes. */ + +-- +2.43.0 + diff --git a/0009-LoongArch-Fix-vec_initv32qiv16qi-template-to-avoid-I.patch b/0009-LoongArch-Fix-vec_initv32qiv16qi-template-to-avoid-I.patch new file mode 100644 index 0000000..cbb6266 --- /dev/null +++ b/0009-LoongArch-Fix-vec_initv32qiv16qi-template-to-avoid-I.patch @@ -0,0 +1,105 @@ +From aa947bf395b5722a23f2edd9d6302e220473d900 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan +Date: Wed, 11 Oct 2023 16:41:25 +0800 +Subject: [PATCH 009/188] LoongArch: Fix vec_initv32qiv16qi template to avoid + ICE. 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Following test code triggers unrecognized insn ICE on LoongArch target +with "-O3 -mlasx": + +void +foo (unsigned char *dst, unsigned char *src) +{ + for (int y = 0; y < 16; y++) + { + for (int x = 0; x < 16; x++) + dst[x] = src[x] + 1; + dst += 32; + src += 32; + } +} + +ICE info: +./test.c: In function ‘foo’: +./test.c:8:1: error: unrecognizable insn: + 8 | } + | ^ +(insn 15 14 16 4 (set (reg:V32QI 185 [ vect__24.7 ]) + (vec_concat:V32QI (reg:V16QI 186) + (const_vector:V16QI [ + (const_int 0 [0]) repeated x16 + ]))) "./test.c":4:19 -1 + (nil)) +during RTL pass: vregs +./test.c:8:1: internal compiler error: in extract_insn, at recog.cc:2791 +0x12028023b _fatal_insn(char const*, rtx_def const*, char const*, int, char const*) + /home/panchenghui/upstream/gcc/gcc/rtl-error.cc:108 +0x12028026f _fatal_insn_not_found(rtx_def const*, char const*, int, char const*) + /home/panchenghui/upstream/gcc/gcc/rtl-error.cc:116 +0x120a03c5b extract_insn(rtx_insn*) + /home/panchenghui/upstream/gcc/gcc/recog.cc:2791 +0x12067ff73 instantiate_virtual_regs_in_insn + /home/panchenghui/upstream/gcc/gcc/function.cc:1610 +0x12067ff73 instantiate_virtual_regs + /home/panchenghui/upstream/gcc/gcc/function.cc:1983 +0x12067ff73 execute + /home/panchenghui/upstream/gcc/gcc/function.cc:2030 + +This RTL is generated inside loongarch_expand_vector_group_init function (related +to vec_initv32qiv16qi template). Original impl doesn't ensure all vec_concat arguments +are register type. This patch adds force_reg() to the vec_concat argument generation. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_expand_vector_group_init): + fix impl related to vec_initv32qiv16qi template to avoid ICE. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c: New test. 
+--- + gcc/config/loongarch/loongarch.cc | 3 ++- + .../loongarch/vector/lasx/lasx-vec-init-1.c | 14 ++++++++++++++ + 2 files changed, 16 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 760b12268..9a629a999 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -10188,7 +10188,8 @@ loongarch_gen_const_int_vector_shuffle (machine_mode mode, int val) + void + loongarch_expand_vector_group_init (rtx target, rtx vals) + { +- rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) }; ++ rtx ops[2] = { force_reg (E_V16QImode, XVECEXP (vals, 0, 0)), ++ force_reg (E_V16QImode, XVECEXP (vals, 0, 1)) }; + emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (E_V32QImode, ops[0], + ops[1]))); + } +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c +new file mode 100644 +index 000000000..28be32982 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3" } */ ++ ++void ++foo (unsigned char *dst, unsigned char *src) ++{ ++ for (int y = 0; y < 16; y++) ++ { ++ for (int x = 0; x < 16; x++) ++ dst[x] = src[x] + 1; ++ dst += 32; ++ src += 32; ++ } ++} +-- +2.43.0 + diff --git a/0010-LoongArch-Use-fcmp.caf.s-instead-of-movgr2cf-for-zer.patch b/0010-LoongArch-Use-fcmp.caf.s-instead-of-movgr2cf-for-zer.patch new file mode 100644 index 0000000..45fba29 --- /dev/null +++ b/0010-LoongArch-Use-fcmp.caf.s-instead-of-movgr2cf-for-zer.patch @@ -0,0 +1,35 @@ +From 35bce671a97b27a41c425109ba92b24ab87ff35b Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Tue, 17 Oct 2023 21:55:05 +0800 +Subject: [PATCH 010/188] LoongArch: Use fcmp.caf.s instead of movgr2cf for + zeroing a fcc + +During the review of an LLVM change 
[1], on LA464 we found that zeroing +an fcc with fcmp.caf.s is much faster than a movgr2cf from $r0. + +[1]: https://github.com/llvm/llvm-project/pull/69300 + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (movfcc): Use fcmp.caf.s for + zeroing a fcc. +--- + gcc/config/loongarch/loongarch.md | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 73e2cbe0b..5f9e63d66 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2150,7 +2150,7 @@ + [(set (match_operand:FCC 0 "register_operand" "=z") + (const_int 0))] + "" +- "movgr2cf\t%0,$r0") ++ "fcmp.caf.s\t%0,$f0,$f0") + + ;; Conditional move instructions. + +-- +2.43.0 + diff --git a/0011-LoongArch-Implement-avg-and-sad-standard-names.patch b/0011-LoongArch-Implement-avg-and-sad-standard-names.patch new file mode 100644 index 0000000..ee1917d --- /dev/null +++ b/0011-LoongArch-Implement-avg-and-sad-standard-names.patch @@ -0,0 +1,389 @@ +From 159dd069968fae895f1f663ebda6f53970ec34b1 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Wed, 18 Oct 2023 17:36:12 +0800 +Subject: [PATCH 011/188] LoongArch:Implement avg and sad standard names. + +gcc/ChangeLog: + + * config/loongarch/lasx.md + (avg3_ceil): New patterns. + (uavg3_ceil): Ditto. + (avg3_floor): Ditto. + (uavg3_floor): Ditto. + (usadv32qi): Ditto. + (ssadv32qi): Ditto. + * config/loongarch/lsx.md + (avg3_ceil): New patterns. + (uavg3_ceil): Ditto. + (avg3_floor): Ditto. + (uavg3_floor): Ditto. + (usadv16qi): Ditto. + (ssadv16qi): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/avg-ceil-lasx.c: New test. + * gcc.target/loongarch/avg-ceil-lsx.c: New test. + * gcc.target/loongarch/avg-floor-lasx.c: New test. + * gcc.target/loongarch/avg-floor-lsx.c: New test. + * gcc.target/loongarch/sad-lasx.c: New test. + * gcc.target/loongarch/sad-lsx.c: New test. 
+--- + gcc/config/loongarch/lasx.md | 78 +++++++++++++++++++ + gcc/config/loongarch/lsx.md | 78 +++++++++++++++++++ + .../gcc.target/loongarch/avg-ceil-lasx.c | 22 ++++++ + .../gcc.target/loongarch/avg-ceil-lsx.c | 22 ++++++ + .../gcc.target/loongarch/avg-floor-lasx.c | 22 ++++++ + .../gcc.target/loongarch/avg-floor-lsx.c | 22 ++++++ + gcc/testsuite/gcc.target/loongarch/sad-lasx.c | 20 +++++ + gcc/testsuite/gcc.target/loongarch/sad-lsx.c | 20 +++++ + 8 files changed, 284 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-ceil-lasx.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-ceil-lsx.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-floor-lasx.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-floor-lsx.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/sad-lasx.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/sad-lsx.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 2bc5d47ed..c7496d68a 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -5171,3 +5171,81 @@ + const0_rtx)); + DONE; + }) ++ ++(define_expand "avg3_ceil" ++ [(match_operand:ILASX_WHB 0 "register_operand") ++ (match_operand:ILASX_WHB 1 "register_operand") ++ (match_operand:ILASX_WHB 2 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ emit_insn (gen_lasx_xvavgr_s_ (operands[0], ++ operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_expand "uavg3_ceil" ++ [(match_operand:ILASX_WHB 0 "register_operand") ++ (match_operand:ILASX_WHB 1 "register_operand") ++ (match_operand:ILASX_WHB 2 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ emit_insn (gen_lasx_xvavgr_u_ (operands[0], ++ operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_expand "avg3_floor" ++ [(match_operand:ILASX_WHB 0 "register_operand") ++ (match_operand:ILASX_WHB 1 "register_operand") ++ (match_operand:ILASX_WHB 2 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ emit_insn (gen_lasx_xvavg_s_ (operands[0], ++ 
operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_expand "uavg3_floor" ++ [(match_operand:ILASX_WHB 0 "register_operand") ++ (match_operand:ILASX_WHB 1 "register_operand") ++ (match_operand:ILASX_WHB 2 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ emit_insn (gen_lasx_xvavg_u_ (operands[0], ++ operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_expand "usadv32qi" ++ [(match_operand:V8SI 0 "register_operand") ++ (match_operand:V32QI 1 "register_operand") ++ (match_operand:V32QI 2 "register_operand") ++ (match_operand:V8SI 3 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx t1 = gen_reg_rtx (V32QImode); ++ rtx t2 = gen_reg_rtx (V16HImode); ++ rtx t3 = gen_reg_rtx (V8SImode); ++ emit_insn (gen_lasx_xvabsd_u_bu (t1, operands[1], operands[2])); ++ emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1)); ++ emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2)); ++ emit_insn (gen_addv8si3 (operands[0], t3, operands[3])); ++ DONE; ++}) ++ ++(define_expand "ssadv32qi" ++ [(match_operand:V8SI 0 "register_operand") ++ (match_operand:V32QI 1 "register_operand") ++ (match_operand:V32QI 2 "register_operand") ++ (match_operand:V8SI 3 "register_operand")] ++ "ISA_HAS_LASX" ++{ ++ rtx t1 = gen_reg_rtx (V32QImode); ++ rtx t2 = gen_reg_rtx (V16HImode); ++ rtx t3 = gen_reg_rtx (V8SImode); ++ emit_insn (gen_lasx_xvabsd_s_b (t1, operands[1], operands[2])); ++ emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1)); ++ emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2)); ++ emit_insn (gen_addv8si3 (operands[0], t3, operands[3])); ++ DONE; ++}) +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 075f6ba56..b4e92ae9c 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -3581,6 +3581,84 @@ + DONE; + }) + ++(define_expand "avg3_ceil" ++ [(match_operand:ILSX_WHB 0 "register_operand") ++ (match_operand:ILSX_WHB 1 "register_operand") ++ (match_operand:ILSX_WHB 2 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ emit_insn (gen_lsx_vavgr_s_ (operands[0], ++ operands[1], 
operands[2])); ++ DONE; ++}) ++ ++(define_expand "uavg3_ceil" ++ [(match_operand:ILSX_WHB 0 "register_operand") ++ (match_operand:ILSX_WHB 1 "register_operand") ++ (match_operand:ILSX_WHB 2 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ emit_insn (gen_lsx_vavgr_u_ (operands[0], ++ operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_expand "avg3_floor" ++ [(match_operand:ILSX_WHB 0 "register_operand") ++ (match_operand:ILSX_WHB 1 "register_operand") ++ (match_operand:ILSX_WHB 2 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ emit_insn (gen_lsx_vavg_s_ (operands[0], ++ operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_expand "uavg3_floor" ++ [(match_operand:ILSX_WHB 0 "register_operand") ++ (match_operand:ILSX_WHB 1 "register_operand") ++ (match_operand:ILSX_WHB 2 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ emit_insn (gen_lsx_vavg_u_ (operands[0], ++ operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_expand "usadv16qi" ++ [(match_operand:V4SI 0 "register_operand") ++ (match_operand:V16QI 1 "register_operand") ++ (match_operand:V16QI 2 "register_operand") ++ (match_operand:V4SI 3 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ rtx t1 = gen_reg_rtx (V16QImode); ++ rtx t2 = gen_reg_rtx (V8HImode); ++ rtx t3 = gen_reg_rtx (V4SImode); ++ emit_insn (gen_lsx_vabsd_u_bu (t1, operands[1], operands[2])); ++ emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1)); ++ emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2)); ++ emit_insn (gen_addv4si3 (operands[0], t3, operands[3])); ++ DONE; ++}) ++ ++(define_expand "ssadv16qi" ++ [(match_operand:V4SI 0 "register_operand") ++ (match_operand:V16QI 1 "register_operand") ++ (match_operand:V16QI 2 "register_operand") ++ (match_operand:V4SI 3 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ rtx t1 = gen_reg_rtx (V16QImode); ++ rtx t2 = gen_reg_rtx (V8HImode); ++ rtx t3 = gen_reg_rtx (V4SImode); ++ emit_insn (gen_lsx_vabsd_s_b (t1, operands[1], operands[2])); ++ emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1)); ++ emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2)); ++ 
emit_insn (gen_addv4si3 (operands[0], t3, operands[3])); ++ DONE; ++}) ++ + (define_insn "lsx_vwev_d_w" + [(set (match_operand:V2DI 0 "register_operand" "=f") + (addsubmul:V2DI +diff --git a/gcc/testsuite/gcc.target/loongarch/avg-ceil-lasx.c b/gcc/testsuite/gcc.target/loongarch/avg-ceil-lasx.c +new file mode 100644 +index 000000000..16db7bf72 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/avg-ceil-lasx.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mlasx" } */ ++/* { dg-final { scan-assembler "xvavgr.b" } } */ ++/* { dg-final { scan-assembler "xvavgr.bu" } } */ ++/* { dg-final { scan-assembler "xvavgr.hu" } } */ ++/* { dg-final { scan-assembler "xvavgr.h" } } */ ++ ++#define N 1024 ++ ++#define TEST(TYPE, NAME) \ ++ TYPE a_##NAME[N], b_##NAME[N], c_##NAME[N]; \ ++ void f_##NAME (void) \ ++ { \ ++ int i; \ ++ for (i = 0; i < N; i++) \ ++ a_##NAME[i] = (b_##NAME[i] + c_##NAME[i] + 1) >> 1; \ ++ } ++ ++TEST(char, 1); ++TEST(short, 2); ++TEST(unsigned char, 3); ++TEST(unsigned short, 4); +diff --git a/gcc/testsuite/gcc.target/loongarch/avg-ceil-lsx.c b/gcc/testsuite/gcc.target/loongarch/avg-ceil-lsx.c +new file mode 100644 +index 000000000..94119c23b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/avg-ceil-lsx.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mlsx" } */ ++/* { dg-final { scan-assembler "vavgr.b" } } */ ++/* { dg-final { scan-assembler "vavgr.bu" } } */ ++/* { dg-final { scan-assembler "vavgr.hu" } } */ ++/* { dg-final { scan-assembler "vavgr.h" } } */ ++ ++#define N 1024 ++ ++#define TEST(TYPE, NAME) \ ++ TYPE a_##NAME[N], b_##NAME[N], c_##NAME[N]; \ ++ void f_##NAME (void) \ ++ { \ ++ int i; \ ++ for (i = 0; i < N; i++) \ ++ a_##NAME[i] = (b_##NAME[i] + c_##NAME[i] + 1) >> 1; \ ++ } ++ ++TEST(char, 1); ++TEST(short, 2); ++TEST(unsigned char, 3); ++TEST(unsigned short, 4); +diff --git a/gcc/testsuite/gcc.target/loongarch/avg-floor-lasx.c b/gcc/testsuite/gcc.target/loongarch/avg-floor-lasx.c 
+new file mode 100644 +index 000000000..da6896531 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/avg-floor-lasx.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mlasx" } */ ++/* { dg-final { scan-assembler "xvavg.b" } } */ ++/* { dg-final { scan-assembler "xvavg.bu" } } */ ++/* { dg-final { scan-assembler "xvavg.hu" } } */ ++/* { dg-final { scan-assembler "xvavg.h" } } */ ++ ++#define N 1024 ++ ++#define TEST(TYPE, NAME) \ ++ TYPE a_##NAME[N], b_##NAME[N], c_##NAME[N]; \ ++ void f_##NAME (void) \ ++ { \ ++ int i; \ ++ for (i = 0; i < N; i++) \ ++ a_##NAME[i] = (b_##NAME[i] + c_##NAME[i]) >> 1; \ ++ } ++ ++TEST(char, 1); ++TEST(short, 2); ++TEST(unsigned char, 3); ++TEST(unsigned short, 4); +diff --git a/gcc/testsuite/gcc.target/loongarch/avg-floor-lsx.c b/gcc/testsuite/gcc.target/loongarch/avg-floor-lsx.c +new file mode 100644 +index 000000000..bbb9db527 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/avg-floor-lsx.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mlsx" } */ ++/* { dg-final { scan-assembler "vavg.b" } } */ ++/* { dg-final { scan-assembler "vavg.bu" } } */ ++/* { dg-final { scan-assembler "vavg.hu" } } */ ++/* { dg-final { scan-assembler "vavg.h" } } */ ++ ++#define N 1024 ++ ++#define TEST(TYPE, NAME) \ ++ TYPE a_##NAME[N], b_##NAME[N], c_##NAME[N]; \ ++ void f_##NAME (void) \ ++ { \ ++ int i; \ ++ for (i = 0; i < N; i++) \ ++ a_##NAME[i] = (b_##NAME[i] + c_##NAME[i]) >> 1; \ ++ } ++ ++TEST(char, 1); ++TEST(short, 2); ++TEST(unsigned char, 3); ++TEST(unsigned short, 4); +diff --git a/gcc/testsuite/gcc.target/loongarch/sad-lasx.c b/gcc/testsuite/gcc.target/loongarch/sad-lasx.c +new file mode 100644 +index 000000000..6c0cdfd97 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/sad-lasx.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mlasx" } */ ++ ++#define N 1024 ++ ++#define TEST(SIGN) \ ++ SIGN char a_##SIGN[N], b_##SIGN[N]; \ ++ int f_##SIGN (void) \ ++ { \ ++ 
int i, sum = 0; \ ++ for (i = 0; i < N; i++) \ ++ sum += __builtin_abs (a_##SIGN[i] - b_##SIGN[i]);; \ ++ return sum; \ ++ } ++ ++TEST(signed); ++TEST(unsigned); ++ ++/* { dg-final { scan-assembler {\txvabsd.bu\t} } } */ ++/* { dg-final { scan-assembler {\txvabsd.b\t} } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/sad-lsx.c b/gcc/testsuite/gcc.target/loongarch/sad-lsx.c +new file mode 100644 +index 000000000..b92110a8b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/sad-lsx.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mlsx" } */ ++ ++#define N 1024 ++ ++#define TEST(SIGN) \ ++ SIGN char a_##SIGN[N], b_##SIGN[N]; \ ++ int f_##SIGN (void) \ ++ { \ ++ int i, sum = 0; \ ++ for (i = 0; i < N; i++) \ ++ sum += __builtin_abs (a_##SIGN[i] - b_##SIGN[i]);; \ ++ return sum; \ ++ } ++ ++TEST(signed); ++TEST(unsigned); ++ ++/* { dg-final { scan-assembler {\tvabsd.bu\t} } } */ ++/* { dg-final { scan-assembler {\tvabsd.b\t} } } */ +-- +2.43.0 + diff --git a/0012-LoongArch-Implement-vec_widen-standard-names.patch b/0012-LoongArch-Implement-vec_widen-standard-names.patch new file mode 100644 index 0000000..dc19fc2 --- /dev/null +++ b/0012-LoongArch-Implement-vec_widen-standard-names.patch @@ -0,0 +1,403 @@ +From 81e2e22979d9f9d170b1c30ec27e30e1f25aec35 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Wed, 18 Oct 2023 17:39:40 +0800 +Subject: [PATCH 012/188] LoongArch:Implement vec_widen standard names. + +Add support for vec_widen lo/hi patterns. These do not directly +match on Loongarch lasx instructions but can be emulated with +even/odd + vector merge. + +gcc/ChangeLog: + + * config/loongarch/lasx.md + (vec_widen_mult_even_v8si): New patterns. + (vec_widen_add_hi_): Ditto. + (vec_widen_add_lo_): Ditto. + (vec_widen_sub_hi_): Ditto. + (vec_widen_sub_lo_): Ditto. + (vec_widen_mult_hi_): Ditto. + (vec_widen_mult_lo_): Ditto. + * config/loongarch/loongarch.md (u_bool): New iterator. 
+ * config/loongarch/loongarch-protos.h + (loongarch_expand_vec_widen_hilo): New prototype. + * config/loongarch/loongarch.cc + (loongarch_expand_vec_interleave): New function. + (loongarch_expand_vec_widen_hilo): New function. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-widen-add.c: New test. + * gcc.target/loongarch/vect-widen-mul.c: New test. + * gcc.target/loongarch/vect-widen-sub.c: New test. +--- + gcc/config/loongarch/lasx.md | 82 ++++++++--- + gcc/config/loongarch/loongarch-protos.h | 1 + + gcc/config/loongarch/loongarch.cc | 137 ++++++++++++++++++ + gcc/config/loongarch/loongarch.md | 2 + + .../gcc.target/loongarch/vect-widen-add.c | 24 +++ + .../gcc.target/loongarch/vect-widen-mul.c | 24 +++ + .../gcc.target/loongarch/vect-widen-sub.c | 24 +++ + 7 files changed, 277 insertions(+), 17 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-add.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index c7496d68a..442fda246 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -5048,23 +5048,71 @@ + [(set_attr "type" "simd_store") + (set_attr "mode" "DI")]) + +-(define_insn "vec_widen_mult_even_v8si" +- [(set (match_operand:V4DI 0 "register_operand" "=f") +- (mult:V4DI +- (any_extend:V4DI +- (vec_select:V4SI +- (match_operand:V8SI 1 "register_operand" "%f") +- (parallel [(const_int 0) (const_int 2) +- (const_int 4) (const_int 6)]))) +- (any_extend:V4DI +- (vec_select:V4SI +- (match_operand:V8SI 2 "register_operand" "f") +- (parallel [(const_int 0) (const_int 2) +- (const_int 4) (const_int 6)])))))] +- "ISA_HAS_LASX" +- "xvmulwev.d.w\t%u0,%u1,%u2" +- [(set_attr "type" "simd_int_arith") +- (set_attr "mode" "V4DI")]) ++(define_expand "vec_widen_add_hi_" ++ [(match_operand: 0 "register_operand") ++ (any_extend: 
(match_operand:ILASX_HB 1 "register_operand")) ++ (any_extend: (match_operand:ILASX_HB 2 "register_operand"))] ++ "ISA_HAS_LASX" ++{ ++ loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2], ++ , true, "add"); ++ DONE; ++}) ++ ++(define_expand "vec_widen_add_lo_" ++ [(match_operand: 0 "register_operand") ++ (any_extend: (match_operand:ILASX_HB 1 "register_operand")) ++ (any_extend: (match_operand:ILASX_HB 2 "register_operand"))] ++ "ISA_HAS_LASX" ++{ ++ loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2], ++ , false, "add"); ++ DONE; ++}) ++ ++(define_expand "vec_widen_sub_hi_" ++ [(match_operand: 0 "register_operand") ++ (any_extend: (match_operand:ILASX_HB 1 "register_operand")) ++ (any_extend: (match_operand:ILASX_HB 2 "register_operand"))] ++ "ISA_HAS_LASX" ++{ ++ loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2], ++ , true, "sub"); ++ DONE; ++}) ++ ++(define_expand "vec_widen_sub_lo_" ++ [(match_operand: 0 "register_operand") ++ (any_extend: (match_operand:ILASX_HB 1 "register_operand")) ++ (any_extend: (match_operand:ILASX_HB 2 "register_operand"))] ++ "ISA_HAS_LASX" ++{ ++ loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2], ++ , false, "sub"); ++ DONE; ++}) ++ ++(define_expand "vec_widen_mult_hi_" ++ [(match_operand: 0 "register_operand") ++ (any_extend: (match_operand:ILASX_HB 1 "register_operand")) ++ (any_extend: (match_operand:ILASX_HB 2 "register_operand"))] ++ "ISA_HAS_LASX" ++{ ++ loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2], ++ , true, "mult"); ++ DONE; ++}) ++ ++(define_expand "vec_widen_mult_lo_" ++ [(match_operand: 0 "register_operand") ++ (any_extend: (match_operand:ILASX_HB 1 "register_operand")) ++ (any_extend: (match_operand:ILASX_HB 2 "register_operand"))] ++ "ISA_HAS_LASX" ++{ ++ loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2], ++ , false, "mult"); ++ DONE; ++}) + + ;; Vector reduction operation + (define_expand 
"reduc_plus_scal_v4di" +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index ea61cf567..163162598 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -205,6 +205,7 @@ extern void loongarch_register_frame_header_opt (void); + extern void loongarch_expand_vec_cond_expr (machine_mode, machine_mode, rtx *); + extern void loongarch_expand_vec_cond_mask_expr (machine_mode, machine_mode, + rtx *); ++extern void loongarch_expand_vec_widen_hilo (rtx, rtx, rtx, bool, bool, const char *); + + /* Routines implemented in loongarch-c.c. */ + void loongarch_cpu_cpp_builtins (cpp_reader *); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 9a629a999..c0f58f9a9 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -8028,6 +8028,143 @@ loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *d) + return loongarch_expand_vec_perm_even_odd_1 (d, odd); + } + ++static void ++loongarch_expand_vec_interleave (rtx target, rtx op0, rtx op1, bool high_p) ++{ ++ struct expand_vec_perm_d d; ++ unsigned i, nelt, base; ++ bool ok; ++ ++ d.target = target; ++ d.op0 = op0; ++ d.op1 = op1; ++ d.vmode = GET_MODE (target); ++ d.nelt = nelt = GET_MODE_NUNITS (d.vmode); ++ d.one_vector_p = false; ++ d.testing_p = false; ++ ++ base = high_p ? nelt / 2 : 0; ++ for (i = 0; i < nelt / 2; ++i) ++ { ++ d.perm[i * 2] = i + base; ++ d.perm[i * 2 + 1] = i + base + nelt; ++ } ++ ++ ok = loongarch_expand_vec_perm_interleave (&d); ++ gcc_assert (ok); ++} ++ ++/* The loongarch lasx instructions xvmulwev and xvmulwod return the even or odd ++ parts of the double sized result elements in the corresponding elements of ++ the target register. That's NOT what the vec_widen_umult_lo/hi patterns are ++ expected to do. We emulate the widening lo/hi multiplies with the even/odd ++ versions followed by a vector merge. 
*/ ++ ++void ++loongarch_expand_vec_widen_hilo (rtx dest, rtx op1, rtx op2, ++ bool uns_p, bool high_p, const char *optab) ++{ ++ machine_mode wmode = GET_MODE (dest); ++ machine_mode mode = GET_MODE (op1); ++ rtx t1, t2, t3; ++ ++ t1 = gen_reg_rtx (wmode); ++ t2 = gen_reg_rtx (wmode); ++ t3 = gen_reg_rtx (wmode); ++ switch (mode) ++ { ++ case V16HImode: ++ if (!strcmp (optab, "add")) ++ { ++ if (!uns_p) ++ { ++ emit_insn (gen_lasx_xvaddwev_w_h (t1, op1, op2)); ++ emit_insn (gen_lasx_xvaddwod_w_h (t2, op1, op2)); ++ } ++ else ++ { ++ emit_insn (gen_lasx_xvaddwev_w_hu (t1, op1, op2)); ++ emit_insn (gen_lasx_xvaddwod_w_hu (t2, op1, op2)); ++ } ++ } ++ else if (!strcmp (optab, "mult")) ++ { ++ if (!uns_p) ++ { ++ emit_insn (gen_lasx_xvmulwev_w_h (t1, op1, op2)); ++ emit_insn (gen_lasx_xvmulwod_w_h (t2, op1, op2)); ++ } ++ else ++ { ++ emit_insn (gen_lasx_xvmulwev_w_hu (t1, op1, op2)); ++ emit_insn (gen_lasx_xvmulwod_w_hu (t2, op1, op2)); ++ } ++ } ++ else if (!strcmp (optab, "sub")) ++ { ++ if (!uns_p) ++ { ++ emit_insn (gen_lasx_xvsubwev_w_h (t1, op1, op2)); ++ emit_insn (gen_lasx_xvsubwod_w_h (t2, op1, op2)); ++ } ++ else ++ { ++ emit_insn (gen_lasx_xvsubwev_w_hu (t1, op1, op2)); ++ emit_insn (gen_lasx_xvsubwod_w_hu (t2, op1, op2)); ++ } ++ } ++ break; ++ ++ case V32QImode: ++ if (!strcmp (optab, "add")) ++ { ++ if (!uns_p) ++ { ++ emit_insn (gen_lasx_xvaddwev_h_b (t1, op1, op2)); ++ emit_insn (gen_lasx_xvaddwod_h_b (t2, op1, op2)); ++ } ++ else ++ { ++ emit_insn (gen_lasx_xvaddwev_h_bu (t1, op1, op2)); ++ emit_insn (gen_lasx_xvaddwod_h_bu (t2, op1, op2)); ++ } ++ } ++ else if (!strcmp (optab, "mult")) ++ { ++ if (!uns_p) ++ { ++ emit_insn (gen_lasx_xvmulwev_h_b (t1, op1, op2)); ++ emit_insn (gen_lasx_xvmulwod_h_b (t2, op1, op2)); ++ } ++ else ++ { ++ emit_insn (gen_lasx_xvmulwev_h_bu (t1, op1, op2)); ++ emit_insn (gen_lasx_xvmulwod_h_bu (t2, op1, op2)); ++ } ++ } ++ else if (!strcmp (optab, "sub")) ++ { ++ if (!uns_p) ++ { ++ emit_insn (gen_lasx_xvsubwev_h_b (t1, 
op1, op2)); ++ emit_insn (gen_lasx_xvsubwod_h_b (t2, op1, op2)); ++ } ++ else ++ { ++ emit_insn (gen_lasx_xvsubwev_h_bu (t1, op1, op2)); ++ emit_insn (gen_lasx_xvsubwod_h_bu (t2, op1, op2)); ++ } ++ } ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ loongarch_expand_vec_interleave (t3, t1, t2, high_p); ++ emit_move_insn (dest, gen_lowpart (wmode, t3)); ++} ++ + /* Expand a variable vector permutation for LASX. */ + + void +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 5f9e63d66..29ac950bf 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -509,6 +509,8 @@ + ;; is like , but the signed form expands to "s" rather than "". + (define_code_attr su [(sign_extend "s") (zero_extend "u")]) + ++(define_code_attr u_bool [(sign_extend "false") (zero_extend "true")]) ++ + ;; expands to the name of the optab for a particular code. + (define_code_attr optab [(ashift "ashl") + (ashiftrt "ashr") +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-widen-add.c b/gcc/testsuite/gcc.target/loongarch/vect-widen-add.c +new file mode 100644 +index 000000000..0bf832d0e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-widen-add.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mlasx" } */ ++/* { dg-final { scan-assembler "xvaddwev.w.h" } } */ ++/* { dg-final { scan-assembler "xvaddwod.w.h" } } */ ++/* { dg-final { scan-assembler "xvaddwev.w.hu" } } */ ++/* { dg-final { scan-assembler "xvaddwod.w.hu" } } */ ++ ++#include ++ ++#define SIZE 1024 ++ ++void ++wide_uadd (uint32_t *foo, uint16_t *a, uint16_t *b) ++{ ++ for ( int i = 0; i < SIZE; i++) ++ foo[i] = a[i] + b[i]; ++} ++ ++void ++wide_sadd (int32_t *foo, int16_t *a, int16_t *b) ++{ ++ for ( int i = 0; i < SIZE; i++) ++ foo[i] = a[i] + b[i]; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c b/gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c +new file mode 100644 +index 
000000000..84b020eea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mlasx" } */ ++/* { dg-final { scan-assembler "xvmulwev.w.h" } } */ ++/* { dg-final { scan-assembler "xvmulwod.w.h" } } */ ++/* { dg-final { scan-assembler "xvmulwev.w.hu" } } */ ++/* { dg-final { scan-assembler "xvmulwod.w.hu" } } */ ++ ++#include ++ ++#define SIZE 1024 ++ ++void ++wide_umul (uint32_t *foo, uint16_t *a, uint16_t *b) ++{ ++ for ( int i = 0; i < SIZE; i++) ++ foo[i] = a[i] * b[i]; ++} ++ ++void ++wide_smul (int32_t *foo, int16_t *a, int16_t *b) ++{ ++ for ( int i = 0; i < SIZE; i++) ++ foo[i] = a[i] * b[i]; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c b/gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c +new file mode 100644 +index 000000000..69fc3a517 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mlasx" } */ ++/* { dg-final { scan-assembler "xvsubwev.w.h" } } */ ++/* { dg-final { scan-assembler "xvsubwod.w.h" } } */ ++/* { dg-final { scan-assembler "xvsubwev.w.hu" } } */ ++/* { dg-final { scan-assembler "xvsubwod.w.hu" } } */ ++ ++#include ++ ++#define SIZE 1024 ++ ++void ++wide_usub (uint32_t *foo, uint16_t *a, uint16_t *b) ++{ ++ for ( int i = 0; i < SIZE; i++) ++ foo[i] = a[i] - b[i]; ++} ++ ++void ++wide_ssub (int32_t *foo, int16_t *a, int16_t *b) ++{ ++ for ( int i = 0; i < SIZE; i++) ++ foo[i] = a[i] - b[i]; ++} +-- +2.43.0 + diff --git a/0013-LoongArch-Implement-the-new-vector-cost-model-framew.patch b/0013-LoongArch-Implement-the-new-vector-cost-model-framew.patch new file mode 100644 index 0000000..14e8683 --- /dev/null +++ b/0013-LoongArch-Implement-the-new-vector-cost-model-framew.patch @@ -0,0 +1,354 @@ +From 472890b43d2848a46fa13945279308f0a21c55d9 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Wed, 18 Oct 2023 17:43:39 +0800 +Subject: [PATCH 013/188] 
LoongArch:Implement the new vector cost model + framework. + +This patch make loongarch use the new vector hooks and implements the costing +function determine_suggested_unroll_factor, to make it be able to suggest the +unroll factor for a given loop being vectorized base vec_ops analysis during +vector costing and the available issue information. Referring to aarch64 and +rs6000 port. + +The patch also reduces the cost of unaligned stores, making it equal to the +cost of aligned ones in order to avoid odd alignment peeling. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_vector_costs): Inherit from + vector_costs. Add a constructor. + (loongarch_vector_costs::add_stmt_cost): Use adjust_cost_for_freq to + adjust the cost for inner loops. + (loongarch_vector_costs::count_operations): New function. + (loongarch_vector_costs::determine_suggested_unroll_factor): Ditto. + (loongarch_vector_costs::finish_cost): Ditto. + (loongarch_builtin_vectorization_cost): Adjust. + * config/loongarch/loongarch.opt (loongarch-vect-unroll-limit): New parameter. + (loongarcg-vect-issue-info): Ditto. + (mmemvec-cost): Delete. + * config/loongarch/genopts/loongarch.opt.in + (loongarch-vect-unroll-limit): Ditto. + (loongarcg-vect-issue-info): Ditto. + (mmemvec-cost): Delete. + * doc/invoke.texi (loongarcg-vect-unroll-limit): Document new option. 
+--- + gcc/config/loongarch/genopts/loongarch.opt.in | 15 +- + gcc/config/loongarch/loongarch.cc | 173 ++++++++++++++++-- + gcc/config/loongarch/loongarch.opt | 15 +- + gcc/doc/invoke.texi | 7 + + 4 files changed, 188 insertions(+), 22 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index f18733c24..74cf4a7f7 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -152,10 +152,6 @@ mbranch-cost= + Target RejectNegative Joined UInteger Var(loongarch_branch_cost) + -mbranch-cost=COST Set the cost of branches to roughly COST instructions. + +-mmemvec-cost= +-Target RejectNegative Joined UInteger Var(loongarch_vector_access_cost) IntegerRange(1, 5) +-mmemvec-cost=COST Set the cost of vector memory access instructions. +- + mcheck-zero-division + Target Mask(CHECK_ZERO_DIV) + Trap on integer divide by zero. +@@ -219,3 +215,14 @@ mrelax + Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION) + Take advantage of linker relaxations to reduce the number of instructions + required to materialize symbol addresses. ++ ++-param=loongarch-vect-unroll-limit= ++Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param ++Used to limit unroll factor which indicates how much the autovectorizer may ++unroll a loop. The default value is 6. ++ ++-param=loongarch-vect-issue-info= ++Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) IntegerRange(1, 64) Param ++Indicate how many non memory access vector instructions can be issued per ++cycle, it's used in unroll factor determination for autovectorizer. The ++default value is 4. +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index c0f58f9a9..e22a64600 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -65,6 +65,8 @@ along with GCC; see the file COPYING3. 
If not see + #include "rtl-iter.h" + #include "opts.h" + #include "function-abi.h" ++#include "cfgloop.h" ++#include "tree-vectorizer.h" + + /* This file should be included last. */ + #include "target-def.h" +@@ -3841,8 +3843,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + } + } + +-/* Vectorizer cost model implementation. */ +- + /* Implement targetm.vectorize.builtin_vectorization_cost. */ + + static int +@@ -3861,36 +3861,182 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + case vector_load: + case vec_to_scalar: + case scalar_to_vec: +- case cond_branch_not_taken: +- case vec_promote_demote: + case scalar_store: + case vector_store: + return 1; + ++ case vec_promote_demote: + case vec_perm: + return LASX_SUPPORTED_MODE_P (mode) + && !LSX_SUPPORTED_MODE_P (mode) ? 2 : 1; + + case unaligned_load: +- case vector_gather_load: +- return 2; +- + case unaligned_store: +- case vector_scatter_store: +- return 10; ++ return 2; + + case cond_branch_taken: +- return 3; ++ return 4; ++ ++ case cond_branch_not_taken: ++ return 2; + + case vec_construct: + elements = TYPE_VECTOR_SUBPARTS (vectype); +- return elements / 2 + 1; ++ if (ISA_HAS_LASX) ++ return elements + 1; ++ else ++ return elements; + + default: + gcc_unreachable (); + } + } + ++class loongarch_vector_costs : public vector_costs ++{ ++public: ++ using vector_costs::vector_costs; ++ ++ unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind, ++ stmt_vec_info stmt_info, slp_tree, tree vectype, ++ int misalign, ++ vect_cost_model_location where) override; ++ void finish_cost (const vector_costs *) override; ++ ++protected: ++ void count_operations (vect_cost_for_stmt, stmt_vec_info, ++ vect_cost_model_location, unsigned int); ++ unsigned int determine_suggested_unroll_factor (loop_vec_info); ++ /* The number of vectorized stmts in loop. */ ++ unsigned m_stmts = 0; ++ /* The number of load and store operations in loop. 
*/ ++ unsigned m_loads = 0; ++ unsigned m_stores = 0; ++ /* Reduction factor for suggesting unroll factor. */ ++ unsigned m_reduc_factor = 0; ++ /* True if the loop contains an average operation. */ ++ bool m_has_avg =false; ++}; ++ ++/* Implement TARGET_VECTORIZE_CREATE_COSTS. */ ++static vector_costs * ++loongarch_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar) ++{ ++ return new loongarch_vector_costs (vinfo, costing_for_scalar); ++} ++ ++void ++loongarch_vector_costs::count_operations (vect_cost_for_stmt kind, ++ stmt_vec_info stmt_info, ++ vect_cost_model_location where, ++ unsigned int count) ++{ ++ if (!m_costing_for_scalar ++ && is_a<loop_vec_info> (m_vinfo) ++ && where == vect_body) ++ { ++ m_stmts += count; ++ ++ if (kind == scalar_load ++ || kind == vector_load ++ || kind == unaligned_load) ++ m_loads += count; ++ else if (kind == scalar_store ++ || kind == vector_store ++ || kind == unaligned_store) ++ m_stores += count; ++ else if ((kind == scalar_stmt ++ || kind == vector_stmt ++ || kind == vec_to_scalar) ++ && stmt_info && vect_is_reduction (stmt_info)) ++ { ++ tree lhs = gimple_get_lhs (stmt_info->stmt); ++ unsigned int base = FLOAT_TYPE_P (TREE_TYPE (lhs)) ? 2 : 1; ++ m_reduc_factor = MAX (base * count, m_reduc_factor); ++ } ++ } ++} ++ ++unsigned int ++loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vinfo) ++{ ++ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); ++ ++ if (m_has_avg) ++ return 1; ++ ++ /* Don't unroll if it's specified explicitly not to be unrolled. */ ++ if (loop->unroll == 1 ++ || (OPTION_SET_P (flag_unroll_loops) && !flag_unroll_loops) ++ || (OPTION_SET_P (flag_unroll_all_loops) && !flag_unroll_all_loops)) ++ return 1; ++ ++ unsigned int nstmts_nonldst = m_stmts - m_loads - m_stores; ++ /* Don't unroll if no vector instructions excepting for memory access.
*/ ++ if (nstmts_nonldst == 0) ++ return 1; ++ ++ /* Use this simple hardware resource model that how many non vld/vst ++ vector instructions can be issued per cycle. */ ++ unsigned int issue_info = loongarch_vect_issue_info; ++ unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1; ++ unsigned int uf = CEIL (reduc_factor * issue_info, nstmts_nonldst); ++ uf = MIN ((unsigned int) loongarch_vect_unroll_limit, uf); ++ ++ return 1 << ceil_log2 (uf); ++} ++ ++unsigned ++loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, ++ stmt_vec_info stmt_info, slp_tree, ++ tree vectype, int misalign, ++ vect_cost_model_location where) ++{ ++ unsigned retval = 0; ++ ++ if (flag_vect_cost_model) ++ { ++ int stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype, ++ misalign); ++ retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost); ++ m_costs[where] += retval; ++ ++ count_operations (kind, stmt_info, where, count); ++ } ++ ++ if (stmt_info) ++ { ++ /* Detect the use of an averaging operation. */ ++ gimple *stmt = stmt_info->stmt; ++ if (is_gimple_call (stmt) ++ && gimple_call_internal_p (stmt)) ++ { ++ switch (gimple_call_internal_fn (stmt)) ++ { ++ case IFN_AVG_FLOOR: ++ case IFN_AVG_CEIL: ++ m_has_avg = true; ++ default: ++ break; ++ } ++ } ++ } ++ ++ return retval; ++} ++ ++void ++loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs) ++{ ++ loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo); ++ if (loop_vinfo) ++ { ++ m_suggested_unroll_factor = determine_suggested_unroll_factor (loop_vinfo); ++ } ++ ++ vector_costs::finish_cost (scalar_costs); ++} ++ + /* Implement TARGET_ADDRESS_COST.
*/ + + static int +@@ -7261,9 +7407,6 @@ loongarch_option_override_internal (struct gcc_options *opts, + if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib) + error ("%qs cannot be used for compiling a shared library", + "-mdirect-extern-access"); +- if (loongarch_vector_access_cost == 0) +- loongarch_vector_access_cost = 5; +- + + switch (la_target.cmodel) + { +@@ -11275,6 +11418,8 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode, + #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST + #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + loongarch_builtin_vectorization_cost ++#undef TARGET_VECTORIZE_CREATE_COSTS ++#define TARGET_VECTORIZE_CREATE_COSTS loongarch_vectorize_create_costs + + + #undef TARGET_IN_SMALL_DATA_P +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 78f2baf3a..34bd832bd 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -159,10 +159,6 @@ mbranch-cost= + Target RejectNegative Joined UInteger Var(loongarch_branch_cost) + -mbranch-cost=COST Set the cost of branches to roughly COST instructions. + +-mmemvec-cost= +-Target RejectNegative Joined UInteger Var(loongarch_vector_access_cost) IntegerRange(1, 5) +-mmemvec-cost=COST Set the cost of vector memory access instructions. +- + mcheck-zero-division + Target Mask(CHECK_ZERO_DIV) + Trap on integer divide by zero. +@@ -226,3 +222,14 @@ mrelax + Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION) + Take advantage of linker relaxations to reduce the number of instructions + required to materialize symbol addresses. ++ ++-param=loongarch-vect-unroll-limit= ++Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param ++Used to limit unroll factor which indicates how much the autovectorizer may ++unroll a loop. The default value is 6. 
++ ++-param=loongarch-vect-issue-info= ++Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) IntegerRange(1, 64) Param ++Indicate how many non memory access vector instructions can be issued per ++cycle, it's used in unroll factor determination for autovectorizer. The ++default value is 4. +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 7eed77836..168f3d0db 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -24632,6 +24632,13 @@ environments where no dynamic link is performed, like firmwares, OS + kernels, executables linked with @option{-static} or @option{-static-pie}. + @option{-mdirect-extern-access} is not compatible with @option{-fPIC} or + @option{-fpic}. ++ ++@item loongarch-vect-unroll-limit ++The vectorizer will use available tuning information to determine whether it ++would be beneficial to unroll the main vectorized loop and by how much. This ++parameter sets the upper bound of how much the vectorizer will unroll the main ++loop. The default value is six. ++ + @end table + + @node M32C Options +-- +2.43.0 + diff --git a/0014-LoongArch-Define-macro-CLEAR_INSN_CACHE.patch b/0014-LoongArch-Define-macro-CLEAR_INSN_CACHE.patch new file mode 100644 index 0000000..5ff13da --- /dev/null +++ b/0014-LoongArch-Define-macro-CLEAR_INSN_CACHE.patch @@ -0,0 +1,34 @@ +From 7f4a912da99f5787c88b275b83ee547c9e1aa3d7 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Mon, 23 Oct 2023 09:07:32 +0800 +Subject: [PATCH 014/188] LoongArch: Define macro CLEAR_INSN_CACHE. + +LoongArch's microarchitecture ensures cache consistency by hardware. +Due to out-of-order execution, "ibar" is required to ensure the visibility of the +store (invalidated icache) executed by this CPU before "ibar" (to the instance). +"ibar" will not invalidate the icache, so the start and end parameters do not affect +"ibar" performance. + +gcc/ChangeLog: + + * config/loongarch/loongarch.h (CLEAR_INSN_CACHE): New definition.
+--- + gcc/config/loongarch/loongarch.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index 572b538be..f7ddfc452 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -1231,3 +1231,8 @@ struct GTY (()) machine_function + (TARGET_HARD_FLOAT_ABI ? (TARGET_DOUBLE_FLOAT_ABI ? 8 : 4) : 0) + + #define FUNCTION_VALUE_REGNO_P(N) ((N) == GP_RETURN || (N) == FP_RETURN) ++ ++/* LoongArch maintains ICache/DCache coherency by hardware, ++ we just need "ibar" to avoid instruction hazard here. */ ++#undef CLEAR_INSN_CACHE ++#define CLEAR_INSN_CACHE(beg, end) __builtin_loongarch_ibar (0) +-- +2.43.0 + diff --git a/0015-LoongArch-Add-enum-style-mexplicit-relocs-option.patch b/0015-LoongArch-Add-enum-style-mexplicit-relocs-option.patch new file mode 100644 index 0000000..0786ad2 --- /dev/null +++ b/0015-LoongArch-Add-enum-style-mexplicit-relocs-option.patch @@ -0,0 +1,233 @@ +From 56403837a7859f0a7ccbc56c055261c9adf22fb8 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Mon, 23 Oct 2023 15:23:11 +0800 +Subject: [PATCH 015/188] LoongArch: Add enum-style -mexplicit-relocs= option + +To take a better balance between scheduling and relaxation when -flto is +enabled, add three-way -mexplicit-relocs={auto,none,always} options. +The old -mexplicit-relocs and -mno-explicit-relocs options are still +supported, they are mapped to -mexplicit-relocs=always and +-mexplicit-relocs=none. + +The default choice is determined by probing assembler capabilities at +build time. If the assembler does not supports explicit relocs at all, +the default will be none; if it supports explicit relocs but not +relaxation, the default will be always; if both explicit relocs and +relaxation are supported, the default will be auto. + +Currently auto is same as none. We will make auto more clever in +following changes. 
+ +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch-strings: Add strings for + -mexplicit-relocs={auto,none,always}. + * config/loongarch/genopts/loongarch.opt.in: Add options for + -mexplicit-relocs={auto,none,always}. + * config/loongarch/loongarch-str.h: Regenerate. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch-def.h + (EXPLICIT_RELOCS_AUTO): Define. + (EXPLICIT_RELOCS_NONE): Define. + (EXPLICIT_RELOCS_ALWAYS): Define. + (N_EXPLICIT_RELOCS_TYPES): Define. + * config/loongarch/loongarch.cc + (loongarch_option_override_internal): Error out if the old-style + -m[no-]explicit-relocs option is used with + -mexplicit-relocs={auto,none,always} together. Map + -mno-explicit-relocs to -mexplicit-relocs=none and + -mexplicit-relocs to -mexplicit-relocs=always for backward + compatibility. Set a proper default for -mexplicit-relocs= + based on configure-time probed linker capability. Update a + diagnostic message to mention -mexplicit-relocs=always instead + of the old-style -mexplicit-relocs. + (loongarch_handle_model_attribute): Update a diagnostic message + to mention -mexplicit-relocs=always instead of the old-style + -mexplicit-relocs. + * config/loongarch/loongarch.h (TARGET_EXPLICIT_RELOCS): Define. 
+--- + .../loongarch/genopts/loongarch-strings | 6 +++++ + gcc/config/loongarch/genopts/loongarch.opt.in | 21 ++++++++++++++-- + gcc/config/loongarch/loongarch-def.h | 6 +++++ + gcc/config/loongarch/loongarch-str.h | 5 ++++ + gcc/config/loongarch/loongarch.cc | 24 +++++++++++++++++-- + gcc/config/loongarch/loongarch.h | 3 +++ + gcc/config/loongarch/loongarch.opt | 21 ++++++++++++++-- + 7 files changed, 80 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index eb5086fe3..6c8a42af2 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -65,3 +65,9 @@ STR_CMODEL_TS tiny-static + STR_CMODEL_MEDIUM medium + STR_CMODEL_LARGE large + STR_CMODEL_EXTREME extreme ++ ++# -mexplicit-relocs ++OPTSTR_EXPLICIT_RELOCS explicit-relocs ++STR_EXPLICIT_RELOCS_AUTO auto ++STR_EXPLICIT_RELOCS_NONE none ++STR_EXPLICIT_RELOCS_ALWAYS always +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 74cf4a7f7..e7df1964a 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -176,10 +176,27 @@ mmax-inline-memcpy-size= + Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) + -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. 
+ +-mexplicit-relocs +-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION) ++Enum ++Name(explicit_relocs) Type(int) ++The code model option names for -mexplicit-relocs: ++ ++EnumValue ++Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_AUTO@@) Value(EXPLICIT_RELOCS_AUTO) ++ ++EnumValue ++Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_NONE@@) Value(EXPLICIT_RELOCS_NONE) ++ ++EnumValue ++Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_ALWAYS@@) Value(EXPLICIT_RELOCS_ALWAYS) ++ ++mexplicit-relocs= ++Target RejectNegative Joined Enum(explicit_relocs) Var(la_opt_explicit_relocs) Init(M_OPT_UNSET) + Use %reloc() assembly operators. + ++mexplicit-relocs ++Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) ++Use %reloc() assembly operators (for backward compatibility). ++ + ; The code model option names for -mcmodel. + Enum + Name(cmodel) Type(int) +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index eb8e53b20..4757de14b 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -100,6 +100,12 @@ extern const char* loongarch_cmodel_strings[]; + #define CMODEL_EXTREME 5 + #define N_CMODEL_TYPES 6 + ++/* enum explicit_relocs */ ++#define EXPLICIT_RELOCS_AUTO 0 ++#define EXPLICIT_RELOCS_NONE 1 ++#define EXPLICIT_RELOCS_ALWAYS 2 ++#define N_EXPLICIT_RELOCS_TYPES 3 ++ + /* The common default value for variables whose assignments + are triggered by command-line options. */ + +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index ecfebf9db..037e9e583 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -64,4 +64,9 @@ along with GCC; see the file COPYING3. 
If not see + #define STR_CMODEL_LARGE "large" + #define STR_CMODEL_EXTREME "extreme" + ++#define OPTSTR_EXPLICIT_RELOCS "explicit-relocs" ++#define STR_EXPLICIT_RELOCS_AUTO "auto" ++#define STR_EXPLICIT_RELOCS_NONE "none" ++#define STR_EXPLICIT_RELOCS_ALWAYS "always" ++ + #endif /* LOONGARCH_STR_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index e22a64600..3258c8655 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -7383,6 +7383,25 @@ loongarch_option_override_internal (struct gcc_options *opts, + loongarch_update_gcc_opt_status (&la_target, opts, opts_set); + loongarch_cpu_option_override (&la_target, opts, opts_set); + ++ if (la_opt_explicit_relocs != M_OPT_UNSET ++ && la_opt_explicit_relocs_backward != M_OPT_UNSET) ++ error ("do not use %qs (with %qs) and %qs (without %qs) together", ++ "-mexplicit-relocs=", "=", ++ la_opt_explicit_relocs_backward ? "-mexplicit-relocs" ++ : "-mno-explicit-relocs", "="); ++ ++ if (la_opt_explicit_relocs_backward != M_OPT_UNSET) ++ la_opt_explicit_relocs = (la_opt_explicit_relocs_backward ++ ? EXPLICIT_RELOCS_ALWAYS ++ : EXPLICIT_RELOCS_NONE); ++ ++ if (la_opt_explicit_relocs == M_OPT_UNSET) ++ la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS ++ ? (HAVE_AS_MRELAX_OPTION ++ ? 
EXPLICIT_RELOCS_AUTO ++ : EXPLICIT_RELOCS_ALWAYS) ++ : EXPLICIT_RELOCS_NONE); ++ + if (TARGET_ABI_LP64) + flag_pcc_struct_return = 0; + +@@ -7413,7 +7432,7 @@ loongarch_option_override_internal (struct gcc_options *opts, + case CMODEL_EXTREME: + if (!TARGET_EXPLICIT_RELOCS) + error ("code model %qs needs %s", +- "extreme", "-mexplicit-relocs"); ++ "extreme", "-mexplicit-relocs=always"); + + if (opts->x_flag_plt) + { +@@ -7717,7 +7736,8 @@ loongarch_handle_model_attribute (tree *node, tree name, tree arg, int, + if (!TARGET_EXPLICIT_RELOCS) + { + error_at (DECL_SOURCE_LOCATION (decl), +- "%qE attribute requires %s", name, "-mexplicit-relocs"); ++ "%qE attribute requires %s", name, ++ "-mexplicit-relocs=always"); + *no_add_attrs = true; + return NULL_TREE; + } +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index f7ddfc452..6e8ac293a 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -1236,3 +1236,6 @@ struct GTY (()) machine_function + we just need "ibar" to avoid instruction hazard here. */ + #undef CLEAR_INSN_CACHE + #define CLEAR_INSN_CACHE(beg, end) __builtin_loongarch_ibar (0) ++ ++#define TARGET_EXPLICIT_RELOCS \ ++ (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS) +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 34bd832bd..44376fd77 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -183,10 +183,27 @@ mmax-inline-memcpy-size= + Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) + -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. 
+ +-mexplicit-relocs +-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION) ++Enum ++Name(explicit_relocs) Type(int) ++The code model option names for -mexplicit-relocs: ++ ++EnumValue ++Enum(explicit_relocs) String(auto) Value(EXPLICIT_RELOCS_AUTO) ++ ++EnumValue ++Enum(explicit_relocs) String(none) Value(EXPLICIT_RELOCS_NONE) ++ ++EnumValue ++Enum(explicit_relocs) String(always) Value(EXPLICIT_RELOCS_ALWAYS) ++ ++mexplicit-relocs= ++Target RejectNegative Joined Enum(explicit_relocs) Var(la_opt_explicit_relocs) Init(M_OPT_UNSET) + Use %reloc() assembly operators. + ++mexplicit-relocs ++Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) ++Use %reloc() assembly operators (for backward compatibility). ++ + ; The code model option names for -mcmodel. + Enum + Name(cmodel) Type(int) +-- +2.43.0 + diff --git a/0016-LoongArch-Use-explicit-relocs-for-GOT-access-when-me.patch b/0016-LoongArch-Use-explicit-relocs-for-GOT-access-when-me.patch new file mode 100644 index 0000000..eccf107 --- /dev/null +++ b/0016-LoongArch-Use-explicit-relocs-for-GOT-access-when-me.patch @@ -0,0 +1,212 @@ +From 8539e5560e7bf11473cc7c386043b7019264236a Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sat, 30 Sep 2023 18:46:28 +0800 +Subject: [PATCH 016/188] LoongArch: Use explicit relocs for GOT access when + -mexplicit-relocs=auto and LTO during a final link with linker plugin + +If we are performing LTO for a final link and linker plugin is enabled, +then we are sure any GOT access may resolve to a symbol out of the link +unit (otherwise the linker plugin will tell us the symbol should be +resolved locally and we'll use PC-relative access instead). + +Produce machine instructions with explicit relocs instead of la.global +for better scheduling. + +gcc/ChangeLog: + + * config/loongarch/loongarch-protos.h + (loongarch_explicit_relocs_p): Declare new function. + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + Implement. 
+ (loongarch_symbol_insns): Call loongarch_explicit_relocs_p for + SYMBOL_GOT_DISP, instead of using TARGET_EXPLICIT_RELOCS. + (loongarch_split_symbol): Call loongarch_explicit_relocs_p for + deciding if return early, instead of using + TARGET_EXPLICIT_RELOCS. + (loongarch_output_move): CAll loongarch_explicit_relocs_p + instead of using TARGET_EXPLICIT_RELOCS. + * config/loongarch/loongarch.md (*low): Remove + TARGET_EXPLICIT_RELOCS from insn condition. + (@ld_from_got): Likewise. + * config/loongarch/predicates.md (move_operand): Call + loongarch_explicit_relocs_p instead of using + TARGET_EXPLICIT_RELOCS. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-auto-lto.c: New test. +--- + gcc/config/loongarch/loongarch-protos.h | 1 + + gcc/config/loongarch/loongarch.cc | 34 +++++++++++++++---- + gcc/config/loongarch/loongarch.md | 4 +-- + gcc/config/loongarch/predicates.md | 8 ++--- + .../loongarch/explicit-relocs-auto-lto.c | 26 ++++++++++++++ + 5 files changed, 59 insertions(+), 14 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 163162598..51d38177b 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -220,4 +220,5 @@ extern rtx loongarch_gen_const_int_vector_shuffle (machine_mode, int); + extern tree loongarch_build_builtin_va_list (void); + + extern rtx loongarch_build_signbit_mask (machine_mode, bool, bool); ++extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type); + #endif /* ! 
GCC_LOONGARCH_PROTOS_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 3258c8655..1d20577e7 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1922,6 +1922,29 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type) + gcc_unreachable (); + } + ++/* If -mexplicit-relocs=auto, we use machine operations with reloc hints ++ for cases where the linker is unable to relax so we can schedule the ++ machine operations, otherwise use an assembler pseudo-op so the ++ assembler will generate R_LARCH_RELAX. */ ++ ++bool ++loongarch_explicit_relocs_p (enum loongarch_symbol_type type) ++{ ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO) ++ return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS; ++ ++ /* If we are performing LTO for a final link, and we have the linker ++ plugin so we know the resolution of the symbols, then all GOT ++ references are binding to external symbols or preemptable symbols. ++ So the linker cannot relax them. */ ++ return (in_lto_p ++ && !flag_incremental_link ++ && HAVE_LTO_PLUGIN == 2 ++ && (!global_options_set.x_flag_use_linker_plugin ++ || global_options.x_flag_use_linker_plugin) ++ && type == SYMBOL_GOT_DISP); ++} ++ + /* Returns the number of instructions necessary to reference a symbol. */ + + static int +@@ -1937,7 +1960,7 @@ loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode) + case SYMBOL_GOT_DISP: + /* The constant will have to be loaded from the GOT before it + is used in an address. */ +- if (!TARGET_EXPLICIT_RELOCS && mode != MAX_MACHINE_MODE) ++ if (!loongarch_explicit_relocs_p (type) && mode != MAX_MACHINE_MODE) + return 0; + + return 3; +@@ -3034,7 +3057,7 @@ loongarch_symbol_extreme_p (enum loongarch_symbol_type type) + If so, and if LOW_OUT is nonnull, emit the high part and store the + low part in *LOW_OUT. Leave *LOW_OUT unchanged otherwise. 
+ +- Return false if build with '-mno-explicit-relocs'. ++ Return false if build with '-mexplicit-relocs=none'. + + TEMP is as for loongarch_force_temporary and is used to load the high + part into a register. +@@ -3048,12 +3071,9 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + { + enum loongarch_symbol_type symbol_type; + +- /* If build with '-mno-explicit-relocs', don't split symbol. */ +- if (!TARGET_EXPLICIT_RELOCS) +- return false; +- + if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE) + || !loongarch_symbolic_constant_p (addr, &symbol_type) ++ || !loongarch_explicit_relocs_p (symbol_type) + || loongarch_symbol_insns (symbol_type, mode) == 0 + || !loongarch_split_symbol_type (symbol_type)) + return false; +@@ -4793,7 +4813,7 @@ loongarch_output_move (rtx dest, rtx src) + } + } + +- if (!TARGET_EXPLICIT_RELOCS ++ if (!loongarch_explicit_relocs_p (loongarch_classify_symbol (src)) + && dest_code == REG && symbolic_operand (src, VOIDmode)) + { + if (loongarch_classify_symbol (src) == SYMBOL_PCREL) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 29ac950bf..81c97393b 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2247,7 +2247,7 @@ + [(set (match_operand:P 0 "register_operand" "=r") + (lo_sum:P (match_operand:P 1 "register_operand" " r") + (match_operand:P 2 "symbolic_operand" "")))] +- "TARGET_EXPLICIT_RELOCS" ++ "" + "addi.\t%0,%1,%L2" + [(set_attr "type" "arith") + (set_attr "mode" "")]) +@@ -2275,7 +2275,7 @@ + (match_operand:P 1 "register_operand" "r") + (match_operand:P 2 "symbolic_operand")))] + UNSPEC_LOAD_FROM_GOT))] +- "TARGET_EXPLICIT_RELOCS" ++ "" + "ld.\t%0,%1,%L2" + [(set_attr "type" "move")] + ) +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index ad6cee5c4..6b50b3a4d 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -541,16 +541,14 @@ + 
case SYMBOL_REF: + case LABEL_REF: + return (loongarch_symbolic_constant_p (op, &symbol_type) +- && (!TARGET_EXPLICIT_RELOCS ++ && (!loongarch_explicit_relocs_p (symbol_type) + || !loongarch_split_symbol_type (symbol_type))); + + case HIGH: +- /* '-mno-explicit-relocs' don't generate high/low pairs. */ +- if (!TARGET_EXPLICIT_RELOCS) +- return false; +- + op = XEXP (op, 0); ++ + return (loongarch_symbolic_constant_p (op, &symbol_type) ++ && loongarch_explicit_relocs_p (symbol_type) + && loongarch_split_symbol_type (symbol_type)); + + default: +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c +new file mode 100644 +index 000000000..f53b54689 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c +@@ -0,0 +1,26 @@ ++/* { dg-do link } */ ++/* { dg-require-effective-target lto } */ ++/* { dg-require-linker-plugin "" } */ ++/* { dg-options "-fpic -shared -O2 --save-temps -mexplicit-relocs=auto -flto -fuse-linker-plugin -flto-partition=one" } */ ++ ++int pcrel __attribute__ ((visibility ("hidden"))); ++int got __attribute__ ((visibility ("default"))); ++ ++int ++*addr_pcrel (void) ++{ ++ return &pcrel; ++} ++ ++int ++*addr_got (void) ++{ ++ return &got; ++} ++ ++/* With linker plugin we should use la.local (it can be relaxed to pcaddi), ++ but not la.global (we are pretty sure the linker cannot relax la.global ++ got). 
*/ ++/* { dg-final { scan-lto-assembler "la.local.*pcrel" } } */ ++/* { dg-final { scan-lto-assembler "pcalau12i.*%got_pc_hi20\\\(got\\\)" } } */ ++/* { dg-final { scan-lto-assembler "ld.*%got_pc_lo12\\\(got\\\)" } } */ +-- +2.43.0 + diff --git a/0017-LoongArch-Use-explicit-relocs-for-TLS-access-with-me.patch b/0017-LoongArch-Use-explicit-relocs-for-TLS-access-with-me.patch new file mode 100644 index 0000000..e4a1f37 --- /dev/null +++ b/0017-LoongArch-Use-explicit-relocs-for-TLS-access-with-me.patch @@ -0,0 +1,146 @@ +From 23b4166c6699a1a3063b11fa45497c1a1524bd48 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Mon, 2 Oct 2023 13:00:18 +0800 +Subject: [PATCH 017/188] LoongArch: Use explicit relocs for TLS access with + -mexplicit-relocs=auto + +The linker does not know how to relax TLS access for LoongArch, so let's +emit machine instructions with explicit relocs for TLS. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + Return true for TLS symbol types if -mexplicit-relocs=auto. + (loongarch_call_tls_get_addr): Replace TARGET_EXPLICIT_RELOCS + with la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE. + (loongarch_legitimize_tls_address): Likewise. + * config/loongarch/loongarch.md (@tls_low): Remove + TARGET_EXPLICIT_RELOCS from insn condition. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c: New + test. + * gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c: New + test. 
+--- + gcc/config/loongarch/loongarch.cc | 37 ++++++++++++------- + gcc/config/loongarch/loongarch.md | 2 +- + .../explicit-relocs-auto-tls-ld-gd.c | 9 +++++ + .../explicit-relocs-auto-tls-le-ie.c | 6 +++ + 4 files changed, 40 insertions(+), 14 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 1d20577e7..fa5c14be6 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1933,16 +1933,27 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO) + return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS; + +- /* If we are performing LTO for a final link, and we have the linker +- plugin so we know the resolution of the symbols, then all GOT +- references are binding to external symbols or preemptable symbols. +- So the linker cannot relax them. */ +- return (in_lto_p +- && !flag_incremental_link +- && HAVE_LTO_PLUGIN == 2 +- && (!global_options_set.x_flag_use_linker_plugin +- || global_options.x_flag_use_linker_plugin) +- && type == SYMBOL_GOT_DISP); ++ switch (type) ++ { ++ case SYMBOL_TLS_IE: ++ case SYMBOL_TLS_LE: ++ case SYMBOL_TLSGD: ++ case SYMBOL_TLSLDM: ++ /* The linker don't know how to relax TLS accesses. */ ++ return true; ++ case SYMBOL_GOT_DISP: ++ /* If we are performing LTO for a final link, and we have the ++ linker plugin so we know the resolution of the symbols, then ++ all GOT references are binding to external symbols or ++ preemptable symbols. So the linker cannot relax them. 
*/ ++ return (in_lto_p ++ && !flag_incremental_link ++ && HAVE_LTO_PLUGIN == 2 ++ && (!global_options_set.x_flag_use_linker_plugin ++ || global_options.x_flag_use_linker_plugin)); ++ default: ++ return false; ++ } + } + + /* Returns the number of instructions necessary to reference a symbol. */ +@@ -2749,7 +2760,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + start_sequence (); + +- if (TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { + /* Split tls symbol to high and low. */ + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); +@@ -2914,7 +2925,7 @@ loongarch_legitimize_tls_address (rtx loc) + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp1 = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (Pmode); +- if (TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { + tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE); + tmp3 = gen_reg_rtx (Pmode); +@@ -2951,7 +2962,7 @@ loongarch_legitimize_tls_address (rtx loc) + tmp1 = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (Pmode); + +- if (TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { + tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE); + tmp3 = gen_reg_rtx (Pmode); +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 81c97393b..3b836d535 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2257,7 +2257,7 @@ + (unspec:P [(mem:P (lo_sum:P (match_operand:P 1 "register_operand" "r") + (match_operand:P 2 "symbolic_operand" "")))] + UNSPEC_TLS_LOW))] +- "TARGET_EXPLICIT_RELOCS" ++ "" + "addi.\t%0,%1,%L2" + [(set_attr "type" "arith") + (set_attr "mode" "")]) +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +new file mode 100644 +index 000000000..957ff98df +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto" } */ ++ ++__thread int a __attribute__((visibility("hidden"))); ++extern __thread int b __attribute__((visibility("default"))); ++ ++int test() { return a + b; } ++ ++/* { dg-final { scan-assembler-not "la.tls" { target tls_native } } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c +new file mode 100644 +index 000000000..78898cfc6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mexplicit-relocs=auto" } */ ++ ++#include "explicit-relocs-auto-tls-ld-gd.c" ++ ++/* { dg-final { scan-assembler-not "la.tls" { target tls_native } } } */ +-- +2.43.0 + diff --git a/0018-LoongArch-Use-explicit-relocs-for-addresses-only-use.patch b/0018-LoongArch-Use-explicit-relocs-for-addresses-only-use.patch new file mode 100644 index 0000000..b2962b8 --- /dev/null +++ b/0018-LoongArch-Use-explicit-relocs-for-addresses-only-use.patch @@ -0,0 +1,245 @@ +From c29a4f4fb5ff24ef975ba27688a3da696aa7d006 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 1 Oct 2023 11:14:29 +0800 +Subject: [PATCH 018/188] LoongArch: Use explicit relocs for addresses only + used for one load or store with -mexplicit-relocs=auto and + -mcmodel={normal,medium} + +In these cases, if we use explicit relocs, we end up with 2 +instructions: + + pcalau12i t0, %pc_hi20(x) + ld.d t0, t0, %pc_lo12(x) + +If we use la.local pseudo-op, in the best scenario (x is in +/- 2MiB +range) we still have 2 instructions: + + pcaddi t0, %pcrel_20(x) + ld.d t0, t0, 0 + +If x is out of the range we'll have 3 instructions. So for these cases +just emit machine instructions with explicit relocs. 
+ +gcc/ChangeLog: + + * config/loongarch/predicates.md (symbolic_pcrel_operand): New + predicate. + * config/loongarch/loongarch.md (define_peephole2): Optimize + la.local + ld/st to pcalau12i + ld/st if the address is only used + once if -mexplicit-relocs=auto and -mcmodel=normal or medium. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-auto-single-load-store.c: + New test. + * gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c: + New test. +--- + gcc/config/loongarch/loongarch.md | 122 ++++++++++++++++++ + gcc/config/loongarch/predicates.md | 7 + + ...-relocs-auto-single-load-store-no-anchor.c | 6 + + .../explicit-relocs-auto-single-load-store.c | 14 ++ + 4 files changed, 149 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 3b836d535..c4c6baa60 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -65,6 +65,7 @@ + + UNSPEC_LOAD_FROM_GOT + UNSPEC_PCALAU12I ++ UNSPEC_PCALAU12I_GR + UNSPEC_ORI_L_LO12 + UNSPEC_LUI_L_HI20 + UNSPEC_LUI_H_LO20 +@@ -2297,6 +2298,16 @@ + "pcalau12i\t%0,%%pc_hi20(%1)" + [(set_attr "type" "move")]) + ++;; @pcalau12i may be used for sibcall so it has a strict constraint. This ++;; allows any general register as the operand. 
++(define_insn "@pcalau12i_gr" ++ [(set (match_operand:P 0 "register_operand" "=r") ++ (unspec:P [(match_operand:P 1 "symbolic_operand" "")] ++ UNSPEC_PCALAU12I_GR))] ++ "" ++ "pcalau12i\t%0,%%pc_hi20(%1)" ++ [(set_attr "type" "move")]) ++ + (define_insn "@ori_l_lo12" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P [(match_operand:P 1 "register_operand" "r") +@@ -3748,6 +3759,117 @@ + [(set_attr "type" "unknown") + (set_attr "mode" "")]) + ++;; With normal or medium code models, if the only use of a pc-relative ++;; address is for loading or storing a value, then relying on linker ++;; relaxation is not better than emitting the machine instruction directly. ++;; Even if the la.local pseudo op can be relaxed, we get: ++;; ++;; pcaddi $t0, %pcrel_20(x) ++;; ld.d $t0, $t0, 0 ++;; ++;; There are still two instructions, same as using the machine instructions ++;; and explicit relocs: ++;; ++;; pcalau12i $t0, %pc_hi20(x) ++;; ld.d $t0, $t0, %pc_lo12(x) ++;; ++;; And if the pseudo op cannot be relaxed, we'll get a worse result (with ++;; 3 instructions). 
++(define_peephole2 ++ [(set (match_operand:P 0 "register_operand") ++ (match_operand:P 1 "symbolic_pcrel_operand")) ++ (set (match_operand:GPR 2 "register_operand") ++ (mem:GPR (match_dup 0)))] ++ "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ ++ && (peep2_reg_dead_p (2, operands[0]) \ ++ || REGNO (operands[0]) == REGNO (operands[2]))" ++ [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))] ++ { ++ emit_insn (gen_pcalau12i_gr (operands[0], operands[1])); ++ }) ++ ++(define_peephole2 ++ [(set (match_operand:P 0 "register_operand") ++ (match_operand:P 1 "symbolic_pcrel_operand")) ++ (set (match_operand:GPR 2 "register_operand") ++ (mem:GPR (plus (match_dup 0) ++ (match_operand 3 "const_int_operand"))))] ++ "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ ++ && (peep2_reg_dead_p (2, operands[0]) \ ++ || REGNO (operands[0]) == REGNO (operands[2]))" ++ [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))] ++ { ++ operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3])); ++ emit_insn (gen_pcalau12i_gr (operands[0], operands[1])); ++ }) ++ ++(define_peephole2 ++ [(set (match_operand:P 0 "register_operand") ++ (match_operand:P 1 "symbolic_pcrel_operand")) ++ (set (match_operand:GPR 2 "register_operand") ++ (any_extend:GPR (mem:SUBDI (match_dup 0))))] ++ "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ ++ && (peep2_reg_dead_p (2, operands[0]) \ ++ || REGNO (operands[0]) == REGNO (operands[2]))" ++ [(set (match_dup 2) ++ (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0) ++ (match_dup 1)))))] ++ { ++ emit_insn (gen_pcalau12i_gr (operands[0], operands[1])); ++ }) ++ ++(define_peephole2 ++ [(set (match_operand:P 0 "register_operand") ++ (match_operand:P 1 "symbolic_pcrel_operand")) ++ (set (match_operand:GPR 2 "register_operand") ++ (any_extend:GPR ++ 
(mem:SUBDI (plus (match_dup 0) ++ (match_operand 3 "const_int_operand")))))] ++ "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ ++ && (peep2_reg_dead_p (2, operands[0]) \ ++ || REGNO (operands[0]) == REGNO (operands[2]))" ++ [(set (match_dup 2) ++ (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0) ++ (match_dup 1)))))] ++ { ++ operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3])); ++ emit_insn (gen_pcalau12i_gr (operands[0], operands[1])); ++ }) ++ ++(define_peephole2 ++ [(set (match_operand:P 0 "register_operand") ++ (match_operand:P 1 "symbolic_pcrel_operand")) ++ (set (mem:QHWD (match_dup 0)) ++ (match_operand:QHWD 2 "register_operand"))] ++ "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ ++ && (peep2_reg_dead_p (2, operands[0])) \ ++ && REGNO (operands[0]) != REGNO (operands[2])" ++ [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))] ++ { ++ emit_insn (gen_pcalau12i_gr (operands[0], operands[1])); ++ }) ++ ++(define_peephole2 ++ [(set (match_operand:P 0 "register_operand") ++ (match_operand:P 1 "symbolic_pcrel_operand")) ++ (set (mem:QHWD (plus (match_dup 0) ++ (match_operand 3 "const_int_operand"))) ++ (match_operand:QHWD 2 "register_operand"))] ++ "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ ++ && (peep2_reg_dead_p (2, operands[0])) \ ++ && REGNO (operands[0]) != REGNO (operands[2])" ++ [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))] ++ { ++ operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3])); ++ emit_insn (gen_pcalau12i_gr (operands[0], operands[1])); ++ }) ++ + ;; Synchronization instructions. 
+ + (include "sync.md") +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 6b50b3a4d..1d669f560 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -563,6 +563,13 @@ + return loongarch_symbolic_constant_p (op, &type); + }) + ++(define_predicate "symbolic_pcrel_operand" ++ (match_code "const,symbol_ref,label_ref") ++{ ++ enum loongarch_symbol_type type; ++ return loongarch_symbolic_constant_p (op, &type) && type == SYMBOL_PCREL; ++}) ++ + (define_predicate "equality_operator" + (match_code "eq,ne")) + +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c +new file mode 100644 +index 000000000..fb03403d7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mexplicit-relocs=auto -fno-section-anchors" } */ ++ ++#include "explicit-relocs-auto-single-load-store.c" ++ ++/* { dg-final { scan-assembler-not "la.local" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store.c +new file mode 100644 +index 000000000..0d53644cd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mexplicit-relocs=auto" } */ ++ ++long a; ++int b; ++unsigned int c; ++ ++long load_a() { return a; } ++long load_b() { return b; } ++long load_c() { return c; } ++void store_a(long x) { a = x; } ++void store_b(int x) { b = x; } ++ ++/* { dg-final { scan-assembler-not "la.local" } } */ +-- +2.43.0 + diff --git a/0019-LoongArch-Implement-__builtin_thread_pointer-for-TLS.patch 
b/0019-LoongArch-Implement-__builtin_thread_pointer-for-TLS.patch new file mode 100644 index 0000000..ec34040 --- /dev/null +++ b/0019-LoongArch-Implement-__builtin_thread_pointer-for-TLS.patch @@ -0,0 +1,84 @@ +From 619b6081064bf85a19f4659e278a361875e4f9fb Mon Sep 17 00:00:00 2001 +From: chenxiaolong +Date: Tue, 24 Oct 2023 14:40:14 +0800 +Subject: [PATCH 019/188] LoongArch: Implement __builtin_thread_pointer for + TLS. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (get_thread_pointer):Adds the + instruction template corresponding to the __builtin_thread_pointer + function. + * doc/extend.texi:Add the __builtin_thread_pointer function support + description to the documentation. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/builtin_thread_pointer.c: New test. +--- + gcc/config/loongarch/loongarch.md | 7 +++++++ + gcc/doc/extend.texi | 5 +++++ + .../gcc.target/loongarch/builtin_thread_pointer.c | 10 ++++++++++ + 3 files changed, 22 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index c4c6baa60..80487488d 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -113,6 +113,7 @@ + + (define_constants + [(RETURN_ADDR_REGNUM 1) ++ (TP_REGNUM 2) + (T0_REGNUM 12) + (T1_REGNUM 13) + (S0_REGNUM 23) +@@ -3647,6 +3648,12 @@ + [(set_attr "length" "0") + (set_attr "type" "ghost")]) + ++;; Named pattern for expanding thread pointer reference. ++(define_expand "get_thread_pointer" ++ [(set (match_operand:P 0 "register_operand" "=r") ++ (reg:P TP_REGNUM))] ++ "HAVE_AS_TLS" ++ {}) + + (define_split + [(match_operand 0 "small_data_pattern")] +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index 1d1bac255..497c6de5f 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -16257,6 +16257,11 @@ function you need to include @code{larchintrin.h}. 
+ void __break (imm0_32767) + @end smallexample + ++Returns the value that is currently set in the @samp{tp} register. ++@smallexample ++ void * __builtin_thread_pointer (void) ++@end smallexample ++ + @node MIPS DSP Built-in Functions + @subsection MIPS DSP Built-in Functions + +diff --git a/gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c b/gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c +new file mode 100644 +index 000000000..541e3b143 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target tls_native } */ ++/* { dg-options "-O2" } */ ++/* { dg-final { scan-assembler "or\t\\\$r4,\\\$r2,\\\$r0" } } */ ++ ++void * ++get_tp () ++{ ++ return __builtin_thread_pointer (); ++} +-- +2.43.0 + diff --git a/0020-LoongArch-Fix-vfrint-releated-comments-in-lsxintrin..patch b/0020-LoongArch-Fix-vfrint-releated-comments-in-lsxintrin..patch new file mode 100644 index 0000000..549bee5 --- /dev/null +++ b/0020-LoongArch-Fix-vfrint-releated-comments-in-lsxintrin..patch @@ -0,0 +1,189 @@ +From 9b29e6ba10716656ba9b32c33f021e920bb05f3d Mon Sep 17 00:00:00 2001 +From: Chenghui Pan +Date: Mon, 23 Oct 2023 10:13:24 +0800 +Subject: [PATCH 020/188] LoongArch: Fix vfrint-releated comments in + lsxintrin.h and lasxintrin.h + +The comment of vfrint-related intrinsic functions does not match the return +value type in definition. This patch fixes these comments. + +gcc/ChangeLog: + + * config/loongarch/lasxintrin.h (__lasx_xvftintrnel_l_s): Fix comments. + (__lasx_xvfrintrne_s): Ditto. + (__lasx_xvfrintrne_d): Ditto. + (__lasx_xvfrintrz_s): Ditto. + (__lasx_xvfrintrz_d): Ditto. + (__lasx_xvfrintrp_s): Ditto. + (__lasx_xvfrintrp_d): Ditto. + (__lasx_xvfrintrm_s): Ditto. + (__lasx_xvfrintrm_d): Ditto. + * config/loongarch/lsxintrin.h (__lsx_vftintrneh_l_s): Ditto. + (__lsx_vfrintrne_s): Ditto. + (__lsx_vfrintrne_d): Ditto. + (__lsx_vfrintrz_s): Ditto. 
+ (__lsx_vfrintrz_d): Ditto. + (__lsx_vfrintrp_s): Ditto. + (__lsx_vfrintrp_d): Ditto. + (__lsx_vfrintrm_s): Ditto. + (__lsx_vfrintrm_d): Ditto. +--- + gcc/config/loongarch/lasxintrin.h | 16 ++++++++-------- + gcc/config/loongarch/lsxintrin.h | 16 ++++++++-------- + 2 files changed, 16 insertions(+), 16 deletions(-) + +diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h +index d39379927..7bce2c757 100644 +--- a/gcc/config/loongarch/lasxintrin.h ++++ b/gcc/config/loongarch/lasxintrin.h +@@ -3368,7 +3368,7 @@ __m256i __lasx_xvftintrnel_l_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V8SI, V8SF. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256 __lasx_xvfrintrne_s (__m256 _1) + { +@@ -3376,7 +3376,7 @@ __m256 __lasx_xvfrintrne_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V4DI, V4DF. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256d __lasx_xvfrintrne_d (__m256d _1) + { +@@ -3384,7 +3384,7 @@ __m256d __lasx_xvfrintrne_d (__m256d _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V8SI, V8SF. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256 __lasx_xvfrintrz_s (__m256 _1) + { +@@ -3392,7 +3392,7 @@ __m256 __lasx_xvfrintrz_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V4DI, V4DF. */ ++/* Data types in instruction templates: V4DF, V4DF. 
*/ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256d __lasx_xvfrintrz_d (__m256d _1) + { +@@ -3400,7 +3400,7 @@ __m256d __lasx_xvfrintrz_d (__m256d _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V8SI, V8SF. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256 __lasx_xvfrintrp_s (__m256 _1) + { +@@ -3408,7 +3408,7 @@ __m256 __lasx_xvfrintrp_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V4DI, V4DF. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256d __lasx_xvfrintrp_d (__m256d _1) + { +@@ -3416,7 +3416,7 @@ __m256d __lasx_xvfrintrp_d (__m256d _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V8SI, V8SF. */ ++/* Data types in instruction templates: V8SF, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256 __lasx_xvfrintrm_s (__m256 _1) + { +@@ -3424,7 +3424,7 @@ __m256 __lasx_xvfrintrm_s (__m256 _1) + } + + /* Assembly instruction format: xd, xj. */ +-/* Data types in instruction templates: V4DI, V4DF. */ ++/* Data types in instruction templates: V4DF, V4DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m256d __lasx_xvfrintrm_d (__m256d _1) + { +diff --git a/gcc/config/loongarch/lsxintrin.h b/gcc/config/loongarch/lsxintrin.h +index ec4206990..29553c093 100644 +--- a/gcc/config/loongarch/lsxintrin.h ++++ b/gcc/config/loongarch/lsxintrin.h +@@ -3412,7 +3412,7 @@ __m128i __lsx_vftintrneh_l_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V4SI, V4SF. */ ++/* Data types in instruction templates: V4SF, V4SF. 
*/ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128 __lsx_vfrintrne_s (__m128 _1) + { +@@ -3420,7 +3420,7 @@ __m128 __lsx_vfrintrne_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V2DI, V2DF. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128d __lsx_vfrintrne_d (__m128d _1) + { +@@ -3428,7 +3428,7 @@ __m128d __lsx_vfrintrne_d (__m128d _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V4SI, V4SF. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128 __lsx_vfrintrz_s (__m128 _1) + { +@@ -3436,7 +3436,7 @@ __m128 __lsx_vfrintrz_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V2DI, V2DF. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128d __lsx_vfrintrz_d (__m128d _1) + { +@@ -3444,7 +3444,7 @@ __m128d __lsx_vfrintrz_d (__m128d _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V4SI, V4SF. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128 __lsx_vfrintrp_s (__m128 _1) + { +@@ -3452,7 +3452,7 @@ __m128 __lsx_vfrintrp_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V2DI, V2DF. */ ++/* Data types in instruction templates: V2DF, V2DF. 
*/ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128d __lsx_vfrintrp_d (__m128d _1) + { +@@ -3460,7 +3460,7 @@ __m128d __lsx_vfrintrp_d (__m128d _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V4SI, V4SF. */ ++/* Data types in instruction templates: V4SF, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128 __lsx_vfrintrm_s (__m128 _1) + { +@@ -3468,7 +3468,7 @@ __m128 __lsx_vfrintrm_s (__m128 _1) + } + + /* Assembly instruction format: vd, vj. */ +-/* Data types in instruction templates: V2DI, V2DF. */ ++/* Data types in instruction templates: V2DF, V2DF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __m128d __lsx_vfrintrm_d (__m128d _1) + { +-- +2.43.0 + diff --git a/0021-LoongArch-Enable-vcond_mask_mn-expanders-for-SF-DF-m.patch b/0021-LoongArch-Enable-vcond_mask_mn-expanders-for-SF-DF-m.patch new file mode 100644 index 0000000..1fd8d8b --- /dev/null +++ b/0021-LoongArch-Enable-vcond_mask_mn-expanders-for-SF-DF-m.patch @@ -0,0 +1,418 @@ +From 156d9451a5b20ac336370f1610a949db1bef7a26 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Thu, 26 Oct 2023 09:34:32 +0800 +Subject: [PATCH 021/188] LoongArch:Enable vcond_mask_mn expanders for SF/DF + modes. + +If the vcond_mask patterns don't support fp modes, the vector +FP comparison instructions will not be generated. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (vcond_mask_): Change to + (vcond_mask_): this. + * config/loongarch/lsx.md (vcond_mask_): Change to + (vcond_mask_): this. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-vcond-1.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-vcond-1.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: New test. 
+--- + gcc/config/loongarch/lasx.md | 14 +-- + gcc/config/loongarch/lsx.md | 14 +-- + .../loongarch/vector/lasx/lasx-vcond-1.c | 64 ++++++++++++++ + .../loongarch/vector/lasx/lasx-vcond-2.c | 87 +++++++++++++++++++ + .../loongarch/vector/lsx/lsx-vcond-1.c | 64 ++++++++++++++ + .../loongarch/vector/lsx/lsx-vcond-2.c | 87 +++++++++++++++++++ + 6 files changed, 316 insertions(+), 14 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 442fda246..f0f2dd08d 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -906,15 +906,15 @@ + }) + + ;; Same as vcond_ +-(define_expand "vcond_mask_" +- [(match_operand:ILASX 0 "register_operand") +- (match_operand:ILASX 1 "reg_or_m1_operand") +- (match_operand:ILASX 2 "reg_or_0_operand") +- (match_operand:ILASX 3 "register_operand")] ++(define_expand "vcond_mask_" ++ [(match_operand:LASX 0 "register_operand") ++ (match_operand:LASX 1 "reg_or_m1_operand") ++ (match_operand:LASX 2 "reg_or_0_operand") ++ (match_operand: 3 "register_operand")] + "ISA_HAS_LASX" + { +- loongarch_expand_vec_cond_mask_expr (mode, +- mode, operands); ++ loongarch_expand_vec_cond_mask_expr (mode, ++ mode, operands); + DONE; + }) + +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index b4e92ae9c..4af32c8df 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -644,15 +644,15 @@ + DONE; + }) + +-(define_expand "vcond_mask_" +- [(match_operand:ILSX 0 "register_operand") +- (match_operand:ILSX 1 "reg_or_m1_operand") +- (match_operand:ILSX 2 "reg_or_0_operand") +- (match_operand:ILSX 3 "register_operand")] ++(define_expand 
"vcond_mask_" ++ [(match_operand:LSX 0 "register_operand") ++ (match_operand:LSX 1 "reg_or_m1_operand") ++ (match_operand:LSX 2 "reg_or_0_operand") ++ (match_operand: 3 "register_operand")] + "ISA_HAS_LSX" + { +- loongarch_expand_vec_cond_mask_expr (mode, +- mode, operands); ++ loongarch_expand_vec_cond_mask_expr (mode, ++ mode, operands); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c +new file mode 100644 +index 000000000..ee9cb1a1f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c +@@ -0,0 +1,64 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-unroll-loops -fno-vect-cost-model -mlasx" } */ ++ ++#include ++ ++#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX) \ ++ void __attribute__ ((noinline, noclone)) \ ++ vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r, \ ++ DATA_TYPE *__restrict__ x, \ ++ DATA_TYPE *__restrict__ y, \ ++ CMP_TYPE *__restrict__ a, \ ++ CMP_TYPE *__restrict__ b, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; i++) \ ++ { \ ++ DATA_TYPE xval = x[i], yval = y[i]; \ ++ CMP_TYPE aval = a[i], bval = b[i]; \ ++ r[i] = aval COND bval ? 
xval : yval; \ ++ } \ ++ } ++ ++#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX) \ ++ T (int8_t, int8_t, COND, SUFFIX) \ ++ T (int16_t, int16_t, COND, SUFFIX) \ ++ T (int32_t, int32_t, COND, SUFFIX) \ ++ T (int64_t, int64_t, COND, SUFFIX) \ ++ T (float, int32_t, COND, SUFFIX##_float) \ ++ T (double, int64_t, COND, SUFFIX##_double) ++ ++#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX) \ ++ T (uint8_t, uint8_t, COND, SUFFIX) \ ++ T (uint16_t, uint16_t, COND, SUFFIX) \ ++ T (uint32_t, uint32_t, COND, SUFFIX) \ ++ T (uint64_t, uint64_t, COND, SUFFIX) \ ++ T (float, uint32_t, COND, SUFFIX##_float) \ ++ T (double, uint64_t, COND, SUFFIX##_double) ++ ++#define TEST_COND_VAR_ALL(T, COND, SUFFIX) \ ++ TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX) \ ++ TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX) ++ ++#define TEST_VAR_ALL(T) \ ++ TEST_COND_VAR_ALL (T, >, _gt) \ ++ TEST_COND_VAR_ALL (T, <, _lt) \ ++ TEST_COND_VAR_ALL (T, >=, _ge) \ ++ TEST_COND_VAR_ALL (T, <=, _le) \ ++ TEST_COND_VAR_ALL (T, ==, _eq) \ ++ TEST_COND_VAR_ALL (T, !=, _ne) ++ ++TEST_VAR_ALL (DEF_VCOND_VAR) ++ ++/* { dg-final { scan-assembler-times {\txvslt\.b} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvslt\.h} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvslt\.w} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvslt\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvsle\.b} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvsle\.h} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvsle\.w} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvsle\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvseq\.b} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvseq\.h} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvseq\.w} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvseq\.d} 4 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c +new file mode 100644 +index 
000000000..5f40ed44c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c +@@ -0,0 +1,87 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -mlasx" } */ ++ ++#include ++ ++#define eq(A, B) ((A) == (B)) ++#define ne(A, B) ((A) != (B)) ++#define olt(A, B) ((A) < (B)) ++#define ole(A, B) ((A) <= (B)) ++#define oge(A, B) ((A) >= (B)) ++#define ogt(A, B) ((A) > (B)) ++#define ordered(A, B) (!__builtin_isunordered (A, B)) ++#define unordered(A, B) (__builtin_isunordered (A, B)) ++#define ueq(A, B) (!__builtin_islessgreater (A, B)) ++#define ult(A, B) (__builtin_isless (A, B)) ++#define ule(A, B) (__builtin_islessequal (A, B)) ++#define uge(A, B) (__builtin_isgreaterequal (A, B)) ++#define ugt(A, B) (__builtin_isgreater (A, B)) ++#define nueq(A, B) (__builtin_islessgreater (A, B)) ++#define nult(A, B) (!__builtin_isless (A, B)) ++#define nule(A, B) (!__builtin_islessequal (A, B)) ++#define nuge(A, B) (!__builtin_isgreaterequal (A, B)) ++#define nugt(A, B) (!__builtin_isgreater (A, B)) ++ ++#define TEST_LOOP(TYPE1, TYPE2, CMP) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest, \ ++ TYPE1 *restrict src, \ ++ TYPE1 fallback, \ ++ TYPE2 *restrict a, \ ++ TYPE2 *restrict b, \ ++ int count) \ ++ { \ ++ for (int i = 0; i < count; ++i) \ ++ {\ ++ TYPE2 aval = a[i]; \ ++ TYPE2 bval = b[i]; \ ++ TYPE1 srcval = src[i]; \ ++ dest[i] = CMP (aval, bval) ? 
srcval : fallback; \ ++ }\ ++ } ++ ++#define TEST_CMP(CMP) \ ++ TEST_LOOP (int32_t, float, CMP) \ ++ TEST_LOOP (uint32_t, float, CMP) \ ++ TEST_LOOP (float, float, CMP) \ ++ TEST_LOOP (int64_t, double, CMP) \ ++ TEST_LOOP (uint64_t, double, CMP) \ ++ TEST_LOOP (double, double, CMP) ++ ++TEST_CMP (eq) ++TEST_CMP (ne) ++TEST_CMP (olt) ++TEST_CMP (ole) ++TEST_CMP (oge) ++TEST_CMP (ogt) ++TEST_CMP (ordered) ++TEST_CMP (unordered) ++TEST_CMP (ueq) ++TEST_CMP (ult) ++TEST_CMP (ule) ++TEST_CMP (uge) ++TEST_CMP (ugt) ++TEST_CMP (nueq) ++TEST_CMP (nult) ++TEST_CMP (nule) ++TEST_CMP (nuge) ++TEST_CMP (nugt) ++ ++/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.s} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.d} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.s} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.d} 2 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.s} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.s} 8 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.d} 8 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.s} 8 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.d} 8 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c +new file mode 100644 +index 
000000000..138adccfa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c +@@ -0,0 +1,64 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-unroll-loops -fno-vect-cost-model -mlsx" } */ ++ ++#include ++ ++#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX) \ ++ void __attribute__ ((noinline, noclone)) \ ++ vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r, \ ++ DATA_TYPE *__restrict__ x, \ ++ DATA_TYPE *__restrict__ y, \ ++ CMP_TYPE *__restrict__ a, \ ++ CMP_TYPE *__restrict__ b, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; i++) \ ++ { \ ++ DATA_TYPE xval = x[i], yval = y[i]; \ ++ CMP_TYPE aval = a[i], bval = b[i]; \ ++ r[i] = aval COND bval ? xval : yval; \ ++ } \ ++ } ++ ++#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX) \ ++ T (int8_t, int8_t, COND, SUFFIX) \ ++ T (int16_t, int16_t, COND, SUFFIX) \ ++ T (int32_t, int32_t, COND, SUFFIX) \ ++ T (int64_t, int64_t, COND, SUFFIX) \ ++ T (float, int32_t, COND, SUFFIX##_float) \ ++ T (double, int64_t, COND, SUFFIX##_double) ++ ++#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX) \ ++ T (uint8_t, uint8_t, COND, SUFFIX) \ ++ T (uint16_t, uint16_t, COND, SUFFIX) \ ++ T (uint32_t, uint32_t, COND, SUFFIX) \ ++ T (uint64_t, uint64_t, COND, SUFFIX) \ ++ T (float, uint32_t, COND, SUFFIX##_float) \ ++ T (double, uint64_t, COND, SUFFIX##_double) ++ ++#define TEST_COND_VAR_ALL(T, COND, SUFFIX) \ ++ TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX) \ ++ TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX) ++ ++#define TEST_VAR_ALL(T) \ ++ TEST_COND_VAR_ALL (T, >, _gt) \ ++ TEST_COND_VAR_ALL (T, <, _lt) \ ++ TEST_COND_VAR_ALL (T, >=, _ge) \ ++ TEST_COND_VAR_ALL (T, <=, _le) \ ++ TEST_COND_VAR_ALL (T, ==, _eq) \ ++ TEST_COND_VAR_ALL (T, !=, _ne) ++ ++TEST_VAR_ALL (DEF_VCOND_VAR) ++ ++/* { dg-final { scan-assembler-times {\tvslt\.b} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvslt\.h} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvslt\.w} 4 } } */ ++/* { dg-final { 
scan-assembler-times {\tvslt\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvsle\.b} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvsle\.h} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvsle\.w} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvsle\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvseq\.b} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvseq\.h} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvseq\.w} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvseq\.d} 4 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c +new file mode 100644 +index 000000000..e8fe31f8f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c +@@ -0,0 +1,87 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -mlsx" } */ ++ ++#include ++ ++#define eq(A, B) ((A) == (B)) ++#define ne(A, B) ((A) != (B)) ++#define olt(A, B) ((A) < (B)) ++#define ole(A, B) ((A) <= (B)) ++#define oge(A, B) ((A) >= (B)) ++#define ogt(A, B) ((A) > (B)) ++#define ordered(A, B) (!__builtin_isunordered (A, B)) ++#define unordered(A, B) (__builtin_isunordered (A, B)) ++#define ueq(A, B) (!__builtin_islessgreater (A, B)) ++#define ult(A, B) (__builtin_isless (A, B)) ++#define ule(A, B) (__builtin_islessequal (A, B)) ++#define uge(A, B) (__builtin_isgreaterequal (A, B)) ++#define ugt(A, B) (__builtin_isgreater (A, B)) ++#define nueq(A, B) (__builtin_islessgreater (A, B)) ++#define nult(A, B) (!__builtin_isless (A, B)) ++#define nule(A, B) (!__builtin_islessequal (A, B)) ++#define nuge(A, B) (!__builtin_isgreaterequal (A, B)) ++#define nugt(A, B) (!__builtin_isgreater (A, B)) ++ ++#define TEST_LOOP(TYPE1, TYPE2, CMP) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest, \ ++ TYPE1 *restrict src, \ ++ TYPE1 fallback, \ ++ TYPE2 *restrict a, \ ++ TYPE2 
*restrict b, \ ++ int count) \ ++ { \ ++ for (int i = 0; i < count; ++i) \ ++ {\ ++ TYPE2 aval = a[i]; \ ++ TYPE2 bval = b[i]; \ ++ TYPE1 srcval = src[i]; \ ++ dest[i] = CMP (aval, bval) ? srcval : fallback; \ ++ }\ ++ } ++ ++#define TEST_CMP(CMP) \ ++ TEST_LOOP (int32_t, float, CMP) \ ++ TEST_LOOP (uint32_t, float, CMP) \ ++ TEST_LOOP (float, float, CMP) \ ++ TEST_LOOP (int64_t, double, CMP) \ ++ TEST_LOOP (uint64_t, double, CMP) \ ++ TEST_LOOP (double, double, CMP) ++ ++TEST_CMP (eq) ++TEST_CMP (ne) ++TEST_CMP (olt) ++TEST_CMP (ole) ++TEST_CMP (oge) ++TEST_CMP (ogt) ++TEST_CMP (ordered) ++TEST_CMP (unordered) ++TEST_CMP (ueq) ++TEST_CMP (ult) ++TEST_CMP (ule) ++TEST_CMP (uge) ++TEST_CMP (ugt) ++TEST_CMP (nueq) ++TEST_CMP (nult) ++TEST_CMP (nule) ++TEST_CMP (nuge) ++TEST_CMP (nugt) ++ ++/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.s} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.d} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.s} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.d} 2 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.s} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.s} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.d} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.s} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.d} 8 } } */ +-- 
+2.43.0 + diff --git a/0022-LoongArch-Define-HAVE_AS_TLS-to-0-if-it-s-undefined-.patch b/0022-LoongArch-Define-HAVE_AS_TLS-to-0-if-it-s-undefined-.patch new file mode 100644 index 0000000..403d90e --- /dev/null +++ b/0022-LoongArch-Define-HAVE_AS_TLS-to-0-if-it-s-undefined-.patch @@ -0,0 +1,34 @@ +From 0527589fb1b7b97cff2c441c1219fb9c8a44dd23 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Mon, 30 Oct 2023 19:39:27 +0800 +Subject: [PATCH 022/188] LoongArch: Define HAVE_AS_TLS to 0 if it's undefined + [PR112299] + +Now loongarch.md uses HAVE_AS_TLS, we need this to fix the failure +building a cross compiler if the cross assembler is not installed yet. + +gcc/ChangeLog: + + PR target/112299 + * config/loongarch/loongarch-opts.h (HAVE_AS_TLS): Define to 0 + if not defined yet. +--- + gcc/config/loongarch/loongarch-opts.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index f2b59abe6..c4975af00 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -103,4 +103,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + #define HAVE_AS_MRELAX_OPTION 0 + #endif + ++#ifndef HAVE_AS_TLS ++#define HAVE_AS_TLS 0 ++#endif ++ + #endif /* LOONGARCH_OPTS_H */ +-- +2.43.0 + diff --git a/0023-LoongArch-Fix-instruction-name-typo-in-lsx_vreplgr2v.patch b/0023-LoongArch-Fix-instruction-name-typo-in-lsx_vreplgr2v.patch new file mode 100644 index 0000000..72f8585 --- /dev/null +++ b/0023-LoongArch-Fix-instruction-name-typo-in-lsx_vreplgr2v.patch @@ -0,0 +1,30 @@ +From bc3ae60454a51b80538b6deba21975d43de23b6a Mon Sep 17 00:00:00 2001 +From: Chenghui Pan +Date: Fri, 3 Nov 2023 17:01:36 +0800 +Subject: [PATCH 023/188] LoongArch: Fix instruction name typo in + lsx_vreplgr2vr_ template + +gcc/ChangeLog: + + * config/loongarch/lsx.md: Fix instruction name typo in + lsx_vreplgr2vr_ template. 
+--- + gcc/config/loongarch/lsx.md | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 4af32c8df..55c7d79a0 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -1523,7 +1523,7 @@ + "ISA_HAS_LSX" + { + if (which_alternative == 1) +- return "ldi.\t%w0,0"; ++ return "vldi.\t%w0,0"; + + if (!TARGET_64BIT && (mode == V2DImode || mode == V2DFmode)) + return "#"; +-- +2.43.0 + diff --git a/0024-LoongArch-Use-simplify_gen_subreg-instead-of-gen_rtx.patch b/0024-LoongArch-Use-simplify_gen_subreg-instead-of-gen_rtx.patch new file mode 100644 index 0000000..0a43f5e --- /dev/null +++ b/0024-LoongArch-Use-simplify_gen_subreg-instead-of-gen_rtx.patch @@ -0,0 +1,116 @@ +From b8f47a362000bb51dec88e0a73f885c57a46f568 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 12 Nov 2023 00:55:13 +0800 +Subject: [PATCH 024/188] LoongArch: Use simplify_gen_subreg instead of + gen_rtx_SUBREG in loongarch_expand_vec_cond_mask_expr [PR112476] + +GCC internal says: + + 'subreg's of 'subreg's are not supported. Using + 'simplify_gen_subreg' is the recommended way to avoid this problem. + +Unfortunately loongarch_expand_vec_cond_mask_expr might create nested +subreg under certain circumstances, causing an ICE. + +Use simplify_gen_subreg as the internal document suggests. + +gcc/ChangeLog: + + PR target/112476 + * config/loongarch/loongarch.cc + (loongarch_expand_vec_cond_mask_expr): Call simplify_gen_subreg + instead of gen_rtx_SUBREG. + +gcc/testsuite/ChangeLog: + + PR target/112476 + * gcc.target/loongarch/pr112476-1.c: New test. + * gcc.target/loongarch/pr112476-2.c: New test. 
+--- + gcc/config/loongarch/loongarch.cc | 11 ++++++--- + .../gcc.target/loongarch/pr112476-1.c | 24 +++++++++++++++++++ + .../gcc.target/loongarch/pr112476-2.c | 5 ++++ + 3 files changed, 37 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-2.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index fa5c14be6..65ca1489f 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -11190,7 +11190,9 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode, + if (mode != vimode) + { + xop1 = gen_reg_rtx (vimode); +- emit_move_insn (xop1, gen_rtx_SUBREG (vimode, operands[1], 0)); ++ emit_move_insn (xop1, ++ simplify_gen_subreg (vimode, operands[1], ++ mode, 0)); + } + emit_move_insn (src1, xop1); + } +@@ -11207,7 +11209,9 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode, + if (mode != vimode) + { + xop2 = gen_reg_rtx (vimode); +- emit_move_insn (xop2, gen_rtx_SUBREG (vimode, operands[2], 0)); ++ emit_move_insn (xop2, ++ simplify_gen_subreg (vimode, operands[2], ++ mode, 0)); + } + emit_move_insn (src2, xop2); + } +@@ -11226,7 +11230,8 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode, + gen_rtx_AND (vimode, mask, src1)); + /* The result is placed back to a register with the mask. 
*/ + emit_insn (gen_rtx_SET (mask, bsel)); +- emit_move_insn (operands[0], gen_rtx_SUBREG (mode, mask, 0)); ++ emit_move_insn (operands[0], simplify_gen_subreg (mode, mask, ++ vimode, 0)); + } + } + +diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-1.c b/gcc/testsuite/gcc.target/loongarch/pr112476-1.c +new file mode 100644 +index 000000000..4cf133e7a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr112476-1.c +@@ -0,0 +1,24 @@ ++/* PR target/112476: ICE with -mlsx */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mfpu=64 -mabi=lp64d -mlsx" } */ ++ ++int foo, bar; ++float baz, res, a; ++ ++void ++apply_adjacent_ternary (float *dst, float *src0) ++{ ++ do ++ { ++ __builtin_memcpy (&res, &src0, sizeof (res)); ++ *dst = foo ? baz : res; ++ dst++; ++ } ++ while (dst != src0); ++} ++ ++void ++xx (void) ++{ ++ apply_adjacent_ternary (&a, &a); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-2.c b/gcc/testsuite/gcc.target/loongarch/pr112476-2.c +new file mode 100644 +index 000000000..cc0dfbfc9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr112476-2.c +@@ -0,0 +1,5 @@ ++/* PR target/112476: ICE with -mlasx */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mfpu=64 -mabi=lp64d -mlasx" } */ ++ ++#include "pr112476-1.c" +-- +2.43.0 + diff --git a/0025-LoongArch-Optimize-single-used-address-with-mexplici.patch b/0025-LoongArch-Optimize-single-used-address-with-mexplici.patch new file mode 100644 index 0000000..91b35d9 --- /dev/null +++ b/0025-LoongArch-Optimize-single-used-address-with-mexplici.patch @@ -0,0 +1,116 @@ +From b23a89e835962ae7d89e5c6f87a69c021097d715 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Mon, 30 Oct 2023 20:24:58 +0800 +Subject: [PATCH 025/188] LoongArch: Optimize single-used address with + -mexplicit-relocs=auto for fld/fst + +fld and fst have same address mode as ld.w and st.w, so the same +optimization as r14-4851 should be applied for them too. 
+ +gcc/ChangeLog: + + * config/loongarch/loongarch.md (LD_AT_LEAST_32_BIT): New mode + iterator. + (ST_ANY): New mode iterator. + (define_peephole2): Use LD_AT_LEAST_32_BIT instead of GPR and + ST_ANY instead of QHWD for applicable patterns. +--- + gcc/config/loongarch/loongarch.md | 38 +++++++++++++++++++------------ + 1 file changed, 24 insertions(+), 14 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 80487488d..ed86c95bd 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -400,6 +400,14 @@ + (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") + (TF "TARGET_64BIT && TARGET_DOUBLE_FLOAT")]) + ++;; A mode for anything with 32 bits or more, and able to be loaded with ++;; the same addressing mode as ld.w. ++(define_mode_iterator LD_AT_LEAST_32_BIT [GPR ANYF]) ++ ++;; A mode for anything able to be stored with the same addressing mode as ++;; st.w. ++(define_mode_iterator ST_ANY [QHWD ANYF]) ++ + ;; In GPR templates, a string like "mul." will expand to "mul.w" in the + ;; 32-bit version and "mul.d" in the 64-bit version. 
+ (define_mode_attr d [(SI "w") (DI "d")]) +@@ -3785,13 +3793,14 @@ + (define_peephole2 + [(set (match_operand:P 0 "register_operand") + (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (match_operand:GPR 2 "register_operand") +- (mem:GPR (match_dup 0)))] ++ (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand") ++ (mem:LD_AT_LEAST_32_BIT (match_dup 0)))] + "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ + && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ + && (peep2_reg_dead_p (2, operands[0]) \ + || REGNO (operands[0]) == REGNO (operands[2]))" +- [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))] ++ [(set (match_dup 2) ++ (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))] + { + emit_insn (gen_pcalau12i_gr (operands[0], operands[1])); + }) +@@ -3799,14 +3808,15 @@ + (define_peephole2 + [(set (match_operand:P 0 "register_operand") + (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (match_operand:GPR 2 "register_operand") +- (mem:GPR (plus (match_dup 0) +- (match_operand 3 "const_int_operand"))))] ++ (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand") ++ (mem:LD_AT_LEAST_32_BIT (plus (match_dup 0) ++ (match_operand 3 "const_int_operand"))))] + "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ + && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ + && (peep2_reg_dead_p (2, operands[0]) \ + || REGNO (operands[0]) == REGNO (operands[2]))" +- [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))] ++ [(set (match_dup 2) ++ (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))] + { + operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3])); + emit_insn (gen_pcalau12i_gr (operands[0], operands[1])); +@@ -3850,13 +3860,13 @@ + (define_peephole2 + [(set (match_operand:P 0 "register_operand") + (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (mem:QHWD (match_dup 0)) +- (match_operand:QHWD 2 "register_operand"))] ++ (set (mem:ST_ANY (match_dup 0)) ++ 
(match_operand:ST_ANY 2 "register_operand"))] + "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ + && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ + && (peep2_reg_dead_p (2, operands[0])) \ + && REGNO (operands[0]) != REGNO (operands[2])" +- [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))] ++ [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))] + { + emit_insn (gen_pcalau12i_gr (operands[0], operands[1])); + }) +@@ -3864,14 +3874,14 @@ + (define_peephole2 + [(set (match_operand:P 0 "register_operand") + (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (mem:QHWD (plus (match_dup 0) +- (match_operand 3 "const_int_operand"))) +- (match_operand:QHWD 2 "register_operand"))] ++ (set (mem:ST_ANY (plus (match_dup 0) ++ (match_operand 3 "const_int_operand"))) ++ (match_operand:ST_ANY 2 "register_operand"))] + "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ + && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ + && (peep2_reg_dead_p (2, operands[0])) \ + && REGNO (operands[0]) != REGNO (operands[2])" +- [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))] ++ [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))] + { + operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3])); + emit_insn (gen_pcalau12i_gr (operands[0], operands[1])); +-- +2.43.0 + diff --git a/0026-LoongArch-Disable-relaxation-if-the-assembler-don-t-.patch b/0026-LoongArch-Disable-relaxation-if-the-assembler-don-t-.patch new file mode 100644 index 0000000..cbe62f4 --- /dev/null +++ b/0026-LoongArch-Disable-relaxation-if-the-assembler-don-t-.patch @@ -0,0 +1,305 @@ +From f1cfdec1602a5a316a9b9022a95143a7385489c2 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 3 Nov 2023 21:19:59 +0800 +Subject: [PATCH 026/188] LoongArch: Disable relaxation if the assembler don't + support conditional branch relaxation [PR112330] + +As the commit message of r14-4674 has indicated, if the assembler does +not support 
conditional branch relaxation, a relocation overflow may +happen on conditional branches when relaxation is enabled because the +number of NOP instructions inserted by the assembler will be more than +the number estimated by GCC. + +To work around this issue, disable relaxation by default if the +assembler is detected incapable to perform conditional branch relaxation +at GCC build time. We also need to pass -mno-relax to the assembler to +really disable relaxation. But, if the assembler does not support +-mrelax option at all, we should not pass -mno-relax to the assembler or +it will immediately error out. Also handle this with the build time +assembler capability probing, and add a pair of options +-m[no-]pass-mrelax-to-as to allow using a different assembler from the +build-time one. + +With this change, if GCC is built with GAS 2.41, relaxation will be +disabled by default. So the default value of -mexplicit-relocs= is also +changed to 'always' if -mno-relax is specified or implied by the +build-time default, because using assembler macros for symbol addresses +produces no benefit when relaxation is disabled. + +gcc/ChangeLog: + + PR target/112330 + * config/loongarch/genopts/loongarch.opt.in: Add + -m[no]-pass-relax-to-as. Change the default of -m[no]-relax to + account conditional branch relaxation support status. + * config/loongarch/loongarch.opt: Regenerate. + * configure.ac (gcc_cv_as_loongarch_cond_branch_relax): Check if + the assembler supports conditional branch relaxation. + * configure: Regenerate. + * config.in: Regenerate. Note that there are some unrelated + changes introduced by r14-5424 (which does not contain a + config.in regeneration). + * config/loongarch/loongarch-opts.h + (HAVE_AS_COND_BRANCH_RELAXATION): Define to 0 if not defined. + * config/loongarch/loongarch-driver.h (ASM_MRELAX_DEFAULT): + Define. + (ASM_MRELAX_SPEC): Define. + (ASM_SPEC): Use ASM_MRELAX_SPEC instead of "%{mno-relax}". 
+ * config/loongarch/loongarch.cc: Take the setting of + -m[no-]relax into account when determining the default of + -mexplicit-relocs=. + * doc/invoke.texi: Document -m[no-]relax and + -m[no-]pass-mrelax-to-as for LoongArch. Update the default + value of -mexplicit-relocs=. +--- + gcc/config.in | 35 ++++++++++++++++++- + gcc/config/loongarch/genopts/loongarch.opt.in | 6 +++- + gcc/config/loongarch/loongarch-driver.h | 16 ++++++++- + gcc/config/loongarch/loongarch-opts.h | 4 +++ + gcc/config/loongarch/loongarch.cc | 2 +- + gcc/config/loongarch/loongarch.opt | 6 +++- + gcc/configure | 35 +++++++++++++++++++ + gcc/configure.ac | 10 ++++++ + 8 files changed, 109 insertions(+), 5 deletions(-) + +diff --git a/gcc/config.in b/gcc/config.in +index 0c55e67e7..04968b53c 100644 +--- a/gcc/config.in ++++ b/gcc/config.in +@@ -374,6 +374,12 @@ + #endif + + ++/* Define if your assembler supports conditional branch relaxation. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_AS_COND_BRANCH_RELAXATION ++#endif ++ ++ + /* Define if your assembler supports the --debug-prefix-map option. */ + #ifndef USED_FOR_TARGET + #undef HAVE_AS_DEBUG_PREFIX_MAP +@@ -798,6 +804,20 @@ + #endif + + ++/* Define to 1 if you have the Mac OS X function ++ CFLocaleCopyPreferredLanguages in the CoreFoundation framework. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_CFLOCALECOPYPREFERREDLANGUAGES ++#endif ++ ++ ++/* Define to 1 if you have the Mac OS X function CFPreferencesCopyAppValue in ++ the CoreFoundation framework. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_CFPREFERENCESCOPYAPPVALUE ++#endif ++ ++ + /* Define to 1 if you have the `clearerr_unlocked' function. */ + #ifndef USED_FOR_TARGET + #undef HAVE_CLEARERR_UNLOCKED +@@ -822,6 +842,13 @@ + #endif + + ++/* Define if the GNU dcgettext() function is already present or preinstalled. ++ */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_DCGETTEXT ++#endif ++ ++ + /* Define to 1 if we found a declaration for 'abort', otherwise define to 0. 
+ */ + #ifndef USED_FOR_TARGET +@@ -1554,6 +1581,12 @@ + #endif + + ++/* Define if the GNU gettext() function is already present or preinstalled. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_GETTEXT ++#endif ++ ++ + /* Define to 1 if you have the `gettimeofday' function. */ + #ifndef USED_FOR_TARGET + #undef HAVE_GETTIMEOFDAY +@@ -1585,7 +1618,7 @@ + #endif + + +-/* Define if you have the iconv() function. */ ++/* Define if you have the iconv() function and it works. */ + #ifndef USED_FOR_TARGET + #undef HAVE_ICONV + #endif +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index e7df1964a..bd3cfaf60 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -229,10 +229,14 @@ Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0) + Avoid using the GOT to access external symbols. + + mrelax +-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION) ++Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION) + Take advantage of linker relaxations to reduce the number of instructions + required to materialize symbol addresses. + ++mpass-mrelax-to-as ++Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) ++Pass -mrelax or -mno-relax option to the assembler. ++ + -param=loongarch-vect-unroll-limit= + Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param + Used to limit unroll factor which indicates how much the autovectorizer may +diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h +index 59fa3263d..c8dba2cc4 100644 +--- a/gcc/config/loongarch/loongarch-driver.h ++++ b/gcc/config/loongarch/loongarch-driver.h +@@ -51,9 +51,23 @@ along with GCC; see the file COPYING3. 
If not see + "%{G*} %{,ada:-gnatea %{mabi=*} -gnatez} " \ + "%(subtarget_cc1_spec)" + ++#if HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION ++#define ASM_MRELAX_DEFAULT "%{!mrelax:%{!mno-relax:-mrelax}}" ++#else ++#define ASM_MRELAX_DEFAULT "%{!mrelax:%{!mno-relax:-mno-relax}}" ++#endif ++ ++#if HAVE_AS_MRELAX_OPTION ++#define ASM_MRELAX_SPEC \ ++ "%{!mno-pass-mrelax-to-as:%{mrelax} %{mno-relax} " ASM_MRELAX_DEFAULT "}" ++#else ++#define ASM_MRELAX_SPEC \ ++ "%{mpass-mrelax-to-as:%{mrelax} %{mno-relax} " ASM_MRELAX_DEFAULT "}" ++#endif ++ + #undef ASM_SPEC + #define ASM_SPEC \ +- "%{mabi=*} %{mno-relax} %(subtarget_asm_spec)" ++ "%{mabi=*} " ASM_MRELAX_SPEC " %(subtarget_asm_spec)" + + + extern const char* +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index c4975af00..dfbe9dd5c 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -103,6 +103,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + #define HAVE_AS_MRELAX_OPTION 0 + #endif + ++#ifndef HAVE_AS_COND_BRANCH_RELAXATION ++#define HAVE_AS_COND_BRANCH_RELAXATION 0 ++#endif ++ + #ifndef HAVE_AS_TLS + #define HAVE_AS_TLS 0 + #endif +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 65ca1489f..6d580ee75 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -7428,7 +7428,7 @@ loongarch_option_override_internal (struct gcc_options *opts, + + if (la_opt_explicit_relocs == M_OPT_UNSET) + la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS +- ? (HAVE_AS_MRELAX_OPTION ++ ? (loongarch_mrelax + ? 
EXPLICIT_RELOCS_AUTO + : EXPLICIT_RELOCS_ALWAYS) + : EXPLICIT_RELOCS_NONE); +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 44376fd77..d936954b8 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -236,10 +236,14 @@ Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0) + Avoid using the GOT to access external symbols. + + mrelax +-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION) ++Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION) + Take advantage of linker relaxations to reduce the number of instructions + required to materialize symbol addresses. + ++mpass-mrelax-to-as ++Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) ++Pass -mrelax or -mno-relax option to the assembler. ++ + -param=loongarch-vect-unroll-limit= + Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param + Used to limit unroll factor which indicates how much the autovectorizer may +diff --git a/gcc/configure b/gcc/configure +index 430d44dc3..09bacfec3 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -28901,6 +28901,41 @@ if test $gcc_cv_as_loongarch_relax = yes; then + + $as_echo "#define HAVE_AS_MRELAX_OPTION 1" >>confdefs.h + ++fi ++ ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for conditional branch relaxation support" >&5 ++$as_echo_n "checking assembler for conditional branch relaxation support... " >&6; } ++if ${gcc_cv_as_loongarch_cond_branch_relax+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ gcc_cv_as_loongarch_cond_branch_relax=no ++ if test x$gcc_cv_as != x; then ++ $as_echo 'a: ++ .rept 32769 ++ nop ++ .endr ++ beq $a0,$a1,a' > conftest.s ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 ++ test $ac_status = 0; }; } ++ then ++ gcc_cv_as_loongarch_cond_branch_relax=yes ++ else ++ echo "configure: failed program was" >&5 ++ cat conftest.s >&5 ++ fi ++ rm -f conftest.o conftest.s ++ fi ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_cond_branch_relax" >&5 ++$as_echo "$gcc_cv_as_loongarch_cond_branch_relax" >&6; } ++if test $gcc_cv_as_loongarch_cond_branch_relax = yes; then ++ ++$as_echo "#define HAVE_AS_COND_BRANCH_RELAXATION 1" >>confdefs.h ++ + fi + + ;; +diff --git a/gcc/configure.ac b/gcc/configure.ac +index 4b24db190..a0999152e 100644 +--- a/gcc/configure.ac ++++ b/gcc/configure.ac +@@ -5341,6 +5341,16 @@ x: + [-mrelax], [.text],, + [AC_DEFINE(HAVE_AS_MRELAX_OPTION, 1, + [Define if your assembler supports -mrelax option.])]) ++ gcc_GAS_CHECK_FEATURE([conditional branch relaxation support], ++ gcc_cv_as_loongarch_cond_branch_relax, ++ [], ++ [a: ++ .rept 32769 ++ nop ++ .endr ++ beq $a0,$a1,a],, ++ [AC_DEFINE(HAVE_AS_COND_BRANCH_RELAXATION, 1, ++ [Define if your assembler supports conditional branch relaxation.])]) + ;; + s390*-*-*) + gcc_GAS_CHECK_FEATURE([.gnu_attribute support], +-- +2.43.0 + diff --git a/0027-LoongArch-Remove-redundant-barrier-instructions-befo.patch b/0027-LoongArch-Remove-redundant-barrier-instructions-befo.patch new file mode 100644 index 0000000..28df51c --- /dev/null +++ b/0027-LoongArch-Remove-redundant-barrier-instructions-befo.patch @@ -0,0 +1,391 @@ +From 4498010fba61c1446286c96cbda24d5ed53c53c7 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Mon, 6 Nov 2023 16:06:08 +0800 +Subject: [PATCH 027/188] LoongArch: Remove redundant barrier instructions + before LL-SC loops + +This is isomorphic to the LLVM changes [1-2]. 
+ +On LoongArch, the LL and SC instructions has memory barrier semantics: + +- LL: <memory-barrier> + <load-exclusive> +- SC: <store-conditional> + <memory-barrier> + +But the compare and swap operation is allowed to fail, and if it fails +the SC instruction is not executed, thus the guarantee of acquiring +semantics cannot be ensured. Therefore, an acquire barrier needs to be +generated when failure_memorder includes an acquire operation. + +On CPUs implementing LoongArch v1.10 or later, "dbar 0b10100" is an +acquire barrier; on CPUs implementing LoongArch v1.00, it is a full +barrier. So it's always enough for acquire semantics. OTOH if an +acquire semantic is not needed, we still needs the "dbar 0x700" as the +load-load barrier like all LL-SC loops. + +[1]:https://github.com/llvm/llvm-project/pull/67391 +[2]:https://github.com/llvm/llvm-project/pull/69339 + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_memmodel_needs_release_fence): Remove. + (loongarch_cas_failure_memorder_needs_acquire): New static + function. + (loongarch_print_operand): Redefine 'G' for the barrier on CAS + failure. + * config/loongarch/sync.md (atomic_cas_value_strong<mode>): + Remove the redundant barrier before the LL instruction, and + emit an acquire barrier on failure if needed by + failure_memorder. + (atomic_cas_value_cmp_and_7_<mode>): Likewise. + (atomic_cas_value_add_7_<mode>): Remove the unnecessary barrier + before the LL instruction. + (atomic_cas_value_sub_7_<mode>): Likewise. + (atomic_cas_value_and_7_<mode>): Likewise. + (atomic_cas_value_xor_7_<mode>): Likewise. + (atomic_cas_value_or_7_<mode>): Likewise. + (atomic_cas_value_nand_7_<mode>): Likewise. + (atomic_cas_value_exchange_7_<mode>): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/cas-acquire.c: New test. 
+--- + gcc/config/loongarch/loongarch.cc | 30 ++++--- + gcc/config/loongarch/sync.md | 49 +++++------ + .../gcc.target/loongarch/cas-acquire.c | 82 +++++++++++++++++++ + 3 files changed, 119 insertions(+), 42 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/cas-acquire.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 6d580ee75..8467f03cf 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5829,27 +5829,27 @@ loongarch_memmodel_needs_rel_acq_fence (enum memmodel model) + } + } + +-/* Return true if a FENCE should be emitted to before a memory access to +- implement the release portion of memory model MODEL. */ ++/* Return true if a FENCE should be emitted after a failed CAS to ++ implement the acquire semantic of failure_memorder. */ + + static bool +-loongarch_memmodel_needs_release_fence (enum memmodel model) ++loongarch_cas_failure_memorder_needs_acquire (enum memmodel model) + { +- switch (model) ++ switch (memmodel_base (model)) + { ++ case MEMMODEL_ACQUIRE: + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: +- case MEMMODEL_SYNC_SEQ_CST: +- case MEMMODEL_RELEASE: +- case MEMMODEL_SYNC_RELEASE: + return true; + +- case MEMMODEL_ACQUIRE: +- case MEMMODEL_CONSUME: +- case MEMMODEL_SYNC_ACQUIRE: + case MEMMODEL_RELAXED: ++ case MEMMODEL_RELEASE: + return false; + ++ /* MEMMODEL_CONSUME is deliberately not handled because it's always ++ replaced by MEMMODEL_ACQUIRE as at now. If you see an ICE caused by ++ MEMMODEL_CONSUME, read the change (re)introducing it carefully and ++ decide what to do. See PR 59448 and get_memmodel in builtins.cc. */ + default: + gcc_unreachable (); + } +@@ -5962,7 +5962,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, + 'd' Print CONST_INT OP in decimal. + 'E' Print CONST_INT OP element 0 of a replicated CONST_VECTOR in decimal. + 'F' Print the FPU branch condition for comparison OP. 
+- 'G' Print a DBAR insn if the memory model requires a release. ++ 'G' Print a DBAR insn for CAS failure (with an acquire semantic if ++ needed, otherwise a simple load-load barrier). + 'H' Print address 52-61bit relocation associated with OP. + 'h' Print the high-part relocation associated with OP. + 'i' Print i if the operand is not a register. +@@ -6053,8 +6054,11 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + break; + + case 'G': +- if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op))) +- fputs ("dbar\t0", file); ++ if (loongarch_cas_failure_memorder_needs_acquire ( ++ memmodel_from_int (INTVAL (op)))) ++ fputs ("dbar\t0b10100", file); ++ else ++ fputs ("dbar\t0x700", file); + break; + + case 'h': +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index efa40f24c..dd1f98946 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -162,19 +162,18 @@ + (clobber (match_scratch:GPR 6 "=&r"))] + "" + { +- return "%G5\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.\\t%0,%1\\n\\t" + "bne\\t%0,%z2,2f\\n\\t" + "or%i3\\t%6,$zero,%3\\n\\t" + "sc.\\t%6,%1\\n\\t" +- "beq\\t$zero,%6,1b\\n\\t" ++ "beqz\\t%6,1b\\n\\t" + "b\\t3f\\n\\t" + "2:\\n\\t" +- "dbar\\t0x700\\n\\t" ++ "%G5\\n\\t" + "3:\\n\\t"; + } +- [(set (attr "length") (const_int 32))]) ++ [(set (attr "length") (const_int 28))]) + + (define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 "register_operand" "") ;; bool output +@@ -267,8 +266,7 @@ + (clobber (match_scratch:GPR 7 "=&r"))] + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.\\t%0,%1\\n\\t" + "and\\t%7,%0,%2\\n\\t" + "bne\\t%7,%z4,2f\\n\\t" +@@ -278,10 +276,10 @@ + "beq\\t$zero,%7,1b\\n\\t" + "b\\t3f\\n\\t" + "2:\\n\\t" +- "dbar\\t0x700\\n\\t" ++ "%G6\\n\\t" + "3:\\n\\t"; + } +- [(set (attr "length") (const_int 40))]) ++ [(set (attr "length") (const_int 36))]) + + (define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 
"register_operand" "") ;; bool output +@@ -336,8 +334,7 @@ + (clobber (match_scratch:GPR 8 "=&r"))] + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.\\t%0,%1\\n\\t" + "and\\t%7,%0,%3\\n\\t" + "add.w\\t%8,%0,%z5\\n\\t" +@@ -347,7 +344,7 @@ + "beq\\t$zero,%7,1b"; + } + +- [(set (attr "length") (const_int 32))]) ++ [(set (attr "length") (const_int 28))]) + + (define_insn "atomic_cas_value_sub_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res +@@ -363,8 +360,7 @@ + (clobber (match_scratch:GPR 8 "=&r"))] + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.\\t%0,%1\\n\\t" + "and\\t%7,%0,%3\\n\\t" + "sub.w\\t%8,%0,%z5\\n\\t" +@@ -373,7 +369,7 @@ + "sc.\\t%7,%1\\n\\t" + "beq\\t$zero,%7,1b"; + } +- [(set (attr "length") (const_int 32))]) ++ [(set (attr "length") (const_int 28))]) + + (define_insn "atomic_cas_value_and_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res +@@ -389,8 +385,7 @@ + (clobber (match_scratch:GPR 8 "=&r"))] + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.\\t%0,%1\\n\\t" + "and\\t%7,%0,%3\\n\\t" + "and\\t%8,%0,%z5\\n\\t" +@@ -399,7 +394,7 @@ + "sc.\\t%7,%1\\n\\t" + "beq\\t$zero,%7,1b"; + } +- [(set (attr "length") (const_int 32))]) ++ [(set (attr "length") (const_int 28))]) + + (define_insn "atomic_cas_value_xor_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res +@@ -415,8 +410,7 @@ + (clobber (match_scratch:GPR 8 "=&r"))] + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.\\t%0,%1\\n\\t" + "and\\t%7,%0,%3\\n\\t" + "xor\\t%8,%0,%z5\\n\\t" +@@ -426,7 +420,7 @@ + "beq\\t$zero,%7,1b"; + } + +- [(set (attr "length") (const_int 32))]) ++ [(set (attr "length") (const_int 28))]) + + (define_insn "atomic_cas_value_or_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res +@@ -442,8 +436,7 @@ + (clobber (match_scratch:GPR 8 "=&r"))] + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.\\t%0,%1\\n\\t" 
+ "and\\t%7,%0,%3\\n\\t" + "or\\t%8,%0,%z5\\n\\t" +@@ -453,7 +446,7 @@ + "beq\\t$zero,%7,1b"; + } + +- [(set (attr "length") (const_int 32))]) ++ [(set (attr "length") (const_int 28))]) + + (define_insn "atomic_cas_value_nand_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res +@@ -469,8 +462,7 @@ + (clobber (match_scratch:GPR 8 "=&r"))] + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.\\t%0,%1\\n\\t" + "and\\t%7,%0,%3\\n\\t" + "and\\t%8,%0,%z5\\n\\t" +@@ -479,7 +471,7 @@ + "sc.\\t%7,%1\\n\\t" + "beq\\t$zero,%7,1b"; + } +- [(set (attr "length") (const_int 32))]) ++ [(set (attr "length") (const_int 28))]) + + (define_insn "atomic_cas_value_exchange_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") +@@ -494,8 +486,7 @@ + (clobber (match_scratch:GPR 7 "=&r"))] + "" + { +- return "%G6\\n\\t" +- "1:\\n\\t" ++ return "1:\\n\\t" + "ll.\\t%0,%1\\n\\t" + "and\\t%7,%0,%z3\\n\\t" + "or%i5\\t%7,%7,%5\\n\\t" +diff --git a/gcc/testsuite/gcc.target/loongarch/cas-acquire.c b/gcc/testsuite/gcc.target/loongarch/cas-acquire.c +new file mode 100644 +index 000000000..ff7ba866f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/cas-acquire.c +@@ -0,0 +1,82 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target c99_runtime } */ ++/* { dg-require-effective-target pthread } */ ++/* { dg-options "-std=c99 -pthread" } */ ++ ++/* https://github.com/llvm/llvm-project/pull/67391#issuecomment-1752403934 ++ reported that this had failed with GCC and 3A6000. 
*/ ++ ++#include ++#include ++#include ++#include ++ ++static unsigned int tags[32]; ++static unsigned int vals[32]; ++ ++static void * ++writer_entry (void *data) ++{ ++ atomic_uint *pt = (atomic_uint *)tags; ++ atomic_uint *pv = (atomic_uint *)vals; ++ ++ for (unsigned int n = 1; n < 10000; n++) ++ { ++ atomic_store_explicit (&pv[n & 31], n, memory_order_release); ++ atomic_store_explicit (&pt[n & 31], n, memory_order_release); ++ } ++ ++ return NULL; ++} ++ ++static void * ++reader_entry (void *data) ++{ ++ atomic_uint *pt = (atomic_uint *)tags; ++ atomic_uint *pv = (atomic_uint *)vals; ++ int i; ++ ++ for (;;) ++ { ++ for (i = 0; i < 32; i++) ++ { ++ unsigned int tag = 0; ++ bool res; ++ ++ res = atomic_compare_exchange_weak_explicit ( ++ &pt[i], &tag, 0, memory_order_acquire, memory_order_acquire); ++ if (!res) ++ { ++ unsigned int val; ++ ++ val = atomic_load_explicit (&pv[i], memory_order_relaxed); ++ if (val < tag) ++ __builtin_trap (); ++ } ++ } ++ } ++ ++ return NULL; ++} ++ ++int ++main (int argc, char *argv[]) ++{ ++ pthread_t writer; ++ pthread_t reader; ++ int res; ++ ++ res = pthread_create (&writer, NULL, writer_entry, NULL); ++ if (res < 0) ++ __builtin_trap (); ++ ++ res = pthread_create (&reader, NULL, reader_entry, NULL); ++ if (res < 0) ++ __builtin_trap (); ++ ++ res = pthread_join (writer, NULL); ++ if (res < 0) ++ __builtin_trap (); ++ ++ return 0; ++} +-- +2.43.0 + diff --git a/0028-LoongArch-Fix-scan-assembler-times-of-lasx-lsx-test-.patch b/0028-LoongArch-Fix-scan-assembler-times-of-lasx-lsx-test-.patch new file mode 100644 index 0000000..42c232f --- /dev/null +++ b/0028-LoongArch-Fix-scan-assembler-times-of-lasx-lsx-test-.patch @@ -0,0 +1,161 @@ +From 9731abbe19b9fad184dfe728bd9b2cc02b40c543 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Thu, 16 Nov 2023 20:31:09 +0800 +Subject: [PATCH 028/188] LoongArch: Fix scan-assembler-times of lasx/lsx test + case. 
+ +These tests fail when they are first added,this patch adjusts the scan-assembler-times +to fix them. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-vcond-1.c: Adjust assembler times. + * gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: Ditto. + * gcc.target/loongarch/vector/lsx/lsx-vcond-1.c: Ditto. + * gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: Ditto. +--- + .../loongarch/vector/lasx/lasx-vcond-1.c | 12 +++---- + .../loongarch/vector/lasx/lasx-vcond-2.c | 36 +++++++++---------- + .../loongarch/vector/lsx/lsx-vcond-1.c | 12 +++---- + .../loongarch/vector/lsx/lsx-vcond-2.c | 36 +++++++++---------- + 4 files changed, 48 insertions(+), 48 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c +index ee9cb1a1f..57064eac9 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c +@@ -52,13 +52,13 @@ TEST_VAR_ALL (DEF_VCOND_VAR) + + /* { dg-final { scan-assembler-times {\txvslt\.b} 4 } } */ + /* { dg-final { scan-assembler-times {\txvslt\.h} 4 } } */ +-/* { dg-final { scan-assembler-times {\txvslt\.w} 4 } } */ +-/* { dg-final { scan-assembler-times {\txvslt\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvslt\.w} 8 } } */ ++/* { dg-final { scan-assembler-times {\txvslt\.d} 8 } } */ + /* { dg-final { scan-assembler-times {\txvsle\.b} 4 } } */ + /* { dg-final { scan-assembler-times {\txvsle\.h} 4 } } */ +-/* { dg-final { scan-assembler-times {\txvsle\.w} 4 } } */ +-/* { dg-final { scan-assembler-times {\txvsle\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvsle\.w} 8 } } */ ++/* { dg-final { scan-assembler-times {\txvsle\.d} 8 } } */ + /* { dg-final { scan-assembler-times {\txvseq\.b} 4 } } */ + /* { dg-final { scan-assembler-times {\txvseq\.h} 4 } } */ +-/* { dg-final { scan-assembler-times {\txvseq\.w} 4 } } */ +-/* { dg-final { 
scan-assembler-times {\txvseq\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\txvseq\.w} 8 } } */ ++/* { dg-final { scan-assembler-times {\txvseq\.d} 8 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c +index 5f40ed44c..55d5a084c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c +@@ -67,21 +67,21 @@ TEST_CMP (nule) + TEST_CMP (nuge) + TEST_CMP (nugt) + +-/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 2 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 2 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 4 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.d} 4 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.s} 2 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.s} 2 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.d} 4 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.s} 8 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.d} 8 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.s} 8 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.d} 8 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 3 } } */ ++/* { dg-final { 
scan-assembler-times {\txvfcmp\.cne\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.s} 12 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.d} 12 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.s} 12 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.d} 12 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c +index 138adccfa..8c69f0d9b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c +@@ -52,13 +52,13 @@ TEST_VAR_ALL (DEF_VCOND_VAR) + + /* { dg-final { scan-assembler-times {\tvslt\.b} 4 } } */ + /* { dg-final { scan-assembler-times {\tvslt\.h} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvslt\.w} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvslt\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvslt\.w} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvslt\.d} 8 } } */ + /* { dg-final { scan-assembler-times {\tvsle\.b} 4 } } */ + /* { dg-final { scan-assembler-times {\tvsle\.h} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvsle\.w} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvsle\.d} 4 } } */ ++/* { dg-final { scan-assembler-times 
{\tvsle\.w} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvsle\.d} 8 } } */ + /* { dg-final { scan-assembler-times {\tvseq\.b} 4 } } */ + /* { dg-final { scan-assembler-times {\tvseq\.h} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvseq\.w} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvseq\.d} 4 } } */ ++/* { dg-final { scan-assembler-times {\tvseq\.w} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvseq\.d} 8 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c +index e8fe31f8f..2214afd0a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c +@@ -67,21 +67,21 @@ TEST_CMP (nule) + TEST_CMP (nuge) + TEST_CMP (nugt) + +-/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.d} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.s} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.s} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.d} 2 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.d} 4 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.s} 8 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.d} 8 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.s} 8 } } */ +-/* { dg-final { scan-assembler-times 
{\tvfcmp\.cult\.d} 8 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.s} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.d} 6 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.s} 12 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.d} 12 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.s} 12 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.d} 12 } } */ +-- +2.43.0 + diff --git a/0029-LoongArch-Increase-cost-of-vector-aligned-store-load.patch b/0029-LoongArch-Increase-cost-of-vector-aligned-store-load.patch new file mode 100644 index 0000000..27d00d5 --- /dev/null +++ b/0029-LoongArch-Increase-cost-of-vector-aligned-store-load.patch @@ -0,0 +1,45 @@ +From 526e1effd86cfa0b1afae88890ce4f74f7150d88 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Thu, 16 Nov 2023 16:44:36 +0800 +Subject: [PATCH 029/188] LoongArch: Increase cost of vector aligned + store/load. + +Based on SPEC2017 performance evaluation results, it's better to make them equal +to the cost of unaligned store/load so as to avoid odd alignment peeling. 
+ +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_builtin_vectorization_cost): Adjust. +--- + gcc/config/loongarch/loongarch.cc | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 8467f03cf..b6f0d61ef 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3889,11 +3889,9 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + case scalar_stmt: + case scalar_load: + case vector_stmt: +- case vector_load: + case vec_to_scalar: + case scalar_to_vec: + case scalar_store: +- case vector_store: + return 1; + + case vec_promote_demote: +@@ -3901,6 +3899,8 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + return LASX_SUPPORTED_MODE_P (mode) + && !LSX_SUPPORTED_MODE_P (mode) ? 2 : 1; + ++ case vector_load: ++ case vector_store: + case unaligned_load: + case unaligned_store: + return 2; +-- +2.43.0 + diff --git a/0030-LoongArch-Implement-C-LT-Z_DEFINED_VALUE_AT_ZERO.patch b/0030-LoongArch-Implement-C-LT-Z_DEFINED_VALUE_AT_ZERO.patch new file mode 100644 index 0000000..37cb901 --- /dev/null +++ b/0030-LoongArch-Implement-C-LT-Z_DEFINED_VALUE_AT_ZERO.patch @@ -0,0 +1,58 @@ +From bd74cb3e1238e842d15bcd4044c9e2f246cc18bc Mon Sep 17 00:00:00 2001 +From: Li Wei +Date: Fri, 17 Nov 2023 10:38:02 +0800 +Subject: [PATCH 030/188] LoongArch: Implement C[LT]Z_DEFINED_VALUE_AT_ZERO + +The LoongArch has defined ctz and clz on the backend, but if we want GCC +do CTZ transformation optimization in forwprop2 pass, GCC need to know +the value of c[lt]z at zero, which may be beneficial for some test cases +(like spec2017 deepsjeng_r). + +After implementing the macro, we test dynamic instruction count on +deepsjeng_r: +- before 1688423249186 +- after 1660311215745 (1.66% reduction) + +gcc/ChangeLog: + + * config/loongarch/loongarch.h (CLZ_DEFINED_VALUE_AT_ZERO): + Implement. 
+ (CTZ_DEFINED_VALUE_AT_ZERO): Same. + +gcc/testsuite/ChangeLog: + + * gcc.dg/pr90838.c: add clz/ctz test support on LoongArch. +--- + gcc/config/loongarch/loongarch.h | 5 +++++ + gcc/testsuite/gcc.dg/pr90838.c | 5 +++++ + 2 files changed, 10 insertions(+) + +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index 6e8ac293a..19cf6fd33 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -1239,3 +1239,8 @@ struct GTY (()) machine_function + + #define TARGET_EXPLICIT_RELOCS \ + (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS) ++ ++#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ ++ ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) ++#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ ++ ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) +diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c +index 7502b8463..7aa912525 100644 +--- a/gcc/testsuite/gcc.dg/pr90838.c ++++ b/gcc/testsuite/gcc.dg/pr90838.c +@@ -82,3 +82,8 @@ int ctz4 (unsigned long x) + /* { dg-final { scan-assembler-times "ctz\t" 3 { target { rv32 } } } } */ + /* { dg-final { scan-assembler-times "andi\t" 1 { target { rv32 } } } } */ + /* { dg-final { scan-assembler-times "mul\t" 1 { target { rv32 } } } } */ ++ ++/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target { loongarch64*-*-* } } } } */ ++/* { dg-final { scan-assembler-times "ctz.d\t" 1 { target { loongarch64*-*-* } } } } */ ++/* { dg-final { scan-assembler-times "ctz.w\t" 3 { target { loongarch64*-*-* } } } } */ ++/* { dg-final { scan-assembler-times "andi\t" 4 { target { loongarch64*-*-* } } } } */ +-- +2.43.0 + diff --git a/0031-LoongArch-Handle-vectorized-copysign-x-1-expansion-e.patch b/0031-LoongArch-Handle-vectorized-copysign-x-1-expansion-e.patch new file mode 100644 index 0000000..d5e2a3b --- /dev/null +++ b/0031-LoongArch-Handle-vectorized-copysign-x-1-expansion-e.patch @@ -0,0 +1,197 @@ +From 61daf071708947ef8431ac36bc6c6b47339fdd2a Mon Sep 17 
00:00:00 2001 +From: Xi Ruoyao +Date: Tue, 14 Nov 2023 00:17:19 +0800 +Subject: [PATCH 031/188] LoongArch: Handle vectorized copysign (x, -1) + expansion efficiently + +With LSX or LASX, copysign (x[i], -1) (or any negative constant) can be +vectorized using [x]vbitseti.{w/d} instructions to directly set the +signbits. + +Inspired by Tamar Christina's "AArch64: Handle copysign (x, -1) expansion +efficiently" (r14-5289). + +gcc/ChangeLog: + + * config/loongarch/lsx.md (copysign3): Allow operand[2] to + be an reg_or_vector_same_val_operand. If it's a const vector + with same negative elements, expand the copysign with a bitset + instruction. Otherwise, force it into an register. + * config/loongarch/lasx.md (copysign3): Likewise. + +gcc/testsuite/ChangeLog: + + * g++.target/loongarch/vect-copysign-negconst.C: New test. + * g++.target/loongarch/vect-copysign-negconst-run.C: New test. +--- + gcc/config/loongarch/lasx.md | 22 ++++++++- + gcc/config/loongarch/lsx.md | 22 ++++++++- + .../loongarch/vect-copysign-negconst-run.C | 47 +++++++++++++++++++ + .../loongarch/vect-copysign-negconst.C | 27 +++++++++++ + 4 files changed, 116 insertions(+), 2 deletions(-) + create mode 100644 gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C + create mode 100644 gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index f0f2dd08d..2e11f0612 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -3136,11 +3136,31 @@ + (match_operand:FLASX 1 "register_operand"))) + (set (match_dup 5) + (and:FLASX (match_dup 3) +- (match_operand:FLASX 2 "register_operand"))) ++ (match_operand:FLASX 2 "reg_or_vector_same_val_operand"))) + (set (match_operand:FLASX 0 "register_operand") + (ior:FLASX (match_dup 4) (match_dup 5)))] + "ISA_HAS_LASX" + { ++ /* copysign (x, -1) should instead be expanded as setting the sign ++ bit. 
*/ ++ if (!REG_P (operands[2])) ++ { ++ rtx op2_elt = unwrap_const_vec_duplicate (operands[2]); ++ if (GET_CODE (op2_elt) == CONST_DOUBLE ++ && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt))) ++ { ++ rtx n = GEN_INT (8 * GET_MODE_SIZE (mode) - 1); ++ operands[0] = lowpart_subreg (mode, operands[0], ++ mode); ++ operands[1] = lowpart_subreg (mode, operands[1], ++ mode); ++ emit_insn (gen_lasx_xvbitseti_ (operands[0], ++ operands[1], n)); ++ DONE; ++ } ++ } ++ ++ operands[2] = force_reg (mode, operands[2]); + operands[3] = loongarch_build_signbit_mask (mode, 1, 0); + + operands[4] = gen_reg_rtx (mode); +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 55c7d79a0..8ea41c85b 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -2873,11 +2873,31 @@ + (match_operand:FLSX 1 "register_operand"))) + (set (match_dup 5) + (and:FLSX (match_dup 3) +- (match_operand:FLSX 2 "register_operand"))) ++ (match_operand:FLSX 2 "reg_or_vector_same_val_operand"))) + (set (match_operand:FLSX 0 "register_operand") + (ior:FLSX (match_dup 4) (match_dup 5)))] + "ISA_HAS_LSX" + { ++ /* copysign (x, -1) should instead be expanded as setting the sign ++ bit. 
*/ ++ if (!REG_P (operands[2])) ++ { ++ rtx op2_elt = unwrap_const_vec_duplicate (operands[2]); ++ if (GET_CODE (op2_elt) == CONST_DOUBLE ++ && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt))) ++ { ++ rtx n = GEN_INT (8 * GET_MODE_SIZE (mode) - 1); ++ operands[0] = lowpart_subreg (mode, operands[0], ++ mode); ++ operands[1] = lowpart_subreg (mode, operands[1], ++ mode); ++ emit_insn (gen_lsx_vbitseti_ (operands[0], operands[1], ++ n)); ++ DONE; ++ } ++ } ++ ++ operands[2] = force_reg (mode, operands[2]); + operands[3] = loongarch_build_signbit_mask (mode, 1, 0); + + operands[4] = gen_reg_rtx (mode); +diff --git a/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C +new file mode 100644 +index 000000000..d2d5d15c9 +--- /dev/null ++++ b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C +@@ -0,0 +1,47 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -march=loongarch64 -mlasx -mno-strict-align" } */ ++/* { dg-require-effective-target loongarch_asx_hw } */ ++ ++#include "vect-copysign-negconst.C" ++ ++double d[] = {1.2, -3.4, -5.6, 7.8}; ++float f[] = {1.2, -3.4, -5.6, 7.8, -9.0, -11.4, 51.4, 1919.810}; ++ ++double _abs(double x) { return __builtin_fabs (x); } ++float _abs(float x) { return __builtin_fabsf (x); } ++ ++template ++void ++check (T *arr, T *orig, int len) ++{ ++ for (int i = 0; i < len; i++) ++ { ++ if (arr[i] > 0) ++ __builtin_trap (); ++ if (_abs (arr[i]) != _abs (orig[i])) ++ __builtin_trap (); ++ } ++} ++ ++int ++main() ++{ ++ double test_d[4]; ++ float test_f[8]; ++ ++ __builtin_memcpy (test_d, d, sizeof (test_d)); ++ force_negative<2> (test_d); ++ check (test_d, d, 2); ++ ++ __builtin_memcpy (test_d, d, sizeof (test_d)); ++ force_negative<4> (test_d); ++ check (test_d, d, 4); ++ ++ __builtin_memcpy (test_f, f, sizeof (test_f)); ++ force_negative<4> (test_f); ++ check (test_f, f, 4); ++ ++ __builtin_memcpy (test_f, f, sizeof (test_f)); ++ force_negative<8> 
(test_f); ++ check (test_f, f, 8); ++} +diff --git a/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C +new file mode 100644 +index 000000000..5e8820d2b +--- /dev/null ++++ b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C +@@ -0,0 +1,27 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mlasx -mno-strict-align" } */ ++/* { dg-final { scan-assembler "\txvbitseti.*63" } } */ ++/* { dg-final { scan-assembler "\txvbitseti.*31" } } */ ++/* { dg-final { scan-assembler "\tvbitseti.*63" } } */ ++/* { dg-final { scan-assembler "\tvbitseti.*31" } } */ ++ ++template ++__attribute__ ((noipa)) void ++force_negative (float *arr) ++{ ++ for (int i = 0; i < N; i++) ++ arr[i] = __builtin_copysignf (arr[i], -2); ++} ++ ++template ++__attribute__ ((noipa)) void ++force_negative (double *arr) ++{ ++ for (int i = 0; i < N; i++) ++ arr[i] = __builtin_copysign (arr[i], -3); ++} ++ ++template void force_negative<4>(float *); ++template void force_negative<8>(float *); ++template void force_negative<2>(double *); ++template void force_negative<4>(double *); +-- +2.43.0 + diff --git a/0032-LoongArch-Add-code-generation-support-for-call36-fun.patch b/0032-LoongArch-Add-code-generation-support-for-call36-fun.patch new file mode 100644 index 0000000..d4f59d8 --- /dev/null +++ b/0032-LoongArch-Add-code-generation-support-for-call36-fun.patch @@ -0,0 +1,561 @@ +From 5ab014701ddd9968855026f0e2ae1af2b165bcd7 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Thu, 16 Nov 2023 15:06:11 +0800 +Subject: [PATCH 032/188] LoongArch: Add code generation support for call36 + function calls. + +When compiling with '-mcmodel=medium', the function call is made through +'pcaddu18i+jirl' if binutils supports call36, otherwise the +native implementation 'pcalau12i+jirl' is used. + +gcc/ChangeLog: + + * config.in: Regenerate. + * config/loongarch/loongarch-opts.h (HAVE_AS_SUPPORT_CALL36): Define macro. 
+ * config/loongarch/loongarch.cc (loongarch_legitimize_call_address): + If binutils supports call36, the function call is not split over expand. + * config/loongarch/loongarch.md: Add call36 generation code. + * config/loongarch/predicates.md: Likewise. + * configure: Regenerate. + * configure.ac: Check whether binutils supports call36. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/func-call-medium-5.c: If the assembler supports call36, + the test is abandoned. + * gcc.target/loongarch/func-call-medium-6.c: Likewise. + * gcc.target/loongarch/func-call-medium-7.c: Likewise. + * gcc.target/loongarch/func-call-medium-8.c: Likewise. + * lib/target-supports.exp: Added a function to see if the assembler supports + the call36 relocation. + * gcc.target/loongarch/func-call-medium-call36-1.c: New test. + * gcc.target/loongarch/func-call-medium-call36.c: New test. + +Co-authored-by: Xi Ruoyao +--- + gcc/config.in | 6 + + gcc/config/loongarch/loongarch-opts.h | 4 + + gcc/config/loongarch/loongarch.cc | 12 +- + gcc/config/loongarch/loongarch.md | 171 +++++++++++++++--- + gcc/config/loongarch/predicates.md | 7 +- + gcc/configure | 32 ++++ + gcc/configure.ac | 6 + + .../gcc.target/loongarch/func-call-medium-5.c | 1 + + .../gcc.target/loongarch/func-call-medium-6.c | 1 + + .../gcc.target/loongarch/func-call-medium-7.c | 1 + + .../gcc.target/loongarch/func-call-medium-8.c | 1 + + .../loongarch/func-call-medium-call36-1.c | 21 +++ + .../loongarch/func-call-medium-call36.c | 32 ++++ + gcc/testsuite/lib/target-supports.exp | 9 + + 14 files changed, 268 insertions(+), 36 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-call36-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-call36.c + +diff --git a/gcc/config.in b/gcc/config.in +index 04968b53c..033cfb98b 100644 +--- a/gcc/config.in ++++ b/gcc/config.in +@@ -759,6 +759,12 @@ + #endif + + ++/* Define if your assembler supports call36 relocation. 
*/ ++#ifndef USED_FOR_TARGET ++#undef HAVE_AS_SUPPORT_CALL36 ++#endif ++ ++ + /* Define if your assembler and linker support thread-local storage. */ + #ifndef USED_FOR_TARGET + #undef HAVE_AS_TLS +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index dfbe9dd5c..22ce1a122 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -99,6 +99,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + #define HAVE_AS_EXPLICIT_RELOCS 0 + #endif + ++#ifndef HAVE_AS_SUPPORT_CALL36 ++#define HAVE_AS_SUPPORT_CALL36 0 ++#endif ++ + #ifndef HAVE_AS_MRELAX_OPTION + #define HAVE_AS_MRELAX_OPTION 0 + #endif +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index b6f0d61ef..43f0e82ba 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3002,12 +3002,16 @@ loongarch_legitimize_call_address (rtx addr) + + enum loongarch_symbol_type symbol_type = loongarch_classify_symbol (addr); + +- /* Split function call insn 'bl sym' or 'bl %plt(sym)' to : +- pcalau12i $rd, %pc_hi20(sym) +- jr $rd, %pc_lo12(sym). */ ++ /* If add the compilation option '-cmodel=medium', and the assembler does ++ not support call36. 
The following sequence of instructions will be ++ used for the function call: ++ pcalau12i $rd, %pc_hi20(sym) ++ jr $rd, %pc_lo12(sym) ++ */ + + if (TARGET_CMODEL_MEDIUM +- && TARGET_EXPLICIT_RELOCS ++ && !HAVE_AS_SUPPORT_CALL36 ++ && (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + && (SYMBOL_REF_P (addr) || LABEL_REF_P (addr)) + && (symbol_type == SYMBOL_PCREL + || (symbol_type == SYMBOL_GOT_DISP && flag_plt))) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index ed86c95bd..52e40a208 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -3274,7 +3274,13 @@ + XEXP (target, 1), + operands[1])); + else +- emit_call_insn (gen_sibcall_internal (target, operands[1])); ++ { ++ rtx call = emit_call_insn (gen_sibcall_internal (target, operands[1])); ++ ++ if (TARGET_CMODEL_MEDIUM && !REG_P (target)) ++ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call), ++ gen_rtx_REG (Pmode, T0_REGNUM)); ++ } + DONE; + }) + +@@ -3282,10 +3288,25 @@ + [(call (mem:SI (match_operand 0 "call_insn_operand" "j,c,b")) + (match_operand 1 "" ""))] + "SIBLING_CALL_P (insn)" +- "@ +- jr\t%0 +- b\t%0 +- b\t%%plt(%0)" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "jr\t%0"; ++ case 1: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r12,%%call36(%0)\n\tjirl\t$r0,$r12,0"; ++ else ++ return "b\t%0"; ++ case 2: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r12,%%call36(%0)\n\tjirl\t$r0,$r12,0"; ++ else ++ return "b\t%%plt(%0)"; ++ default: ++ gcc_unreachable (); ++ } ++} + [(set_attr "jirl" "indirect,direct,direct")]) + + (define_insn "@sibcall_internal_1" +@@ -3318,9 +3339,17 @@ + operands[2], + arg2)); + else +- emit_call_insn (gen_sibcall_value_multiple_internal (arg1, target, +- operands[2], +- arg2)); ++ { ++ rtx call ++ = emit_call_insn (gen_sibcall_value_multiple_internal (arg1, ++ target, ++ operands[2], ++ arg2)); ++ ++ if (TARGET_CMODEL_MEDIUM && !REG_P (target)) ++ clobber_reg 
(&CALL_INSN_FUNCTION_USAGE (call), ++ gen_rtx_REG (Pmode, T0_REGNUM)); ++ } + } + else + { +@@ -3334,8 +3363,15 @@ + XEXP (target, 1), + operands[2])); + else +- emit_call_insn (gen_sibcall_value_internal (operands[0], target, +- operands[2])); ++ { ++ rtx call = emit_call_insn (gen_sibcall_value_internal (operands[0], ++ target, ++ operands[2])); ++ ++ if (TARGET_CMODEL_MEDIUM && !REG_P (target)) ++ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call), ++ gen_rtx_REG (Pmode, T0_REGNUM)); ++ } + } + DONE; + }) +@@ -3345,10 +3381,25 @@ + (call (mem:SI (match_operand 1 "call_insn_operand" "j,c,b")) + (match_operand 2 "" "")))] + "SIBLING_CALL_P (insn)" +- "@ +- jr\t%1 +- b\t%1 +- b\t%%plt(%1)" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "jr\t%1"; ++ case 1: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r12,%%call36(%1)\n\tjirl\t$r0,$r12,0"; ++ else ++ return "b\t%1"; ++ case 2: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r12,%%call36(%1)\n\tjirl\t$r0,$r12,0"; ++ else ++ return "b\t%%plt(%1)"; ++ default: ++ gcc_unreachable (); ++ } ++} + [(set_attr "jirl" "indirect,direct,direct")]) + + (define_insn "@sibcall_value_internal_1" +@@ -3368,10 +3419,25 @@ + (call (mem:SI (match_dup 1)) + (match_dup 2)))] + "SIBLING_CALL_P (insn)" +- "@ +- jr\t%1 +- b\t%1 +- b\t%%plt(%1)" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "jr\t%1"; ++ case 1: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r12,%%call36(%1)\n\tjirl\t$r0,$r12,0"; ++ else ++ return "b\t%1"; ++ case 2: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r12,%%call36(%1)\n\tjirl\t$r0,$r12,0"; ++ else ++ return "b\t%%plt(%1)"; ++ default: ++ gcc_unreachable (); ++ } ++} + [(set_attr "jirl" "indirect,direct,direct")]) + + (define_insn "@sibcall_value_multiple_internal_1" +@@ -3411,10 +3477,25 @@ + (match_operand 1 "" "")) + (clobber (reg:SI RETURN_ADDR_REGNUM))] + "" +- "@ +- jirl\t$r1,%0,0 +- bl\t%0 +- bl\t%%plt(%0)" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return 
"jirl\t$r1,%0,0"; ++ case 1: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r1,%%call36(%0)\n\tjirl\t$r1,$r1,0"; ++ else ++ return "bl\t%0"; ++ case 2: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r1,%%call36(%0)\n\tjirl\t$r1,$r1,0"; ++ else ++ return "bl\t%%plt(%0)"; ++ default: ++ gcc_unreachable (); ++ } ++} + [(set_attr "jirl" "indirect,direct,direct")]) + + (define_insn "@call_internal_1" +@@ -3473,10 +3554,25 @@ + (match_operand 2 "" ""))) + (clobber (reg:SI RETURN_ADDR_REGNUM))] + "" +- "@ +- jirl\t$r1,%1,0 +- bl\t%1 +- bl\t%%plt(%1)" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "jirl\t$r1,%1,0"; ++ case 1: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r1,%%call36(%1)\n\tjirl\t$r1,$r1,0"; ++ else ++ return "bl\t%1"; ++ case 2: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r1,%%call36(%1)\n\tjirl\t$r1,$r1,0"; ++ else ++ return "bl\t%%plt(%1)"; ++ default: ++ gcc_unreachable (); ++ } ++} + [(set_attr "jirl" "indirect,direct,direct")]) + + (define_insn "@call_value_internal_1" +@@ -3498,10 +3594,25 @@ + (match_dup 2))) + (clobber (reg:SI RETURN_ADDR_REGNUM))] + "" +- "@ +- jirl\t$r1,%1,0 +- bl\t%1 +- bl\t%%plt(%1)" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "jirl\t$r1,%1,0"; ++ case 1: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r1,%%call36(%1)\n\tjirl\t$r1,$r1,0"; ++ else ++ return "bl\t%1"; ++ case 2: ++ if (TARGET_CMODEL_MEDIUM) ++ return "pcaddu18i\t$r1,%%call36(%1)\n\tjirl\t$r1,$r1,0"; ++ else ++ return "bl\t%%plt(%1)"; ++ default: ++ gcc_unreachable (); ++ } ++} + [(set_attr "jirl" "indirect,direct,direct")]) + + (define_insn "@call_value_multiple_internal_1" +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 1d669f560..2aae87db4 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -443,7 +443,9 @@ + { + case SYMBOL_PCREL: + if (TARGET_CMODEL_EXTREME +- || (TARGET_CMODEL_MEDIUM && !TARGET_EXPLICIT_RELOCS)) ++ 
|| (TARGET_CMODEL_MEDIUM ++ && HAVE_AS_SUPPORT_CALL36 ++ && (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE))) + return false; + else + return 1; +@@ -452,7 +454,8 @@ + if (TARGET_CMODEL_EXTREME + || !flag_plt + || (flag_plt && TARGET_CMODEL_MEDIUM +- && !TARGET_EXPLICIT_RELOCS)) ++ && HAVE_AS_SUPPORT_CALL36 ++ && (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE))) + return false; + else + return 1; +diff --git a/gcc/configure b/gcc/configure +index 09bacfec3..5842e7a18 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -28836,6 +28836,38 @@ if test $gcc_cv_as_loongarch_explicit_relocs = yes; then + + $as_echo "#define HAVE_AS_EXPLICIT_RELOCS 1" >>confdefs.h + ++fi ++ ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for call36 relocation support" >&5 ++$as_echo_n "checking assembler for call36 relocation support... " >&6; } ++if ${gcc_cv_as_loongarch_call36+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ gcc_cv_as_loongarch_call36=no ++ if test x$gcc_cv_as != x; then ++ $as_echo 'pcaddu18i $r1, %call36(a) ++ jirl $r1, $r1, 0' > conftest.s ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 ++ test $ac_status = 0; }; } ++ then ++ gcc_cv_as_loongarch_call36=yes ++ else ++ echo "configure: failed program was" >&5 ++ cat conftest.s >&5 ++ fi ++ rm -f conftest.o conftest.s ++ fi ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_call36" >&5 ++$as_echo "$gcc_cv_as_loongarch_call36" >&6; } ++if test $gcc_cv_as_loongarch_call36 = yes; then ++ ++$as_echo "#define HAVE_AS_SUPPORT_CALL36 1" >>confdefs.h ++ + fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for eh_frame pcrel encoding support" >&5 +diff --git a/gcc/configure.ac b/gcc/configure.ac +index a0999152e..9c3fd3ad6 100644 +--- a/gcc/configure.ac ++++ b/gcc/configure.ac +@@ -5329,6 +5329,12 @@ x: + [a:pcalau12i $t0,%pc_hi20(a)],, + [AC_DEFINE(HAVE_AS_EXPLICIT_RELOCS, 1, + [Define if your assembler supports explicit relocation.])]) ++ gcc_GAS_CHECK_FEATURE([call36 relocation support], ++ gcc_cv_as_loongarch_call36,, ++ [pcaddu18i $r1, %call36(a) ++ jirl $r1, $r1, 0],, ++ [AC_DEFINE(HAVE_AS_SUPPORT_CALL36, 1, ++ [Define if your assembler supports call36 relocation.])]) + gcc_GAS_CHECK_FEATURE([eh_frame pcrel encoding support], + gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support,, + [.cfi_startproc +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c +index 8a47b5afc..cae880bd8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c +@@ -1,4 +1,5 @@ + /* { dg-do compile } */ ++/* { dg-skip-if "dg-require-effective-target loongarch_call36_support" { *-*-* } } */ + /* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mexplicit-relocs -mcmodel=medium" } */ + /* { dg-final { scan-assembler "test:.*pcalau12i.*%pc_hi20\\(g\\)\n\tjirl.*pc_lo12\\(g\\)" } } */ + /* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20\\(f\\)\n\tjirl.*%pc_lo12\\(f\\)" } } */ +diff --git 
a/gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c +index 1e75e60e0..33819542d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c +@@ -1,4 +1,5 @@ + /* { dg-do compile } */ ++/* { dg-skip-if "dg-require-effective-target loongarch_call36_support" { *-*-* } } */ + /* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mexplicit-relocs -mcmodel=medium" } */ + /* { dg-final { scan-assembler "test:.*pcalau12i.*%pc_hi20\\(g\\)\n\tjirl.*pc_lo12\\(g\\)" } } */ + /* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20\\(f\\)\n\tjirl.*%pc_lo12\\(f\\)" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c +index 9e89085ca..969b59d04 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c +@@ -1,4 +1,5 @@ + /* { dg-do compile } */ ++/* { dg-skip-if "dg-require-effective-target loongarch_call36_support" { *-*-* } } */ + /* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs -mcmodel=medium" } */ + /* { dg-final { scan-assembler "test:.*pcalau12i\t.*%got_pc_hi20\\(g\\)\n\tld\.d\t.*%got_pc_lo12\\(g\\)\n\tjirl" } } */ + /* { dg-final { scan-assembler "test1:.*pcalau12i\t.*%got_pc_hi20\\(f\\)\n\tld\.d\t.*%got_pc_lo12\\(f\\)\n\tjirl" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c +index fde9c6e0e..786ff395f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c +@@ -1,4 +1,5 @@ + /* { dg-do compile } */ ++/* { dg-skip-if "dg-require-effective-target loongarch_call36_support" { *-*-* } } */ + /* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=medium" } */ + /* { dg-final { 
scan-assembler "test:.*pcalau12i\t.*%got_pc_hi20\\(g\\)\n\tld\.d\t.*%got_pc_lo12\\(g\\)\n\tjirl" } } */ + /* { dg-final { scan-assembler "test1:.*pcalau12i\t.*%pc_hi20\\(f\\)\n\tjirl.*%pc_lo12\\(f\\)" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36-1.c +new file mode 100644 +index 000000000..872ff32f8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36-1.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target loongarch_call36_support } */ ++/* { dg-options "-mcmodel=medium -mexplicit-relocs -fdump-rtl-final -O2" } */ ++/* { dg-final { scan-assembler "test:.*pcaddu18i\t\\\$r1,%call36\\(func\\)" } } */ ++/* { dg-final { scan-assembler "test_value:.*pcaddu18i\t\\\$r1,%call36\\(func_value\\)" } } */ ++ ++extern void func (void); ++int ++test (void) ++{ ++ func (); ++} ++ ++ ++extern int func_value (void); ++float ++test_value (void) ++{ ++ func_value (); ++} ++ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36.c +new file mode 100644 +index 000000000..98ccd260d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36.c +@@ -0,0 +1,32 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target loongarch_call36_support } */ ++/* { dg-options "-mcmodel=medium -mexplicit-relocs -fdump-rtl-final -O2" } */ ++/* { dg-final { scan-rtl-dump-times "\\(clobber \\(reg:DI 12 \\\$r12\\)\\)" 3 "final" } } */ ++/* { dg-final { scan-assembler "test:.*pcaddu18i\t\\\$r12,%call36\\(func\\)" } } */ ++/* { dg-final { scan-assembler "test_value:.*pcaddu18i\t\\\$r12,%call36\\(func_value\\)" } } */ ++/* { dg-final { scan-assembler "test_multi:.*pcaddu18i\t\\\$r12,%call36\\(func_multi\\)" } } */ ++ ++extern void func (void); ++void ++test (void) ++{ ++ func(); ++} ++ ++ ++extern int func_value (void); ++int ++test_value (void) ++{ ++ 
func_value (); ++} ++ ++struct t {float a; float b;}; ++ ++extern struct t func_multi (void); ++struct t ++test_multi (void) ++{ ++ func_multi (); ++} ++ +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index bbe145c1c..b8bff1a31 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -10573,6 +10573,15 @@ proc check_effective_target_loongarch_asx_hw { } { + } "-mlasx"] + } + ++# Check whether LoongArch binutils supports call36 relocation. ++proc check_effective_target_loongarch_call36_support { } { ++ return [check_no_compiler_messages loongarch_call36_support object { ++/* Assembly code */ ++ pcaddu18i $r1,%call36(a) ++ jirl $r1,$r1,0 ++ } ""] ++} ++ + # Return 1 if the target does *not* require strict alignment. + + proc check_effective_target_non_strict_align {} { +-- +2.43.0 + diff --git a/0033-LoongArch-Implement-atomic-operations-using-LoongArc.patch b/0033-LoongArch-Implement-atomic-operations-using-LoongArc.patch new file mode 100644 index 0000000..5b366d1 --- /dev/null +++ b/0033-LoongArch-Implement-atomic-operations-using-LoongArc.patch @@ -0,0 +1,362 @@ +From 704e67084fcd7f3ea89321e17dfafa7e907c907c Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Fri, 17 Nov 2023 15:42:53 +0800 +Subject: [PATCH 033/188] LoongArch: Implement atomic operations using + LoongArch1.1 instructions. + +1. short and char type calls for atomic_add_fetch and __atomic_fetch_add are + implemented using amadd{_db}.{b/h}. +2. Use amcas{_db}.{b/h/w/d} to implement __atomic_compare_exchange_n and __atomic_compare_exchange. +3. The short and char types of the functions __atomic_exchange and __atomic_exchange_n are + implemented using amswap{_db}.{b/h}. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.h: Add comments. + * config/loongarch/loongarch-opts.h (ISA_BASE_IS_LA64V110): Define macro. 
+ * config/loongarch/loongarch.cc (loongarch_memmodel_needs_rel_acq_fence): + Remove redundant code implementations. + * config/loongarch/sync.md (d): Added QI, HI support. + (atomic_add): New template. + (atomic_exchange_short): Likewise. + (atomic_cas_value_strong_amcas): Likewise. + (atomic_fetch_add_short): Likewise. +--- + gcc/config/loongarch/loongarch-def.h | 2 + + gcc/config/loongarch/loongarch-opts.h | 2 +- + gcc/config/loongarch/loongarch.cc | 6 +- + gcc/config/loongarch/sync.md | 186 ++++++++++++++++++++------ + 4 files changed, 147 insertions(+), 49 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 4757de14b..078d8607d 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -54,7 +54,9 @@ extern "C" { + + /* enum isa_base */ + extern const char* loongarch_isa_base_strings[]; ++/* LoongArch V1.00. */ + #define ISA_BASE_LA64V100 0 ++/* LoongArch V1.10. */ + #define ISA_BASE_LA64V110 1 + #define N_ISA_BASE_TYPES 2 + +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 22ce1a122..9b3d023ac 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -86,10 +86,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + || la_target.isa.simd == ISA_EXT_SIMD_LASX) + #define ISA_HAS_LASX (la_target.isa.simd == ISA_EXT_SIMD_LASX) + +- + /* TARGET_ macros for use in *.md template conditionals */ + #define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464) + #define TARGET_uARCH_LA664 (la_target.cpu_tune == CPU_LA664) ++#define ISA_BASE_IS_LA64V110 (la_target.isa.base == ISA_BASE_LA64V110) + + /* Note: optimize_size may vary across functions, + while -m[no]-memcpy imposes a global constraint.
*/ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 43f0e82ba..7bb46a45d 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5813,16 +5813,12 @@ loongarch_print_operand_punct_valid_p (unsigned char code) + static bool + loongarch_memmodel_needs_rel_acq_fence (enum memmodel model) + { +- switch (model) ++ switch (memmodel_base (model)) + { + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: +- case MEMMODEL_SYNC_SEQ_CST: + case MEMMODEL_RELEASE: +- case MEMMODEL_SYNC_RELEASE: + case MEMMODEL_ACQUIRE: +- case MEMMODEL_CONSUME: +- case MEMMODEL_SYNC_ACQUIRE: + return true; + + case MEMMODEL_RELAXED: +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index dd1f98946..1eabaec04 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -38,7 +38,7 @@ + [(plus "add") (ior "or") (xor "xor") (and "and")]) + + ;; This attribute gives the format suffix for atomic memory operations. +-(define_mode_attr amo [(SI "w") (DI "d")]) ++(define_mode_attr amo [(QI "b") (HI "h") (SI "w") (DI "d")]) + + ;; expands to the name of the atomic operand that implements a + ;; particular code. 
+@@ -123,7 +123,18 @@ + UNSPEC_SYNC_OLD_OP))] + "" + "am%A2.\t$zero,%z1,%0" +- [(set (attr "length") (const_int 8))]) ++ [(set (attr "length") (const_int 4))]) ++ ++(define_insn "atomic_add" ++ [(set (match_operand:SHORT 0 "memory_operand" "+ZB") ++ (unspec_volatile:SHORT ++ [(plus:SHORT (match_dup 0) ++ (match_operand:SHORT 1 "reg_or_0_operand" "rJ")) ++ (match_operand:SI 2 "const_int_operand")] ;; model ++ UNSPEC_SYNC_OLD_OP))] ++ "ISA_BASE_IS_LA64V110" ++ "amadd%A2.\t$zero,%z1,%0" ++ [(set (attr "length") (const_int 4))]) + + (define_insn "atomic_fetch_" + [(set (match_operand:GPR 0 "register_operand" "=&r") +@@ -131,12 +142,12 @@ + (set (match_dup 1) + (unspec_volatile:GPR + [(any_atomic:GPR (match_dup 1) +- (match_operand:GPR 2 "reg_or_0_operand" "rJ")) ++ (match_operand:GPR 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" + "am%A3.\t%0,%z2,%1" +- [(set (attr "length") (const_int 8))]) ++ [(set (attr "length") (const_int 4))]) + + (define_insn "atomic_exchange" + [(set (match_operand:GPR 0 "register_operand" "=&r") +@@ -148,7 +159,19 @@ + (match_operand:GPR 2 "register_operand" "r"))] + "" + "amswap%A3.\t%0,%z2,%1" +- [(set (attr "length") (const_int 8))]) ++ [(set (attr "length") (const_int 4))]) ++ ++(define_insn "atomic_exchange_short" ++ [(set (match_operand:SHORT 0 "register_operand" "=&r") ++ (unspec_volatile:SHORT ++ [(match_operand:SHORT 1 "memory_operand" "+ZB") ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ UNSPEC_SYNC_EXCHANGE)) ++ (set (match_dup 1) ++ (match_operand:SHORT 2 "register_operand" "r"))] ++ "ISA_BASE_IS_LA64V110" ++ "amswap%A3.\t%0,%z2,%1" ++ [(set (attr "length") (const_int 4))]) + + (define_insn "atomic_cas_value_strong" + [(set (match_operand:GPR 0 "register_operand" "=&r") +@@ -156,25 +179,36 @@ + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") + (match_operand:GPR 3 "reg_or_0_operand" "rJ") +- (match_operand:SI 4 
"const_int_operand") ;; mod_s +- (match_operand:SI 5 "const_int_operand")] ;; mod_f ++ (match_operand:SI 4 "const_int_operand")] ;; mod_s + UNSPEC_COMPARE_AND_SWAP)) +- (clobber (match_scratch:GPR 6 "=&r"))] ++ (clobber (match_scratch:GPR 5 "=&r"))] + "" + { + return "1:\\n\\t" + "ll.\\t%0,%1\\n\\t" + "bne\\t%0,%z2,2f\\n\\t" +- "or%i3\\t%6,$zero,%3\\n\\t" +- "sc.\\t%6,%1\\n\\t" +- "beqz\\t%6,1b\\n\\t" ++ "or%i3\\t%5,$zero,%3\\n\\t" ++ "sc.\\t%5,%1\\n\\t" ++ "beqz\\t%5,1b\\n\\t" + "b\\t3f\\n\\t" + "2:\\n\\t" +- "%G5\\n\\t" ++ "%G4\\n\\t" + "3:\\n\\t"; + } + [(set (attr "length") (const_int 28))]) + ++(define_insn "atomic_cas_value_strong_amcas" ++ [(set (match_operand:QHWD 0 "register_operand" "=&r") ++ (match_operand:QHWD 1 "memory_operand" "+ZB")) ++ (set (match_dup 1) ++ (unspec_volatile:QHWD [(match_operand:QHWD 2 "reg_or_0_operand" "rJ") ++ (match_operand:QHWD 3 "reg_or_0_operand" "rJ") ++ (match_operand:SI 4 "const_int_operand")] ;; mod_s ++ UNSPEC_COMPARE_AND_SWAP))] ++ "ISA_BASE_IS_LA64V110" ++ "ori\t%0,%z2,0\n\tamcas%A4.\t%0,%z3,%1" ++ [(set (attr "length") (const_int 8))]) ++ + (define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 "register_operand" "") ;; bool output + (match_operand:GPR 1 "register_operand" "") ;; val output +@@ -186,9 +220,29 @@ + (match_operand:SI 7 "const_int_operand" "")] ;; mod_f + "" + { +- emit_insn (gen_atomic_cas_value_strong (operands[1], operands[2], +- operands[3], operands[4], +- operands[6], operands[7])); ++ rtx mod_s, mod_f; ++ ++ mod_s = operands[6]; ++ mod_f = operands[7]; ++ ++ /* Normally the succ memory model must be stronger than fail, but in the ++ unlikely event of fail being ACQUIRE and succ being RELEASE we need to ++ promote succ to ACQ_REL so that we don't lose the acquire semantics. 
*/ ++ ++ if (is_mm_acquire (memmodel_base (INTVAL (mod_f))) ++ && is_mm_release (memmodel_base (INTVAL (mod_s)))) ++ mod_s = GEN_INT (MEMMODEL_ACQ_REL); ++ ++ operands[6] = mod_s; ++ ++ if (ISA_BASE_IS_LA64V110) ++ emit_insn (gen_atomic_cas_value_strong_amcas (operands[1], operands[2], ++ operands[3], operands[4], ++ operands[6])); ++ else ++ emit_insn (gen_atomic_cas_value_strong (operands[1], operands[2], ++ operands[3], operands[4], ++ operands[6])); + + rtx compare = operands[1]; + if (operands[3] != const0_rtx) +@@ -292,31 +346,53 @@ + (match_operand:SI 7 "const_int_operand" "")] ;; mod_f + "" + { +- union loongarch_gen_fn_ptrs generator; +- generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si; +- loongarch_expand_atomic_qihi (generator, operands[1], operands[2], +- operands[3], operands[4], operands[7]); ++ rtx mod_s, mod_f; + +- rtx compare = operands[1]; +- if (operands[3] != const0_rtx) +- { +- machine_mode mode = GET_MODE (operands[3]); +- rtx op1 = convert_modes (SImode, mode, operands[1], true); +- rtx op3 = convert_modes (SImode, mode, operands[3], true); +- rtx difference = gen_rtx_MINUS (SImode, op1, op3); +- compare = gen_reg_rtx (SImode); +- emit_insn (gen_rtx_SET (compare, difference)); +- } ++ mod_s = operands[6]; ++ mod_f = operands[7]; + +- if (word_mode != mode) ++ /* Normally the succ memory model must be stronger than fail, but in the ++ unlikely event of fail being ACQUIRE and succ being RELEASE we need to ++ promote succ to ACQ_REL so that we don't lose the acquire semantics. 
*/ ++ ++ if (is_mm_acquire (memmodel_base (INTVAL (mod_f))) ++ && is_mm_release (memmodel_base (INTVAL (mod_s)))) ++ mod_s = GEN_INT (MEMMODEL_ACQ_REL); ++ ++ operands[6] = mod_s; ++ ++ if (ISA_BASE_IS_LA64V110) ++ emit_insn (gen_atomic_cas_value_strong_amcas (operands[1], operands[2], ++ operands[3], operands[4], ++ operands[6])); ++ else + { +- rtx reg = gen_reg_rtx (word_mode); +- emit_insn (gen_rtx_SET (reg, gen_rtx_SIGN_EXTEND (word_mode, compare))); +- compare = reg; ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si; ++ loongarch_expand_atomic_qihi (generator, operands[1], operands[2], ++ operands[3], operands[4], operands[6]); + } + +- emit_insn (gen_rtx_SET (operands[0], +- gen_rtx_EQ (SImode, compare, const0_rtx))); ++ rtx compare = operands[1]; ++ if (operands[3] != const0_rtx) ++ { ++ machine_mode mode = GET_MODE (operands[3]); ++ rtx op1 = convert_modes (SImode, mode, operands[1], true); ++ rtx op3 = convert_modes (SImode, mode, operands[3], true); ++ rtx difference = gen_rtx_MINUS (SImode, op1, op3); ++ compare = gen_reg_rtx (SImode); ++ emit_insn (gen_rtx_SET (compare, difference)); ++ } ++ ++ if (word_mode != mode) ++ { ++ rtx reg = gen_reg_rtx (word_mode); ++ emit_insn (gen_rtx_SET (reg, gen_rtx_SIGN_EXTEND (word_mode, compare))); ++ compare = reg; ++ } ++ ++ emit_insn (gen_rtx_SET (operands[0], ++ gen_rtx_EQ (SImode, compare, const0_rtx))); + DONE; + }) + +@@ -505,13 +581,31 @@ + (match_operand:SHORT 2 "register_operand"))] + "" + { +- union loongarch_gen_fn_ptrs generator; +- generator.fn_7 = gen_atomic_cas_value_exchange_7_si; +- loongarch_expand_atomic_qihi (generator, operands[0], operands[1], +- const0_rtx, operands[2], operands[3]); ++ if (ISA_BASE_IS_LA64V110) ++ emit_insn (gen_atomic_exchange_short (operands[0], operands[1], operands[2], operands[3])); ++ else ++ { ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_exchange_7_si; ++ loongarch_expand_atomic_qihi 
(generator, operands[0], operands[1], ++ const0_rtx, operands[2], operands[3]); ++ } + DONE; + }) + ++(define_insn "atomic_fetch_add_short" ++ [(set (match_operand:SHORT 0 "register_operand" "=&r") ++ (match_operand:SHORT 1 "memory_operand" "+ZB")) ++ (set (match_dup 1) ++ (unspec_volatile:SHORT ++ [(plus:SHORT (match_dup 1) ++ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ UNSPEC_SYNC_OLD_OP))] ++ "ISA_BASE_IS_LA64V110" ++ "amadd%A3.\t%0,%z2,%1" ++ [(set (attr "length") (const_int 4))]) ++ + (define_expand "atomic_fetch_add" + [(set (match_operand:SHORT 0 "register_operand" "=&r") + (match_operand:SHORT 1 "memory_operand" "+ZB")) +@@ -523,10 +617,16 @@ + UNSPEC_SYNC_OLD_OP))] + "" + { +- union loongarch_gen_fn_ptrs generator; +- generator.fn_7 = gen_atomic_cas_value_add_7_si; +- loongarch_expand_atomic_qihi (generator, operands[0], operands[1], +- operands[1], operands[2], operands[3]); ++ if (ISA_BASE_IS_LA64V110) ++ emit_insn (gen_atomic_fetch_add_short (operands[0], operands[1], ++ operands[2], operands[3])); ++ else ++ { ++ union loongarch_gen_fn_ptrs generator; ++ generator.fn_7 = gen_atomic_cas_value_add_7_si; ++ loongarch_expand_atomic_qihi (generator, operands[0], operands[1], ++ operands[1], operands[2], operands[3]); ++ } + DONE; + }) + +-- +2.43.0 + diff --git a/0034-LoongArch-atomic_load-and-atomic_store-are-implement.patch b/0034-LoongArch-atomic_load-and-atomic_store-are-implement.patch new file mode 100644 index 0000000..24c0500 --- /dev/null +++ b/0034-LoongArch-atomic_load-and-atomic_store-are-implement.patch @@ -0,0 +1,140 @@ +From 61a70e6b6b44bf420eae559d998e109b70e5a9b6 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Fri, 17 Nov 2023 16:04:45 +0800 +Subject: [PATCH 034/188] LoongArch: atomic_load and atomic_store are + implemented using dbar grading. 
+ +The la464 memory model allows loads from the same address to be executed out of order, +so in the following example the load on line 23 may be executed before +the load on line 21, resulting in an error. +Therefore, when memmodel is MEMMODEL_RELAXED, the load instruction is followed by +"dbar 0x700" in the implementation of __atomic_load. + + 1 void * + 2 gomp_ptrlock_get_slow (gomp_ptrlock_t *ptrlock) + 3 { + 4 int *intptr; + 5 uintptr_t oldval = 1; + 6 + 7 __atomic_compare_exchange_n (ptrlock, &oldval, 2, false, + 8 MEMMODEL_RELAXED, MEMMODEL_RELAXED); + 9 + 10 /* futex works on ints, not pointers. + 11 But a valid work share pointer will be at least + 12 8 byte aligned, so it is safe to assume the low + 13 32-bits of the pointer won't contain values 1 or 2. */ + 14 __asm volatile ("" : "=r" (intptr) : "0" (ptrlock)); + 15 #if __BYTE_ORDER == __BIG_ENDIAN + 16 if (sizeof (*ptrlock) > sizeof (int)) + 17 intptr += (sizeof (*ptrlock) / sizeof (int)) - 1; + 18 #endif + 19 do + 20 do_wait (intptr, 2); + 21 while (__atomic_load_n (intptr, MEMMODEL_RELAXED) == 2); + 22 __asm volatile ("" : : : "memory"); + 23 return (void *) __atomic_load_n (ptrlock, MEMMODEL_ACQUIRE); + 24 } + +gcc/ChangeLog: + + * config/loongarch/sync.md (atomic_load): New template. +--- + gcc/config/loongarch/sync.md | 70 +++++++++++++++++++++++++++++++++--- + 1 file changed, 65 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index 1eabaec04..f4673c856 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -30,6 +30,7 @@ + UNSPEC_SYNC_OLD_OP + UNSPEC_SYNC_EXCHANGE + UNSPEC_ATOMIC_STORE ++ UNSPEC_ATOMIC_LOAD + UNSPEC_MEMORY_BARRIER + ]) + +@@ -103,16 +104,75 @@ + + ;; Atomic memory operations.
+ ++(define_insn "atomic_load" ++ [(set (match_operand:QHWD 0 "register_operand" "=r") ++ (unspec_volatile:QHWD ++ [(match_operand:QHWD 1 "memory_operand" "+m") ++ (match_operand:SI 2 "const_int_operand")] ;; model ++ UNSPEC_ATOMIC_LOAD))] ++ "" ++{ ++ enum memmodel model = memmodel_base (INTVAL (operands[2])); ++ ++ switch (model) ++ { ++ case MEMMODEL_SEQ_CST: ++ return "dbar\t0x11\\n\\t" ++ "ld.\t%0,%1\\n\\t" ++ "dbar\t0x14\\n\\t"; ++ case MEMMODEL_ACQUIRE: ++ return "ld.\t%0,%1\\n\\t" ++ "dbar\t0x14\\n\\t"; ++ case MEMMODEL_RELAXED: ++ return "ld.\t%0,%1\\n\\t" ++ "dbar\t0x700\\n\\t"; ++ ++ default: ++ /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, ++ __ATOMIC_CONSUME and __ATOMIC_ACQUIRE. ++ The expand_builtin_atomic_store function converts all invalid memmodels ++ to MEMMODEL_SEQ_CST. ++ ++ __atomic builtins doc: "Consume is implemented using the ++ stronger acquire memory order because of a deficiency in C++11's ++ semantics." See PR 59448 and get_memmodel in builtins.cc. */ ++ gcc_unreachable (); ++ } ++} ++ [(set (attr "length") (const_int 12))]) ++ + ;; Implement atomic stores with amoswap. Fall back to fences for atomic loads. 
+ (define_insn "atomic_store" +- [(set (match_operand:GPR 0 "memory_operand" "+ZB") +- (unspec_volatile:GPR +- [(match_operand:GPR 1 "reg_or_0_operand" "rJ") ++ [(set (match_operand:QHWD 0 "memory_operand" "+m") ++ (unspec_volatile:QHWD ++ [(match_operand:QHWD 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "const_int_operand")] ;; model + UNSPEC_ATOMIC_STORE))] + "" +- "amswap%A2.\t$zero,%z1,%0" +- [(set (attr "length") (const_int 8))]) ++{ ++ enum memmodel model = memmodel_base (INTVAL (operands[2])); ++ ++ switch (model) ++ { ++ case MEMMODEL_SEQ_CST: ++ return "dbar\t0x12\\n\\t" ++ "st.\t%z1,%0\\n\\t" ++ "dbar\t0x18\\n\\t"; ++ case MEMMODEL_RELEASE: ++ return "dbar\t0x12\\n\\t" ++ "st.\t%z1,%0\\n\\t"; ++ case MEMMODEL_RELAXED: ++ return "st.\t%z1,%0"; ++ ++ default: ++ /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, ++ and __ATOMIC_RELEASE. ++ The expand_builtin_atomic_store function converts all invalid memmodels ++ to MEMMODEL_SEQ_CST. */ ++ gcc_unreachable (); ++ } ++} ++ [(set (attr "length") (const_int 12))]) + + (define_insn "atomic_" + [(set (match_operand:GPR 0 "memory_operand" "+ZB") +-- +2.43.0 + diff --git a/0035-LoongArch-genopts-Add-infrastructure-to-generate-cod.patch b/0035-LoongArch-genopts-Add-infrastructure-to-generate-cod.patch new file mode 100644 index 0000000..91119cc --- /dev/null +++ b/0035-LoongArch-genopts-Add-infrastructure-to-generate-cod.patch @@ -0,0 +1,615 @@ +From 535fb5a2d4347801439fbb51fa07cd0317183cee Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 25 Oct 2024 02:08:03 +0000 +Subject: [PATCH 035/188] LoongArch: genopts: Add infrastructure to generate + code for new features in ISA evolution + +LoongArch v1.10 introduced the concept of ISA evolution. During ISA +evolution, many independent features can be added and enumerated via +CPUCFG. 
+ +Add a data file into genopts storing the CPUCFG word, bit, the name +of the command line option controlling whether this feature should be used +for compilation, and the text description. Make genstr.sh process this +information and add the command line options into loongarch.opt and +loongarch-str.h, and generate a new file loongarch-cpucfg-map.h for +mapping CPUCFG output to the corresponding option. When handling +-march=native, use the information in loongarch-cpucfg-map.h to generate +the corresponding option mask. Enable the features implied by -march +setting unless the user has explicitly disabled the feature. + +The added options (-mdiv32 and -mld-seq-sa) are not really handled yet. +They'll be used in the following patches. + +gcc/ChangeLog: + + * config/loongarch/genopts/isa-evolution.in: New data file. + * config/loongarch/genopts/genstr.sh: Translate info in + isa-evolution.in when generating loongarch-str.h, loongarch.opt, + and loongarch-cpucfg-map.h. + * config/loongarch/genopts/loongarch.opt.in (isa_evolution): + New variable. + * config/loongarch/t-loongarch: (loongarch-cpucfg-map.h): New + rule. + (loongarch-str.h): Depend on isa-evolution.in. + (loongarch.opt): Depend on isa-evolution.in. + (loongarch-cpu.o): Depend on loongarch-cpucfg-map.h. + * config/loongarch/loongarch-str.h: Regenerate. + * config/loongarch/loongarch-def.h (loongarch_isa): Add field + for evolution features. Add helper function to enable features + in this field. + Probe native CPU capability and save the corresponding options + into preset. + * config/loongarch/loongarch-cpu.cc (fill_native_cpu_config): + Probe native CPU capability and save the corresponding options + into preset. + (cache_cpucfg): Simplify with C++11-style for loop. + (cpucfg_useful_idx, N_CPUCFG_WORDS): Move to ... + * config/loongarch/loongarch.cc + (loongarch_option_override_internal): Enable the ISA evolution + feature options implied by -march and not explicitly disabled.
+ (loongarch_asm_code_end): New function, print ISA information as + comments in the assembly if -fverbose-asm. It makes easier to + debug things like -march=native. + (TARGET_ASM_CODE_END): Define. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch-cpucfg-map.h: Generate. + (cpucfg_useful_idx, N_CPUCFG_WORDS) ... here. +--- + gcc/config/loongarch/genopts/genstr.sh | 92 ++++++++++++++++++- + gcc/config/loongarch/genopts/isa-evolution.in | 2 + + gcc/config/loongarch/genopts/loongarch.opt.in | 7 ++ + gcc/config/loongarch/loongarch-cpu.cc | 46 +++++----- + gcc/config/loongarch/loongarch-cpucfg-map.h | 48 ++++++++++ + gcc/config/loongarch/loongarch-def.h | 7 ++ + gcc/config/loongarch/loongarch-str.h | 6 +- + gcc/config/loongarch/loongarch.cc | 31 +++++++ + gcc/config/loongarch/loongarch.opt | 20 +++- + gcc/config/loongarch/t-loongarch | 21 ++++- + 10 files changed, 244 insertions(+), 36 deletions(-) + create mode 100644 gcc/config/loongarch/genopts/isa-evolution.in + create mode 100644 gcc/config/loongarch/loongarch-cpucfg-map.h + +diff --git a/gcc/config/loongarch/genopts/genstr.sh b/gcc/config/loongarch/genopts/genstr.sh +index 972ef125f..bcc616e98 100755 +--- a/gcc/config/loongarch/genopts/genstr.sh ++++ b/gcc/config/loongarch/genopts/genstr.sh +@@ -25,8 +25,8 @@ cd "$(dirname "$0")" + # Generate a header containing definitions from the string table. + gen_defines() { + cat <. */ ++ ++#ifndef LOONGARCH_CPUCFG_MAP_H ++#define LOONGARCH_CPUCFG_MAP_H ++ ++#include "options.h" ++ ++static constexpr struct { ++ int cpucfg_word; ++ unsigned int cpucfg_bit; ++ HOST_WIDE_INT isa_evolution_bit; ++} cpucfg_map[] = { ++EOF ++ ++ # Generate the strings from isa-evolution.in. 
++ awk '{ ++ gsub(/-/, "_", $3) ++ print(" { "$1", 1u << "$2", OPTION_MASK_ISA_"toupper($3)" },") ++ }' isa-evolution.in ++ ++ echo "};" ++ echo ++ echo "static constexpr int cpucfg_useful_idx[] = {" ++ ++ awk 'BEGIN { print(" 0,\n 1,\n 2,\n 16,\n 17,\n 18,\n 19,") } ++ {if ($1+0 > max+0) max=$1; print(" "$1",")}' \ ++ isa-evolution.in | sort -n | uniq ++ ++ echo "};" ++ echo "" ++ ++ awk 'BEGIN { max=19 } ++ { if ($1+0 > max+0) max=$1 } ++ END { print "static constexpr int N_CPUCFG_WORDS = "1+max";" }' \ ++ isa-evolution.in ++ ++ echo "#endif /* LOONGARCH_CPUCFG_MAP_H */" + } + + main() { + case "$1" in ++ cpucfg-map) gen_cpucfg_map;; + header) gen_defines;; + opt) gen_options;; +- *) echo "Unknown Command: \"$1\". Available: header, opt"; exit 1;; ++ *) echo "Unknown Command: \"$1\". Available: cpucfg-map, header, opt"; exit 1;; + esac + } + +diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in +new file mode 100644 +index 000000000..e58f0d6a1 +--- /dev/null ++++ b/gcc/config/loongarch/genopts/isa-evolution.in +@@ -0,0 +1,2 @@ ++2 26 div32 Support div.w[u] and mod.w[u] instructions with inputs not sign-extended. ++3 23 ld-seq-sa Do not need load-load barriers (dbar 0x700). +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index bd3cfaf60..a49de07c9 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -247,3 +247,10 @@ Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) Integ + Indicate how many non memory access vector instructions can be issued per + cycle, it's used in unroll factor determination for autovectorizer. The + default value is 4. ++ ++; Features added during ISA evolution. This concept is different from ISA ++; extension, read Section 1.5 of LoongArch v1.10 Volume 1 for the ++; explanation. 
These features may be implemented and enumerated with ++; CPUCFG independantly, so we use bit flags to specify them. ++Variable ++HOST_WIDE_INT isa_evolution = 0 +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index cbe52d7ed..e1cd85d02 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -29,12 +29,11 @@ along with GCC; see the file COPYING3. If not see + #include "loongarch-def.h" + #include "loongarch-opts.h" + #include "loongarch-cpu.h" ++#include "loongarch-cpucfg-map.h" + #include "loongarch-str.h" + + /* Native CPU detection with "cpucfg" */ +-#define N_CPUCFG_WORDS 0x15 + static uint32_t cpucfg_cache[N_CPUCFG_WORDS] = { 0 }; +-static const int cpucfg_useful_idx[] = {0, 1, 2, 16, 17, 18, 19}; + + static uint32_t + read_cpucfg_word (int wordno) +@@ -56,11 +55,8 @@ read_cpucfg_word (int wordno) + void + cache_cpucfg (void) + { +- for (unsigned int i = 0; i < sizeof (cpucfg_useful_idx) / sizeof (int); i++) +- { +- cpucfg_cache[cpucfg_useful_idx[i]] +- = read_cpucfg_word (cpucfg_useful_idx[i]); +- } ++ for (int idx: cpucfg_useful_idx) ++ cpucfg_cache[idx] = read_cpucfg_word (idx); + } + + uint32_t +@@ -125,11 +121,12 @@ fill_native_cpu_config (struct loongarch_target *tgt) + int tmp; + tgt->cpu_arch = native_cpu_type; + ++ auto &preset = loongarch_cpu_default_isa[tgt->cpu_arch]; ++ + /* Fill: loongarch_cpu_default_isa[tgt->cpu_arch].base + With: base architecture (ARCH) + At: cpucfg_words[1][1:0] */ + +- #define PRESET_ARCH (loongarch_cpu_default_isa[tgt->cpu_arch].base) + switch (cpucfg_cache[1] & 0x3) + { + case 0x02: +@@ -144,19 +141,18 @@ fill_native_cpu_config (struct loongarch_target *tgt) + } + + /* Check consistency with PRID presets. 
*/ +- if (native_cpu_type != CPU_NATIVE && tmp != PRESET_ARCH) ++ if (native_cpu_type != CPU_NATIVE && tmp != preset.base) + warning (0, "base architecture %qs differs from PRID preset %qs", + loongarch_isa_base_strings[tmp], +- loongarch_isa_base_strings[PRESET_ARCH]); ++ loongarch_isa_base_strings[preset.base]); + + /* Use the native value anyways. */ +- PRESET_ARCH = tmp; ++ preset.base = tmp; + + /* Fill: loongarch_cpu_default_isa[tgt->cpu_arch].fpu + With: FPU type (FP, FP_SP, FP_DP) + At: cpucfg_words[2][2:0] */ + +- #define PRESET_FPU (loongarch_cpu_default_isa[tgt->cpu_arch].fpu) + switch (cpucfg_cache[2] & 0x7) + { + case 0x07: +@@ -179,20 +175,19 @@ fill_native_cpu_config (struct loongarch_target *tgt) + } + + /* Check consistency with PRID presets. */ +- if (native_cpu_type != CPU_NATIVE && tmp != PRESET_FPU) ++ if (native_cpu_type != CPU_NATIVE && tmp != preset.fpu) + warning (0, "floating-point unit %qs differs from PRID preset %qs", + loongarch_isa_ext_strings[tmp], +- loongarch_isa_ext_strings[PRESET_FPU]); ++ loongarch_isa_ext_strings[preset.fpu]); + + /* Use the native value anyways. */ +- PRESET_FPU = tmp; ++ preset.fpu = tmp; + + + /* Fill: loongarch_cpu_default_isa[CPU_NATIVE].simd + With: SIMD extension type (LSX, LASX) + At: cpucfg_words[2][7:6] */ + +- #define PRESET_SIMD (loongarch_cpu_default_isa[tgt->cpu_arch].simd) + switch (cpucfg_cache[2] & 0xc0) + { + case 0xc0: +@@ -219,14 +214,19 @@ fill_native_cpu_config (struct loongarch_target *tgt) + /* Check consistency with PRID presets. */ + + /* +- if (native_cpu_type != CPU_NATIVE && tmp != PRESET_SIMD) ++ if (native_cpu_type != CPU_NATIVE && tmp != preset.simd) + warning (0, "SIMD extension %qs differs from PRID preset %qs", + loongarch_isa_ext_strings[tmp], +- loongarch_isa_ext_strings[PRESET_SIMD]); ++ loongarch_isa_ext_strings[preset.simd]); + */ + + /* Use the native value anyways. */ +- PRESET_SIMD = tmp; ++ preset.simd = tmp; ++ ++ /* Features added during ISA evolution. 
*/ ++ for (const auto &entry: cpucfg_map) ++ if (cpucfg_cache[entry.cpucfg_word] & entry.cpucfg_bit) ++ preset.evolution |= entry.isa_evolution_bit; + } + + if (tune_native_p) +@@ -237,7 +237,7 @@ fill_native_cpu_config (struct loongarch_target *tgt) + With: cache size info + At: cpucfg_words[16:20][31:0] */ + +- #define PRESET_CACHE (loongarch_cpu_cache[tgt->cpu_tune]) ++ auto &preset_cache = loongarch_cpu_cache[tgt->cpu_tune]; + struct loongarch_cache native_cache; + int l1d_present = 0, l1u_present = 0; + int l2d_present = 0; +@@ -268,8 +268,8 @@ fill_native_cpu_config (struct loongarch_target *tgt) + >> 10; /* in kibibytes */ + + /* Use the native value anyways. */ +- PRESET_CACHE.l1d_line_size = native_cache.l1d_line_size; +- PRESET_CACHE.l1d_size = native_cache.l1d_size; +- PRESET_CACHE.l2d_size = native_cache.l2d_size; ++ preset_cache.l1d_line_size = native_cache.l1d_line_size; ++ preset_cache.l1d_size = native_cache.l1d_size; ++ preset_cache.l2d_size = native_cache.l2d_size; + } + } +diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-cpucfg-map.h +new file mode 100644 +index 000000000..0c078c397 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-cpucfg-map.h +@@ -0,0 +1,48 @@ ++/* Generated automatically by "genstr" from "isa-evolution.in". ++ Please do not edit this file directly. ++ ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. 
++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#ifndef LOONGARCH_CPUCFG_MAP_H ++#define LOONGARCH_CPUCFG_MAP_H ++ ++#include "options.h" ++ ++static constexpr struct { ++ int cpucfg_word; ++ unsigned int cpucfg_bit; ++ HOST_WIDE_INT isa_evolution_bit; ++} cpucfg_map[] = { ++ { 2, 1u << 26, OPTION_MASK_ISA_DIV32 }, ++ { 3, 1u << 23, OPTION_MASK_ISA_LD_SEQ_SA }, ++}; ++ ++static constexpr int cpucfg_useful_idx[] = { ++ 0, ++ 1, ++ 2, ++ 3, ++ 16, ++ 17, ++ 18, ++ 19, ++}; ++ ++static constexpr int N_CPUCFG_WORDS = 20; ++#endif /* LOONGARCH_CPUCFG_MAP_H */ +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 078d8607d..cb99caebe 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. If not see + #ifndef LOONGARCH_DEF_H + #define LOONGARCH_DEF_H + ++#include + #include "loongarch-tune.h" + + #ifdef __cplusplus +@@ -121,6 +122,12 @@ struct loongarch_isa + int base; /* ISA_BASE_ */ + int fpu; /* ISA_EXT_FPU_ */ + int simd; /* ISA_EXT_SIMD_ */ ++ ++ /* ISA evolution features implied by -march=, for -march=native probed ++ via CPUCFG. The features implied by base may be not included here. ++ ++ Using int64_t instead of HOST_WIDE_INT for C compatibility. */ ++ int64_t evolution; + }; + + struct loongarch_abi +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index 037e9e583..cd9dbb41b 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -1,5 +1,5 @@ +-/* Generated automatically by "genstr" from "loongarch-strings". +- Please do not edit this file directly. ++/* Generated automatically by "genstr" from "loongarch-strings" and ++ "isa-evolution.in". Please do not edit this file directly. + + Copyright (C) 2021-2022 Free Software Foundation, Inc. 
+ Contributed by Loongson Ltd. +@@ -69,4 +69,6 @@ along with GCC; see the file COPYING3. If not see + #define STR_EXPLICIT_RELOCS_NONE "none" + #define STR_EXPLICIT_RELOCS_ALWAYS "always" + ++#define OPTSTR_DIV32 "div32" ++#define OPTSTR_LD_SEQ_SA "ld-seq-sa" + #endif /* LOONGARCH_STR_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 7bb46a45d..8bd46da62 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -7451,6 +7451,10 @@ loongarch_option_override_internal (struct gcc_options *opts, + if (loongarch_branch_cost == 0) + loongarch_branch_cost = loongarch_cost->branch_cost; + ++ /* If the user hasn't disabled a feature added during ISA evolution, ++ use the processor's default. */ ++ isa_evolution |= (la_target.isa.evolution & ++ ~global_options_set.x_isa_evolution); + + /* Enable sw prefetching at -O3 and higher. */ + if (opts->x_flag_prefetch_loop_arrays < 0 +@@ -11427,6 +11431,30 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode, + is_packed); + } + ++/* If -fverbose-asm, dump some info for debugging. */ ++static void ++loongarch_asm_code_end (void) ++{ ++#define DUMP_FEATURE(PRED) \ ++ fprintf (asm_out_file, "%s %s: %s\n", ASM_COMMENT_START, #PRED, \ ++ (PRED) ? "enabled" : "disabled") ++ ++ if (flag_verbose_asm) ++ { ++ fprintf (asm_out_file, "\n%s CPU: %s\n", ASM_COMMENT_START, ++ loongarch_cpu_strings [la_target.cpu_arch]); ++ fprintf (asm_out_file, "%s Tune: %s\n", ASM_COMMENT_START, ++ loongarch_cpu_strings [la_target.cpu_tune]); ++ fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START, ++ loongarch_isa_base_strings [la_target.isa.base]); ++ DUMP_FEATURE (TARGET_DIV32); ++ DUMP_FEATURE (TARGET_LD_SEQ_SA); ++ } ++ ++ fputs ("\n\n", asm_out_file); ++#undef DUMP_FEATURE ++} ++ + /* Initialize the GCC target structure. 
*/ + #undef TARGET_ASM_ALIGNED_HI_OP + #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" +@@ -11446,6 +11474,9 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode, + #undef TARGET_ASM_FUNCTION_RODATA_SECTION + #define TARGET_ASM_FUNCTION_RODATA_SECTION loongarch_function_rodata_section + ++#undef TARGET_ASM_CODE_END ++#define TARGET_ASM_CODE_END loongarch_asm_code_end ++ + #undef TARGET_SCHED_INIT + #define TARGET_SCHED_INIT loongarch_sched_init + #undef TARGET_SCHED_REORDER +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index d936954b8..5251f705d 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -1,9 +1,10 @@ + ; Generated by "genstr" from the template "loongarch.opt.in" +-; and definitions from "loongarch-strings". ++; and definitions from "loongarch-strings" and "isa-evolution.in". + ; + ; Please do not edit this file directly. + ; It will be automatically updated during a gcc build +-; if you change "loongarch.opt.in" or "loongarch-strings". ++; if you change "loongarch.opt.in", "loongarch-strings", or ++; "isa-evolution.in". + ; + ; Copyright (C) 2021-2022 Free Software Foundation, Inc. + ; +@@ -254,3 +255,18 @@ Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) Integ + Indicate how many non memory access vector instructions can be issued per + cycle, it's used in unroll factor determination for autovectorizer. The + default value is 4. ++ ++; Features added during ISA evolution. This concept is different from ISA ++; extension, read Section 1.5 of LoongArch v1.10 Volume 1 for the ++; explanation. These features may be implemented and enumerated with ++; CPUCFG independantly, so we use bit flags to specify them. ++Variable ++HOST_WIDE_INT isa_evolution = 0 ++ ++mdiv32 ++Target Mask(ISA_DIV32) Var(isa_evolution) ++Support div.w[u] and mod.w[u] instructions with inputs not sign-extended. 
++ ++mld-seq-sa ++Target Mask(ISA_LD_SEQ_SA) Var(isa_evolution) ++Do not need load-load barriers (dbar 0x700). +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index 12734c37b..57b1176bc 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -18,8 +18,9 @@ + + + GTM_H += loongarch-multilib.h +-OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \ +- $(srcdir)/config/loongarch/loongarch-tune.h ++OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \ ++ $(srcdir)/config/loongarch/loongarch-tune.h \ ++ $(srcdir)/config/loongarch/loongarch-cpucfg-map.h + + # Canonical target triplet from config.gcc + LA_MULTIARCH_TRIPLET = $(patsubst LA_MULTIARCH_TRIPLET=%,%,$\ +@@ -31,7 +32,8 @@ LA_STR_H = $(srcdir)/config/loongarch/loongarch-str.h + # String definition header + $(LA_STR_H): s-loongarch-str ; @true + s-loongarch-str: $(srcdir)/config/loongarch/genopts/genstr.sh \ +- $(srcdir)/config/loongarch/genopts/loongarch-strings ++ $(srcdir)/config/loongarch/genopts/loongarch-strings \ ++ $(srcdir)/config/loongarch/genopts/isa-evolution.in + $(SHELL) $(srcdir)/config/loongarch/genopts/genstr.sh header \ + $(srcdir)/config/loongarch/genopts/loongarch-strings > \ + tmp-loongarch-str.h +@@ -58,7 +60,8 @@ loongarch-driver.o : $(srcdir)/config/loongarch/loongarch-driver.cc $(LA_STR_H) + loongarch-opts.o: $(srcdir)/config/loongarch/loongarch-opts.cc $(LA_STR_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +-loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) ++loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) \ ++ $(srcdir)/config/loongarch/loongarch-cpucfg-map.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + + loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.c $(LA_STR_H) +@@ -67,6 +70,7 @@ loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.c $(LA_STR_H) + 
$(srcdir)/config/loongarch/loongarch.opt: s-loongarch-opt ; @true + s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \ + $(srcdir)/config/loongarch/genopts/loongarch.opt.in \ ++ $(srcdir)/config/loongarch/genopts/isa-evolution.in \ + $(srcdir)/config/loongarch/genopts/loongarch-strings $(LA_STR_H) + $(SHELL) $(srcdir)/config/loongarch/genopts/genstr.sh opt \ + $(srcdir)/config/loongarch/genopts/loongarch.opt.in \ +@@ -74,3 +78,12 @@ s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \ + $(SHELL) $(srcdir)/../move-if-change tmp-loongarch.opt \ + $(srcdir)/config/loongarch/loongarch.opt + $(STAMP) s-loongarch-opt ++ ++$(srcdir)/config/loongarch/loongarch-cpucfg-map.h: s-loongarch-cpucfg-map ++ @true ++s-loongarch-cpucfg-map: $(srcdir)/config/loongarch/genopts/genstr.sh \ ++ $(srcdir)/config/loongarch/genopts/isa-evolution.in ++ $(SHELL) $< cpucfg-map > tmp-cpucfg.h ++ $(SHELL) $(srcdir)/../move-if-change tmp-cpucfg.h \ ++ $(srcdir)/config/loongarch/loongarch-cpucfg-map.h ++ $(STAMP) $@ +-- +2.43.0 + diff --git a/0036-LoongArch-Add-evolution-features-of-base-ISA-revisio.patch b/0036-LoongArch-Add-evolution-features-of-base-ISA-revisio.patch new file mode 100644 index 0000000..ba0c123 --- /dev/null +++ b/0036-LoongArch-Add-evolution-features-of-base-ISA-revisio.patch @@ -0,0 +1,148 @@ +From 24648180418affbaf044a58ae0b5f79a0cf71155 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sat, 18 Nov 2023 03:19:07 +0800 +Subject: [PATCH 036/188] LoongArch: Add evolution features of base ISA + revisions + + * config/loongarch/loongarch-def.h: + (loongarch_isa_base_features): Declare. Define it in ... + * config/loongarch/loongarch-cpu.cc + (loongarch_isa_base_features): ... here. + (fill_native_cpu_config): If we know the base ISA of the CPU + model from PRID, use it instead of la64 (v1.0). Check if all + expected features of this base ISA is available, emit a warning + if not. 
+ * config/loongarch/loongarch-opts.cc (config_target_isa): Enable + the features implied by the base ISA if not -march=native. +--- + gcc/config/loongarch/loongarch-cpu.cc | 62 ++++++++++++++++++-------- + gcc/config/loongarch/loongarch-def.h | 5 +++ + gcc/config/loongarch/loongarch-opts.cc | 3 ++ + 3 files changed, 52 insertions(+), 18 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index e1cd85d02..76d66fa55 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -32,6 +32,19 @@ along with GCC; see the file COPYING3. If not see + #include "loongarch-cpucfg-map.h" + #include "loongarch-str.h" + ++/* loongarch_isa_base_features defined here instead of loongarch-def.c ++ because we need to use options.h. Pay attention on the order of elements ++ in the initializer becaue ISO C++ does not allow C99 designated ++ initializers! */ ++ ++#define ISA_BASE_LA64V110_FEATURES \ ++ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA) ++ ++int64_t loongarch_isa_base_features[N_ISA_BASE_TYPES] = { ++ /* [ISA_BASE_LA64V100] = */ 0, ++ /* [ISA_BASE_LA64V110] = */ ISA_BASE_LA64V110_FEATURES, ++}; ++ + /* Native CPU detection with "cpucfg" */ + static uint32_t cpucfg_cache[N_CPUCFG_WORDS] = { 0 }; + +@@ -127,24 +140,22 @@ fill_native_cpu_config (struct loongarch_target *tgt) + With: base architecture (ARCH) + At: cpucfg_words[1][1:0] */ + +- switch (cpucfg_cache[1] & 0x3) +- { +- case 0x02: +- tmp = ISA_BASE_LA64V100; +- break; +- +- default: +- fatal_error (UNKNOWN_LOCATION, +- "unknown native base architecture %<0x%x%>, " +- "%qs failed", (unsigned int) (cpucfg_cache[1] & 0x3), +- "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE); +- } +- +- /* Check consistency with PRID presets. 
*/ +- if (native_cpu_type != CPU_NATIVE && tmp != preset.base) +- warning (0, "base architecture %qs differs from PRID preset %qs", +- loongarch_isa_base_strings[tmp], +- loongarch_isa_base_strings[preset.base]); ++ if (native_cpu_type != CPU_NATIVE) ++ tmp = loongarch_cpu_default_isa[native_cpu_type].base; ++ else ++ switch (cpucfg_cache[1] & 0x3) ++ { ++ case 0x02: ++ tmp = ISA_BASE_LA64V100; ++ break; ++ ++ default: ++ fatal_error (UNKNOWN_LOCATION, ++ "unknown native base architecture %<0x%x%>, " ++ "%qs failed", ++ (unsigned int) (cpucfg_cache[1] & 0x3), ++ "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE); ++ } + + /* Use the native value anyways. */ + preset.base = tmp; +@@ -227,6 +238,21 @@ fill_native_cpu_config (struct loongarch_target *tgt) + for (const auto &entry: cpucfg_map) + if (cpucfg_cache[entry.cpucfg_word] & entry.cpucfg_bit) + preset.evolution |= entry.isa_evolution_bit; ++ ++ if (native_cpu_type != CPU_NATIVE) ++ { ++ /* Check if the local CPU really supports the features of the base ++ ISA of probed native_cpu_type. If any feature is not detected, ++ either GCC or the hardware is buggy. */ ++ auto base_isa_feature = loongarch_isa_base_features[preset.base]; ++ if ((preset.evolution & base_isa_feature) != base_isa_feature) ++ warning (0, ++ "detected base architecture %qs, but some of its " ++ "features are not detected; the detected base " ++ "architecture may be unreliable, only detected " ++ "features will be enabled", ++ loongarch_isa_base_strings[preset.base]); ++ } + } + + if (tune_native_p) +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index cb99caebe..ca0a324dd 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -55,12 +55,17 @@ extern "C" { + + /* enum isa_base */ + extern const char* loongarch_isa_base_strings[]; ++ + /* LoongArch V1.00. */ + #define ISA_BASE_LA64V100 0 + /* LoongArch V1.10. 
*/ + #define ISA_BASE_LA64V110 1 + #define N_ISA_BASE_TYPES 2 + ++/* Unlike other arrays, this is defined in loongarch-cpu.cc. The problem is ++ we cannot use the C++ header options.h in loongarch-def.c. */ ++extern int64_t loongarch_isa_base_features[]; ++ + /* enum isa_ext_* */ + extern const char* loongarch_isa_ext_strings[]; + #define ISA_EXT_NONE 0 +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index f10a9d3ff..390720479 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -284,6 +284,9 @@ config_target_isa: + /* Get default ISA from "-march" or its default value. */ + t.isa = loongarch_cpu_default_isa[t.cpu_arch]; + ++ if (t.cpu_arch != CPU_NATIVE) ++ t.isa.evolution |= loongarch_isa_base_features[t.isa.base]; ++ + /* Apply incremental changes. */ + /* "-march=native" overrides the default FPU type. */ + +-- +2.43.0 + diff --git a/0037-LoongArch-Take-the-advantage-of-mdiv32-if-it-s-enabl.patch b/0037-LoongArch-Take-the-advantage-of-mdiv32-if-it-s-enabl.patch new file mode 100644 index 0000000..28f3226 --- /dev/null +++ b/0037-LoongArch-Take-the-advantage-of-mdiv32-if-it-s-enabl.patch @@ -0,0 +1,156 @@ +From 6b483504c4fbb2a05a17d67e8f51b72149f1bbf9 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Thu, 16 Nov 2023 09:21:47 +0800 +Subject: [PATCH 037/188] LoongArch: Take the advantage of -mdiv32 if it's + enabled + +With -mdiv32, we can assume div.w[u] and mod.w[u] works on low 32 bits +of a 64-bit GPR even if it's not sign-extended. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (DIV): New mode iterator. + (3): Don't expand if TARGET_DIV32. + (di3_fake): Disable if TARGET_DIV32. + (*3): Allow SImode if TARGET_DIV32. + (si3_extended): New insn if TARGET_DIV32. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/div-div32.c: New test. + * gcc.target/loongarch/div-no-div32.c: New test. 
+--- + gcc/config/loongarch/loongarch.md | 31 ++++++++++++++++--- + .../gcc.target/loongarch/div-div32.c | 31 +++++++++++++++++++ + .../gcc.target/loongarch/div-no-div32.c | 11 +++++++ + 3 files changed, 68 insertions(+), 5 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/div-div32.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/div-no-div32.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 52e40a208..c4e7af107 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -408,6 +408,10 @@ + ;; st.w. + (define_mode_iterator ST_ANY [QHWD ANYF]) + ++;; A mode for anything legal as a input of a div or mod instruction. ++(define_mode_iterator DIV [(DI "TARGET_64BIT") ++ (SI "!TARGET_64BIT || TARGET_DIV32")]) ++ + ;; In GPR templates, a string like "mul." will expand to "mul.w" in the + ;; 32-bit version and "mul.d" in the 64-bit version. + (define_mode_attr d [(SI "w") (DI "d")]) +@@ -914,7 +918,7 @@ + (match_operand:GPR 2 "register_operand")))] + "" + { +- if (GET_MODE (operands[0]) == SImode && TARGET_64BIT) ++ if (GET_MODE (operands[0]) == SImode && TARGET_64BIT && !TARGET_DIV32) + { + rtx reg1 = gen_reg_rtx (DImode); + rtx reg2 = gen_reg_rtx (DImode); +@@ -934,9 +938,9 @@ + }) + + (define_insn "*3" +- [(set (match_operand:X 0 "register_operand" "=r,&r,&r") +- (any_div:X (match_operand:X 1 "register_operand" "r,r,0") +- (match_operand:X 2 "register_operand" "r,r,r")))] ++ [(set (match_operand:DIV 0 "register_operand" "=r,&r,&r") ++ (any_div:DIV (match_operand:DIV 1 "register_operand" "r,r,0") ++ (match_operand:DIV 2 "register_operand" "r,r,r")))] + "" + { + return loongarch_output_division (".\t%0,%1,%2", operands); +@@ -949,6 +953,23 @@ + (const_string "yes") + (const_string "no")))]) + ++(define_insn "si3_extended" ++ [(set (match_operand:DI 0 "register_operand" "=r,&r,&r") ++ (sign_extend ++ (any_div:SI (match_operand:SI 1 "register_operand" "r,r,0") ++ 
(match_operand:SI 2 "register_operand" "r,r,r"))))] ++ "TARGET_64BIT && TARGET_DIV32" ++{ ++ return loongarch_output_division (".w\t%0,%1,%2", operands); ++} ++ [(set_attr "type" "idiv") ++ (set_attr "mode" "SI") ++ (set (attr "enabled") ++ (if_then_else ++ (match_test "!!which_alternative == loongarch_check_zero_div_p()") ++ (const_string "yes") ++ (const_string "no")))]) ++ + (define_insn "di3_fake" + [(set (match_operand:DI 0 "register_operand" "=r,&r,&r") + (sign_extend:DI +@@ -957,7 +978,7 @@ + (any_div:DI (match_operand:DI 1 "register_operand" "r,r,0") + (match_operand:DI 2 "register_operand" "r,r,r")) 0)] + UNSPEC_FAKE_ANY_DIV)))] +- "TARGET_64BIT" ++ "TARGET_64BIT && !TARGET_DIV32" + { + return loongarch_output_division (".w\t%0,%1,%2", operands); + } +diff --git a/gcc/testsuite/gcc.target/loongarch/div-div32.c b/gcc/testsuite/gcc.target/loongarch/div-div32.c +new file mode 100644 +index 000000000..8b1f686ec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/div-div32.c +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mdiv32" } */ ++/* { dg-final { scan-assembler "div\.w" } } */ ++/* { dg-final { scan-assembler "div\.wu" } } */ ++/* { dg-final { scan-assembler "mod\.w" } } */ ++/* { dg-final { scan-assembler "mod\.wu" } } */ ++/* { dg-final { scan-assembler-not "slli\.w.*,0" } } */ ++ ++int ++divw (long a, long b) ++{ ++ return (int)a / (int)b; ++} ++ ++unsigned int ++divwu (long a, long b) ++{ ++ return (unsigned int)a / (unsigned int)b; ++} ++ ++int ++modw (long a, long b) ++{ ++ return (int)a % (int)b; ++} ++ ++unsigned int ++modwu (long a, long b) ++{ ++ return (unsigned int)a % (unsigned int)b; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/div-no-div32.c b/gcc/testsuite/gcc.target/loongarch/div-no-div32.c +new file mode 100644 +index 000000000..f0f697ba5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/div-no-div32.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 
-march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler "div\.w" } } */ ++/* { dg-final { scan-assembler "div\.wu" } } */ ++/* { dg-final { scan-assembler "mod\.w" } } */ ++/* { dg-final { scan-assembler "mod\.wu" } } */ ++ ++/* -mno-div32 should be implied by -march=loongarch64. */ ++/* { dg-final { scan-assembler-times "slli\.w\[^\n\]*0" 8 } } */ ++ ++#include "div-div32.c" +-- +2.43.0 + diff --git a/0038-LoongArch-Don-t-emit-dbar-0x700-if-mld-seq-sa.patch b/0038-LoongArch-Don-t-emit-dbar-0x700-if-mld-seq-sa.patch new file mode 100644 index 0000000..9696f85 --- /dev/null +++ b/0038-LoongArch-Don-t-emit-dbar-0x700-if-mld-seq-sa.patch @@ -0,0 +1,61 @@ +From 42368d6ab1200c157ff473c37889b56b596040e2 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Thu, 16 Nov 2023 09:30:14 +0800 +Subject: [PATCH 038/188] LoongArch: Don't emit dbar 0x700 if -mld-seq-sa + +This option (CPUCFG word 0x3 bit 23) means "the hardware guarantee that +two loads on the same address won't be reordered with each other". Thus +we can omit the "load-load" barrier dbar 0x700. + +This is only a micro-optimization because dbar 0x700 is already treated +as nop if the hardware supports LD_SEQ_SA. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_print_operand): Don't + print dbar 0x700 if TARGET_LD_SEQ_SA. + * config/loongarch/sync.md (atomic_load): Likewise. 
+--- + gcc/config/loongarch/loongarch.cc | 2 +- + gcc/config/loongarch/sync.md | 9 +++++---- + 2 files changed, 6 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 8bd46da62..c86b787c4 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6057,7 +6057,7 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + if (loongarch_cas_failure_memorder_needs_acquire ( + memmodel_from_int (INTVAL (op)))) + fputs ("dbar\t0b10100", file); +- else ++ else if (!TARGET_LD_SEQ_SA) + fputs ("dbar\t0x700", file); + break; + +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index f4673c856..65443c899 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -119,13 +119,14 @@ + case MEMMODEL_SEQ_CST: + return "dbar\t0x11\\n\\t" + "ld.\t%0,%1\\n\\t" +- "dbar\t0x14\\n\\t"; ++ "dbar\t0x14"; + case MEMMODEL_ACQUIRE: + return "ld.\t%0,%1\\n\\t" +- "dbar\t0x14\\n\\t"; ++ "dbar\t0x14"; + case MEMMODEL_RELAXED: +- return "ld.\t%0,%1\\n\\t" +- "dbar\t0x700\\n\\t"; ++ return TARGET_LD_SEQ_SA ? "ld.\t%0,%1\\n\\t" ++ : "ld.\t%0,%1\\n\\t" ++ "dbar\t0x700"; + + default: + /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, +-- +2.43.0 + diff --git a/0039-LoongArch-Add-fine-grained-control-for-LAM_BH-and-LA.patch b/0039-LoongArch-Add-fine-grained-control-for-LAM_BH-and-LA.patch new file mode 100644 index 0000000..03b2894 --- /dev/null +++ b/0039-LoongArch-Add-fine-grained-control-for-LAM_BH-and-LA.patch @@ -0,0 +1,208 @@ +From 416bdd180a6c0dab4736a6da26de245cb0487c0e Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 25 Oct 2024 02:13:53 +0000 +Subject: [PATCH 039/188] LoongArch: Add fine-grained control for LAM_BH and + LAMCAS + +gcc/ChangeLog: + + * config/loongarch/genopts/isa-evolution.in: (lam-bh, lamcas): + Add. + * config/loongarch/loongarch-str.h: Regenerate. 
+ * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch-cpucfg-map.h: Regenerate. + * config/loongarch/loongarch-cpu.cc + (ISA_BASE_LA64V110_FEATURES): Include OPTION_MASK_ISA_LAM_BH + and OPTION_MASK_ISA_LAMCAS. + * config/loongarch/sync.md (atomic_add): Use + TARGET_LAM_BH instead of ISA_BASE_IS_LA64V110. Remove empty + lines from assembly output. + (atomic_exchange_short): Likewise. + (atomic_exchange): Likewise. + (atomic_fetch_add_short): Likewise. + (atomic_fetch_add): Likewise. + (atomic_cas_value_strong_amcas): Use TARGET_LAMCAS instead + of ISA_BASE_IS_LA64V110. + (atomic_compare_and_swap): Likewise. + (atomic_compare_and_swap): Likewise. + (atomic_compare_and_swap): Likewise. + * config/loongarch/loongarch.cc (loongarch_asm_code_end): Dump + status if -mlam-bh and -mlamcas if -fverbose-asm. +--- + gcc/config/loongarch/genopts/isa-evolution.in | 2 ++ + gcc/config/loongarch/loongarch-cpu.cc | 3 ++- + gcc/config/loongarch/loongarch-cpucfg-map.h | 2 ++ + gcc/config/loongarch/loongarch-str.h | 2 ++ + gcc/config/loongarch/loongarch.cc | 2 ++ + gcc/config/loongarch/loongarch.opt | 8 ++++++++ + gcc/config/loongarch/sync.md | 18 +++++++++--------- + 7 files changed, 27 insertions(+), 10 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in +index e58f0d6a1..a6bc3f87f 100644 +--- a/gcc/config/loongarch/genopts/isa-evolution.in ++++ b/gcc/config/loongarch/genopts/isa-evolution.in +@@ -1,2 +1,4 @@ + 2 26 div32 Support div.w[u] and mod.w[u] instructions with inputs not sign-extended. ++2 27 lam-bh Support am{swap/add}[_db].{b/h} instructions. ++2 28 lamcas Support amcas[_db].{b/h/w/d} instructions. + 3 23 ld-seq-sa Do not need load-load barriers (dbar 0x700). 
+diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index 76d66fa55..bbce82c9c 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -38,7 +38,8 @@ along with GCC; see the file COPYING3. If not see + initializers! */ + + #define ISA_BASE_LA64V110_FEATURES \ +- (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA) ++ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA \ ++ | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS) + + int64_t loongarch_isa_base_features[N_ISA_BASE_TYPES] = { + /* [ISA_BASE_LA64V100] = */ 0, +diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-cpucfg-map.h +index 0c078c397..02ff16712 100644 +--- a/gcc/config/loongarch/loongarch-cpucfg-map.h ++++ b/gcc/config/loongarch/loongarch-cpucfg-map.h +@@ -30,6 +30,8 @@ static constexpr struct { + HOST_WIDE_INT isa_evolution_bit; + } cpucfg_map[] = { + { 2, 1u << 26, OPTION_MASK_ISA_DIV32 }, ++ { 2, 1u << 27, OPTION_MASK_ISA_LAM_BH }, ++ { 2, 1u << 28, OPTION_MASK_ISA_LAMCAS }, + { 3, 1u << 23, OPTION_MASK_ISA_LD_SEQ_SA }, + }; + +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index cd9dbb41b..0fee9abe5 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -70,5 +70,7 @@ along with GCC; see the file COPYING3. 
If not see + #define STR_EXPLICIT_RELOCS_ALWAYS "always" + + #define OPTSTR_DIV32 "div32" ++#define OPTSTR_LAM_BH "lam-bh" ++#define OPTSTR_LAMCAS "lamcas" + #define OPTSTR_LD_SEQ_SA "ld-seq-sa" + #endif /* LOONGARCH_STR_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index c86b787c4..33d23a731 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -11448,6 +11448,8 @@ loongarch_asm_code_end (void) + fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START, + loongarch_isa_base_strings [la_target.isa.base]); + DUMP_FEATURE (TARGET_DIV32); ++ DUMP_FEATURE (TARGET_LAM_BH); ++ DUMP_FEATURE (TARGET_LAMCAS); + DUMP_FEATURE (TARGET_LD_SEQ_SA); + } + +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 5251f705d..ea0d5bb4e 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -267,6 +267,14 @@ mdiv32 + Target Mask(ISA_DIV32) Var(isa_evolution) + Support div.w[u] and mod.w[u] instructions with inputs not sign-extended. + ++mlam-bh ++Target Mask(ISA_LAM_BH) Var(isa_evolution) ++Support am{swap/add}[_db].{b/h} instructions. ++ ++mlamcas ++Target Mask(ISA_LAMCAS) Var(isa_evolution) ++Support amcas[_db].{b/h/w/d} instructions. ++ + mld-seq-sa + Target Mask(ISA_LD_SEQ_SA) Var(isa_evolution) + Do not need load-load barriers (dbar 0x700). +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index 65443c899..a678e7131 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -124,7 +124,7 @@ + return "ld.\t%0,%1\\n\\t" + "dbar\t0x14"; + case MEMMODEL_RELAXED: +- return TARGET_LD_SEQ_SA ? "ld.\t%0,%1\\n\\t" ++ return TARGET_LD_SEQ_SA ? 
"ld.\t%0,%1" + : "ld.\t%0,%1\\n\\t" + "dbar\t0x700"; + +@@ -193,7 +193,7 @@ + (match_operand:SHORT 1 "reg_or_0_operand" "rJ")) + (match_operand:SI 2 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] +- "ISA_BASE_IS_LA64V110" ++ "TARGET_LAM_BH" + "amadd%A2.\t$zero,%z1,%0" + [(set (attr "length") (const_int 4))]) + +@@ -230,7 +230,7 @@ + UNSPEC_SYNC_EXCHANGE)) + (set (match_dup 1) + (match_operand:SHORT 2 "register_operand" "r"))] +- "ISA_BASE_IS_LA64V110" ++ "TARGET_LAM_BH" + "amswap%A3.\t%0,%z2,%1" + [(set (attr "length") (const_int 4))]) + +@@ -266,7 +266,7 @@ + (match_operand:QHWD 3 "reg_or_0_operand" "rJ") + (match_operand:SI 4 "const_int_operand")] ;; mod_s + UNSPEC_COMPARE_AND_SWAP))] +- "ISA_BASE_IS_LA64V110" ++ "TARGET_LAMCAS" + "ori\t%0,%z2,0\n\tamcas%A4.\t%0,%z3,%1" + [(set (attr "length") (const_int 8))]) + +@@ -296,7 +296,7 @@ + + operands[6] = mod_s; + +- if (ISA_BASE_IS_LA64V110) ++ if (TARGET_LAMCAS) + emit_insn (gen_atomic_cas_value_strong_amcas (operands[1], operands[2], + operands[3], operands[4], + operands[6])); +@@ -422,7 +422,7 @@ + + operands[6] = mod_s; + +- if (ISA_BASE_IS_LA64V110) ++ if (TARGET_LAMCAS) + emit_insn (gen_atomic_cas_value_strong_amcas (operands[1], operands[2], + operands[3], operands[4], + operands[6])); +@@ -642,7 +642,7 @@ + (match_operand:SHORT 2 "register_operand"))] + "" + { +- if (ISA_BASE_IS_LA64V110) ++ if (TARGET_LAM_BH) + emit_insn (gen_atomic_exchange_short (operands[0], operands[1], operands[2], operands[3])); + else + { +@@ -663,7 +663,7 @@ + (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] +- "ISA_BASE_IS_LA64V110" ++ "TARGET_LAM_BH" + "amadd%A3.\t%0,%z2,%1" + [(set (attr "length") (const_int 4))]) + +@@ -678,7 +678,7 @@ + UNSPEC_SYNC_OLD_OP))] + "" + { +- if (ISA_BASE_IS_LA64V110) ++ if (TARGET_LAM_BH) + emit_insn (gen_atomic_fetch_add_short (operands[0], operands[1], + operands[2], operands[3])); + else +-- +2.43.0 + diff --git 
a/0040-LoongArch-Fix-mexplict-relocs-none-mcmodel-medium-pr.patch b/0040-LoongArch-Fix-mexplict-relocs-none-mcmodel-medium-pr.patch new file mode 100644 index 0000000..809b646 --- /dev/null +++ b/0040-LoongArch-Fix-mexplict-relocs-none-mcmodel-medium-pr.patch @@ -0,0 +1,50 @@ +From 8ca46859ad70fb9473f6dbb1d3069e68ed43ef36 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 19 Nov 2023 01:41:12 +0800 +Subject: [PATCH 040/188] LoongArch: Fix "-mexplict-relocs=none + -mcmodel=medium" producing %call36 when the assembler does not support it + +Even if !HAVE_AS_SUPPORT_CALL36, const_call_insn_operand should still +return false when -mexplict-relocs=none -mcmodel=medium to make +loongarch_legitimize_call_address emit la.local or la.global. + +gcc/ChangeLog: + + * config/loongarch/predicates.md (const_call_insn_operand): + Remove buggy "HAVE_AS_SUPPORT_CALL36" conditions. Change "1" to + "true" to make the coding style consistent. +--- + gcc/config/loongarch/predicates.md | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 2aae87db4..30a0dee9f 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -444,21 +444,19 @@ + case SYMBOL_PCREL: + if (TARGET_CMODEL_EXTREME + || (TARGET_CMODEL_MEDIUM +- && HAVE_AS_SUPPORT_CALL36 + && (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE))) + return false; + else +- return 1; ++ return true; + + case SYMBOL_GOT_DISP: + if (TARGET_CMODEL_EXTREME + || !flag_plt + || (flag_plt && TARGET_CMODEL_MEDIUM +- && HAVE_AS_SUPPORT_CALL36 + && (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE))) + return false; + else +- return 1; ++ return true; + + default: + return false; +-- +2.43.0 + diff --git a/0041-LoongArch-Modify-MUSL_DYNAMIC_LINKER.patch b/0041-LoongArch-Modify-MUSL_DYNAMIC_LINKER.patch new file mode 100644 index 0000000..b34be72 --- /dev/null +++ b/0041-LoongArch-Modify-MUSL_DYNAMIC_LINKER.patch @@ 
-0,0 +1,43 @@ +From 4c24f920e52c0dddf4bbbc391d2e5d2524754b4a Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Sat, 18 Nov 2023 11:04:42 +0800 +Subject: [PATCH 041/188] LoongArch: Modify MUSL_DYNAMIC_LINKER. + +Use no suffix at all in the musl dynamic linker name for hard +float ABI. Use -sf and -sp suffixes in musl dynamic linker name +for soft float and single precision ABIs. The following table +outlines the musl interpreter names for the LoongArch64 ABI names. + +musl interpreter | LoongArch64 ABI +--------------------------- | ----------------- +ld-musl-loongarch64.so.1 | loongarch64-lp64d +ld-musl-loongarch64-sp.so.1 | loongarch64-lp64f +ld-musl-loongarch64-sf.so.1 | loongarch64-lp64s + +gcc/ChangeLog: + + * config/loongarch/gnu-user.h (MUSL_ABI_SPEC): Modify suffix. +--- + gcc/config/loongarch/gnu-user.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h +index 60ef75601..9fc49dc8f 100644 +--- a/gcc/config/loongarch/gnu-user.h ++++ b/gcc/config/loongarch/gnu-user.h +@@ -34,9 +34,9 @@ along with GCC; see the file COPYING3. 
If not see + "/lib" ABI_GRLEN_SPEC "/ld-linux-loongarch-" ABI_SPEC ".so.1" + + #define MUSL_ABI_SPEC \ +- "%{mabi=lp64d:-lp64d}" \ +- "%{mabi=lp64f:-lp64f}" \ +- "%{mabi=lp64s:-lp64s}" ++ "%{mabi=lp64d:}" \ ++ "%{mabi=lp64f:-sp}" \ ++ "%{mabi=lp64s:-sf}" + + #undef MUSL_DYNAMIC_LINKER + #define MUSL_DYNAMIC_LINKER \ +-- +2.43.0 + diff --git a/0042-LoongArch-Fix-libgcc-build-failure-when-libc-is-not-.patch b/0042-LoongArch-Fix-libgcc-build-failure-when-libc-is-not-.patch new file mode 100644 index 0000000..2f6a8ad --- /dev/null +++ b/0042-LoongArch-Fix-libgcc-build-failure-when-libc-is-not-.patch @@ -0,0 +1,85 @@ +From 0f65e5ebe60d9ad5141115661ed71c321156cd95 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Tue, 21 Nov 2023 09:09:25 +0800 +Subject: [PATCH 042/188] LoongArch: Fix libgcc build failure when libc is not + available + +To use int64_t we included <stdint.h> in loongarch-def.h. +Unfortunately, loongarch-def.h is also used by libgcc etc., causing a +build failure when building a "stage1" cross compiler at which the +target libc is not built yet. + +As int64_t is used for a C-compatible replacement of HOST_WIDE_INT, it's +not directly or indirectly referred by the target libraries. So +guard everything requiring stdint.h with #if then they'll not block +target libraries. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.h (stdint.h): Guard with #if to + exclude it for target libraries. + (loongarch_isa_base_features): Likewise. + (loongarch_isa): Likewise. + (loongarch_abi): Likewise. + (loongarch_target): Likewise. + (loongarch_cpu_default_isa): Likewise. +--- + gcc/config/loongarch/loongarch-def.h | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index ca0a324dd..ef848f606 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -46,7 +46,10 @@ along with GCC; see the file COPYING3.
If not see + #ifndef LOONGARCH_DEF_H + #define LOONGARCH_DEF_H + ++#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + #include <stdint.h> ++#endif ++ + #include "loongarch-tune.h" + + #ifdef __cplusplus +@@ -62,9 +65,11 @@ extern const char* loongarch_isa_base_strings[]; + #define ISA_BASE_LA64V110 1 + #define N_ISA_BASE_TYPES 2 + ++#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + /* Unlike other arrays, this is defined in loongarch-cpu.cc. The problem is + we cannot use the C++ header options.h in loongarch-def.c. */ + extern int64_t loongarch_isa_base_features[]; ++#endif + + /* enum isa_ext_* */ + extern const char* loongarch_isa_ext_strings[]; +@@ -121,6 +126,7 @@ extern const char* loongarch_cmodel_strings[]; + #define M_OPT_ABSENT(opt_enum) ((opt_enum) == M_OPT_UNSET) + + ++#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + /* Internal representation of the target. */ + struct loongarch_isa + { +@@ -150,6 +156,9 @@ struct loongarch_target + int cmodel; /* CMODEL_ */ + }; + ++extern struct loongarch_isa loongarch_cpu_default_isa[]; ++#endif ++ + /* CPU properties. */ + /* index */ + #define CPU_NATIVE 0 +@@ -162,7 +171,6 @@ struct loongarch_target + + /* parallel tables.
*/ + extern const char* loongarch_cpu_strings[]; +-extern struct loongarch_isa loongarch_cpu_default_isa[]; + extern int loongarch_cpu_issue_rate[]; + extern int loongarch_cpu_multipass_dfa_lookahead[]; + +-- +2.43.0 + diff --git a/0043-LoongArch-Optimize-LSX-vector-shuffle-on-floating-po.patch b/0043-LoongArch-Optimize-LSX-vector-shuffle-on-floating-po.patch new file mode 100644 index 0000000..0a24290 --- /dev/null +++ b/0043-LoongArch-Optimize-LSX-vector-shuffle-on-floating-po.patch @@ -0,0 +1,148 @@ +From cdea7c114fa48012705d65134276619b5679fa35 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 19 Nov 2023 06:12:22 +0800 +Subject: [PATCH 043/188] LoongArch: Optimize LSX vector shuffle on + floating-point vector +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The vec_perm expander was wrongly defined. GCC internal says: + +Operand 3 is the “selector”. It is an integral mode vector of the same +width and number of elements as mode M. + +But we made operand 3 in the same mode as the shuffled vectors, so it +would be a FP mode vector if the shuffled vectors are FP mode. + +With this mistake, the generic code manages to work around and it ends +up creating some very nasty code for a simple __builtin_shuffle (a, b, +c) where a and b are V4SF, c is V4SI: + + la.local $r12,.LANCHOR0 + la.local $r13,.LANCHOR1 + vld $vr1,$r12,48 + vslli.w $vr1,$vr1,2 + vld $vr2,$r12,16 + vld $vr0,$r13,0 + vld $vr3,$r13,16 + vshuf.b $vr0,$vr1,$vr1,$vr0 + vld $vr1,$r12,32 + vadd.b $vr0,$vr0,$vr3 + vandi.b $vr0,$vr0,31 + vshuf.b $vr0,$vr1,$vr2,$vr0 + vst $vr0,$r12,0 + jr $r1 + +This is obviously stupid. Fix the expander definition and adjust +loongarch_expand_vec_perm to handle it correctly. + +gcc/ChangeLog: + + * config/loongarch/lsx.md (vec_perm<mode>): Make the + selector VIMODE. + * config/loongarch/loongarch.cc (loongarch_expand_vec_perm): + Use the mode of the selector (instead of the shuffled vector) + for truncating it.
Operate on subregs in the selector mode if + the shuffled vector has a different mode (i. e. it's a + floating-point vector). + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-shuf-fp.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 18 ++++++++++-------- + gcc/config/loongarch/lsx.md | 2 +- + .../gcc.target/loongarch/vect-shuf-fp.c | 16 ++++++++++++++++ + 3 files changed, 27 insertions(+), 9 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 33d23a731..d95ac68e8 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -8603,8 +8603,9 @@ void + loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) + { + machine_mode vmode = GET_MODE (target); ++ machine_mode vimode = GET_MODE (sel); + auto nelt = GET_MODE_NUNITS (vmode); +- auto round_reg = gen_reg_rtx (vmode); ++ auto round_reg = gen_reg_rtx (vimode); + rtx round_data[MAX_VECT_LEN]; + + for (int i = 0; i < nelt; i += 1) +@@ -8612,9 +8613,16 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) + round_data[i] = GEN_INT (0x1f); + } + +- rtx round_data_rtx = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, round_data)); ++ rtx round_data_rtx = gen_rtx_CONST_VECTOR (vimode, gen_rtvec_v (nelt, round_data)); + emit_move_insn (round_reg, round_data_rtx); + ++ if (vmode != vimode) ++ { ++ target = lowpart_subreg (vimode, target, vmode); ++ op0 = lowpart_subreg (vimode, op0, vmode); ++ op1 = lowpart_subreg (vimode, op1, vmode); ++ } ++ + switch (vmode) + { + case E_V16QImode: +@@ -8622,17 +8630,11 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) + emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel)); + break; + case E_V2DFmode: +- emit_insn (gen_andv2di3 (sel, sel, round_reg)); +- emit_insn (gen_lsx_vshuf_d_f (target, sel, op1, op0)); +- break; + case E_V2DImode: + emit_insn (gen_andv2di3 (sel, sel, 
round_reg)); + emit_insn (gen_lsx_vshuf_d (target, sel, op1, op0)); + break; + case E_V4SFmode: +- emit_insn (gen_andv4si3 (sel, sel, round_reg)); +- emit_insn (gen_lsx_vshuf_w_f (target, sel, op1, op0)); +- break; + case E_V4SImode: + emit_insn (gen_andv4si3 (sel, sel, round_reg)); + emit_insn (gen_lsx_vshuf_w (target, sel, op1, op0)); +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 8ea41c85b..5e8d8d74b 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -837,7 +837,7 @@ + [(match_operand:LSX 0 "register_operand") + (match_operand:LSX 1 "register_operand") + (match_operand:LSX 2 "register_operand") +- (match_operand:LSX 3 "register_operand")] ++ (match_operand:<VIMODE> 3 "register_operand")] + "ISA_HAS_LSX" + { + loongarch_expand_vec_perm (operands[0], operands[1], +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c b/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c +new file mode 100644 +index 000000000..7acc2113a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mlasx -O3" } */ ++/* { dg-final { scan-assembler "vshuf\.w" } } */ ++ ++#define V __attribute__ ((vector_size (16))) ++ ++int a V; ++float b V; ++float c V; ++float d V; ++ ++void ++test (void) ++{ ++ d = __builtin_shuffle (b, c, a); ++} +-- +2.43.0 + diff --git a/0044-LoongArch-Optimize-the-loading-of-immediate-numbers-.patch b/0044-LoongArch-Optimize-the-loading-of-immediate-numbers-.patch new file mode 100644 index 0000000..9721a38 --- /dev/null +++ b/0044-LoongArch-Optimize-the-loading-of-immediate-numbers-.patch @@ -0,0 +1,112 @@ +From aaf58efe8414a4eaceb6721d9c242df710d1762c Mon Sep 17 00:00:00 2001 +From: Guo Jie +Date: Thu, 23 Nov 2023 11:04:17 +0800 +Subject: [PATCH 044/188] LoongArch: Optimize the loading of immediate numbers + with the same high and low 32-bit values + +For the following immediate load operation in
gcc/testsuite/gcc.target/loongarch/imm-load1.c: + + long long r = 0x0101010101010101; + +Before this patch: + + lu12i.w $r15,16842752>>12 + ori $r15,$r15,257 + lu32i.d $r15,0x1010100000000>>32 + lu52i.d $r15,$r15,0x100000000000000>>52 + +After this patch: + + lu12i.w $r15,16842752>>12 + ori $r15,$r15,257 + bstrins.d $r15,$r15,63,32 + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (enum loongarch_load_imm_method): Add new method. + (loongarch_build_integer): Add relevant implementations for + new method. + (loongarch_move_integer): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/imm-load1.c: Change old check. +--- + gcc/config/loongarch/loongarch.cc | 22 ++++++++++++++++++- + .../gcc.target/loongarch/imm-load1.c | 3 ++- + 2 files changed, 23 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index d95ac68e8..048d3802b 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -142,12 +142,16 @@ struct loongarch_address_info + + METHOD_LU52I: + Load 52-63 bit of the immediate number. ++ ++ METHOD_MIRROR: ++ Copy 0-31 bit of the immediate number to 32-63bit. + */ + enum loongarch_load_imm_method + { + METHOD_NORMAL, + METHOD_LU32I, +- METHOD_LU52I ++ METHOD_LU52I, ++ METHOD_MIRROR + }; + + struct loongarch_integer_op +@@ -1553,11 +1557,23 @@ loongarch_build_integer (struct loongarch_integer_op *codes, + + int sign31 = (value & (HOST_WIDE_INT_1U << 31)) >> 31; + int sign51 = (value & (HOST_WIDE_INT_1U << 51)) >> 51; ++ ++ uint32_t hival = (uint32_t) (value >> 32); ++ uint32_t loval = (uint32_t) value; ++ + /* Determine whether the upper 32 bits are sign-extended from the lower + 32 bits. If it is, the instructions to load the high order can be + ommitted. */ + if (lu32i[sign31] && lu52i[sign31]) + return cost; ++ /* If the lower 32 bits are the same as the upper 32 bits, just copy ++ the lower 32 bits to the upper 32 bits. 
*/ ++ else if (loval == hival) ++ { ++ codes[cost].method = METHOD_MIRROR; ++ codes[cost].curr_value = value; ++ return cost + 1; ++ } + /* Determine whether bits 32-51 are sign-extended from the lower 32 + bits. If so, directly load 52-63 bits. */ + else if (lu32i[sign31]) +@@ -3230,6 +3246,10 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value) + gen_rtx_AND (DImode, x, GEN_INT (0xfffffffffffff)), + GEN_INT (codes[i].value)); + break; ++ case METHOD_MIRROR: ++ gcc_assert (mode == DImode); ++ emit_insn (gen_insvdi (x, GEN_INT (32), GEN_INT (32), x)); ++ break; + default: + gcc_unreachable (); + } +diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load1.c b/gcc/testsuite/gcc.target/loongarch/imm-load1.c +index 2ff029712..f64cc2956 100644 +--- a/gcc/testsuite/gcc.target/loongarch/imm-load1.c ++++ b/gcc/testsuite/gcc.target/loongarch/imm-load1.c +@@ -1,6 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-mabi=lp64d -O2" } */ +-/* { dg-final { scan-assembler "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */ ++/* { dg-final { scan-assembler-not "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */ ++/* { dg-final { scan-assembler "test:.*lu12i\.w.*\n\tbstrins\.d.*\n\.L2:" } } */ + + + extern long long b[10]; +-- +2.43.0 + diff --git a/0045-LoongArch-Fix-runtime-error-in-a-gcc-build-with-with.patch b/0045-LoongArch-Fix-runtime-error-in-a-gcc-build-with-with.patch new file mode 100644 index 0000000..17ff895 --- /dev/null +++ b/0045-LoongArch-Fix-runtime-error-in-a-gcc-build-with-with.patch @@ -0,0 +1,30 @@ +From fa28ce4ac91691595e14838be49c9dd42b153b7f Mon Sep 17 00:00:00 2001 +From: Guo Jie +Date: Thu, 23 Nov 2023 11:05:56 +0800 +Subject: [PATCH 045/188] LoongArch: Fix runtime error in a gcc build with + --with-build-config=bootstrap-ubsan + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_split_plus_constant): + avoid left shift of negative value -0x8000. 
+--- + gcc/config/loongarch/loongarch.cc | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 048d3802b..ecceca22d 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4265,7 +4265,7 @@ loongarch_split_plus_constant (rtx *op, machine_mode mode) + else if (loongarch_addu16i_imm12_operand_p (v, mode)) + a = (v & ~HWIT_UC_0xFFF) + ((v & 0x800) << 1); + else if (mode == DImode && DUAL_ADDU16I_OPERAND (v)) +- a = (v > 0 ? 0x7fff : -0x8000) << 16; ++ a = (v > 0 ? 0x7fff0000 : ~0x7fffffff); + else + gcc_unreachable (); + +-- +2.43.0 + diff --git a/0046-LoongArch-Fix-usage-of-LSX-and-LASX-frint-ftint-inst.patch b/0046-LoongArch-Fix-usage-of-LSX-and-LASX-frint-ftint-inst.patch new file mode 100644 index 0000000..f42552d --- /dev/null +++ b/0046-LoongArch-Fix-usage-of-LSX-and-LASX-frint-ftint-inst.patch @@ -0,0 +1,1295 @@ +From d37308b7a62246e16ee61c40441548feb76761f1 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sat, 18 Nov 2023 04:48:20 +0800 +Subject: [PATCH 046/188] LoongArch: Fix usage of LSX and LASX frint/ftint + instructions [PR112578] + +The usage of LSX and LASX frint/ftint instructions had some problems: + +1. These instructions raise FE_INEXACT, which is not allowed with + -fno-fp-int-builtin-inexact for most C2x section F.10.6 functions + (the only exceptions are rint, lrint, and llrint). +2. The "frint" instruction without explicit rounding mode is used for + roundM2, this is incorrect because roundM2 is defined "rounding + operand 1 to the *nearest* integer, rounding away from zero in the + event of a tie". We actually don't have such an instruction. Our + frintrne instruction is roundevenM2 (unfortunately, this is not + documented). +3. These define_insn's are written in a way not so easy to hack. + +So I removed these instructions and created a "simd.md" file, then added +them and the corresponding expanders there.
The advantage of the +simd.md file is we don't need to duplicate the RTL template twice (in +lsx.md and lasx.md). + +gcc/ChangeLog: + + PR target/112578 + * config/loongarch/lsx.md (UNSPEC_LSX_VFTINT_S, + UNSPEC_LSX_VFTINTRNE, UNSPEC_LSX_VFTINTRP, + UNSPEC_LSX_VFTINTRM, UNSPEC_LSX_VFRINTRNE_S, + UNSPEC_LSX_VFRINTRNE_D, UNSPEC_LSX_VFRINTRZ_S, + UNSPEC_LSX_VFRINTRZ_D, UNSPEC_LSX_VFRINTRP_S, + UNSPEC_LSX_VFRINTRP_D, UNSPEC_LSX_VFRINTRM_S, + UNSPEC_LSX_VFRINTRM_D): Remove. + (ILSX, FLSX): Move into ... + (VIMODE): Move into ... + (FRINT_S, FRINT_D): Remove. + (frint_pattern_s, frint_pattern_d, frint_suffix): Remove. + (lsx_vfrint_, lsx_vftint_s__, + lsx_vftintrne_w_s, lsx_vftintrne_l_d, lsx_vftintrp_w_s, + lsx_vftintrp_l_d, lsx_vftintrm_w_s, lsx_vftintrm_l_d, + lsx_vfrintrne_s, lsx_vfrintrne_d, lsx_vfrintrz_s, + lsx_vfrintrz_d, lsx_vfrintrp_s, lsx_vfrintrp_d, + lsx_vfrintrm_s, lsx_vfrintrm_d, + v4sf2, + v2df2, round2, + fix_trunc2): Remove. + * config/loongarch/lasx.md: Likewise. + * config/loongarch/simd.md: New file. + (ILSX, ILASX, FLSX, FLASX, VIMODE): ... here. + (IVEC, FVEC): New mode iterators. + (VIMODE): ... here. Extend it to work for all LSX/LASX vector + modes. + (x, wu, simd_isa, WVEC, vimode, simdfmt, simdifmt_for_f, + elebits): New mode attributes. + (UNSPEC_SIMD_FRINTRP, UNSPEC_SIMD_FRINTRZ, UNSPEC_SIMD_FRINT, + UNSPEC_SIMD_FRINTRM, UNSPEC_SIMD_FRINTRNE): New unspecs. + (SIMD_FRINT): New int iterator. + (simd_frint_rounding, simd_frint_pattern): New int attributes. + (_vfrint_): New + define_insn template for frint instructions. + (_vftint__): + Likewise, but for ftint instructions. + (2): New define_expand with + flag_fp_int_builtin_inexact checked. + (l2): Likewise. + (ftrunc2): New define_expand. It does not require + flag_fp_int_builtin_inexact. + (fix_trunc2): New define_insn_and_split. It does + not require flag_fp_int_builtin_inexact. + (include): Add lsx.md and lasx.md. 
+ * config/loongarch/loongarch.md (include): Include simd.md, + instead of including lsx.md and lasx.md directly. + * config/loongarch/loongarch-builtins.cc + (CODE_FOR_lsx_vftint_w_s, CODE_FOR_lsx_vftint_l_d, + CODE_FOR_lasx_xvftint_w_s, CODE_FOR_lasx_xvftint_l_d): + Remove. + +gcc/testsuite/ChangeLog: + + PR target/112578 + * gcc.target/loongarch/vect-frint.c: New test. + * gcc.target/loongarch/vect-frint-no-inexact.c: New test. + * gcc.target/loongarch/vect-ftint.c: New test. + * gcc.target/loongarch/vect-ftint-no-inexact.c: New test. +--- + gcc/config/loongarch/lasx.md | 239 ----------------- + gcc/config/loongarch/loongarch-builtins.cc | 4 - + gcc/config/loongarch/loongarch.md | 7 +- + gcc/config/loongarch/lsx.md | 243 ------------------ + gcc/config/loongarch/simd.md | 213 +++++++++++++++ + .../loongarch/vect-frint-no-inexact.c | 48 ++++ + .../gcc.target/loongarch/vect-frint.c | 85 ++++++ + .../loongarch/vect-ftint-no-inexact.c | 44 ++++ + .../gcc.target/loongarch/vect-ftint.c | 83 ++++++ + 9 files changed, 475 insertions(+), 491 deletions(-) + create mode 100644 gcc/config/loongarch/simd.md + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint-no-inexact.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-ftint.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 2e11f0612..d4a56c307 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -53,7 +53,6 @@ + UNSPEC_LASX_XVFCMP_SULT + UNSPEC_LASX_XVFCMP_SUN + UNSPEC_LASX_XVFCMP_SUNE +- UNSPEC_LASX_XVFTINT_S + UNSPEC_LASX_XVFTINT_U + UNSPEC_LASX_XVCLO + UNSPEC_LASX_XVSAT_S +@@ -92,12 +91,6 @@ + UNSPEC_LASX_XVEXTRINS + UNSPEC_LASX_XVMSKLTZ + UNSPEC_LASX_XVSIGNCOV +- UNSPEC_LASX_XVFTINTRNE_W_S +- UNSPEC_LASX_XVFTINTRNE_L_D +- UNSPEC_LASX_XVFTINTRP_W_S +- UNSPEC_LASX_XVFTINTRP_L_D +- 
UNSPEC_LASX_XVFTINTRM_W_S +- UNSPEC_LASX_XVFTINTRM_L_D + UNSPEC_LASX_XVFTINT_W_D + UNSPEC_LASX_XVFFINT_S_L + UNSPEC_LASX_XVFTINTRZ_W_D +@@ -116,14 +109,6 @@ + UNSPEC_LASX_XVFTINTRML_L_S + UNSPEC_LASX_XVFTINTRNEL_L_S + UNSPEC_LASX_XVFTINTRNEH_L_S +- UNSPEC_LASX_XVFRINTRNE_S +- UNSPEC_LASX_XVFRINTRNE_D +- UNSPEC_LASX_XVFRINTRZ_S +- UNSPEC_LASX_XVFRINTRZ_D +- UNSPEC_LASX_XVFRINTRP_S +- UNSPEC_LASX_XVFRINTRP_D +- UNSPEC_LASX_XVFRINTRM_S +- UNSPEC_LASX_XVFRINTRM_D + UNSPEC_LASX_XVREPLVE0_Q + UNSPEC_LASX_XVPERM_W + UNSPEC_LASX_XVPERMI_Q +@@ -206,9 +191,6 @@ + ;; Only used for copy256_{u,s}.w. + (define_mode_iterator LASX_W [V8SI V8SF]) + +-;; Only integer modes in LASX. +-(define_mode_iterator ILASX [V4DI V8SI V16HI V32QI]) +- + ;; As ILASX but excludes V32QI. + (define_mode_iterator ILASX_DWH [V4DI V8SI V16HI]) + +@@ -224,9 +206,6 @@ + ;; Only integer modes smaller than a word. + (define_mode_iterator ILASX_HB [V16HI V32QI]) + +-;; Only floating-point modes in LASX. +-(define_mode_iterator FLASX [V4DF V8SF]) +- + ;; Only used for immediate set shuffle elements instruction. 
+ (define_mode_iterator LASX_WHB_W [V8SI V16HI V32QI V8SF]) + +@@ -500,37 +479,6 @@ + (V16HI "w") + (V32QI "w")]) + +-(define_int_iterator FRINT256_S [UNSPEC_LASX_XVFRINTRP_S +- UNSPEC_LASX_XVFRINTRZ_S +- UNSPEC_LASX_XVFRINT +- UNSPEC_LASX_XVFRINTRM_S]) +- +-(define_int_iterator FRINT256_D [UNSPEC_LASX_XVFRINTRP_D +- UNSPEC_LASX_XVFRINTRZ_D +- UNSPEC_LASX_XVFRINT +- UNSPEC_LASX_XVFRINTRM_D]) +- +-(define_int_attr frint256_pattern_s +- [(UNSPEC_LASX_XVFRINTRP_S "ceil") +- (UNSPEC_LASX_XVFRINTRZ_S "btrunc") +- (UNSPEC_LASX_XVFRINT "rint") +- (UNSPEC_LASX_XVFRINTRM_S "floor")]) +- +-(define_int_attr frint256_pattern_d +- [(UNSPEC_LASX_XVFRINTRP_D "ceil") +- (UNSPEC_LASX_XVFRINTRZ_D "btrunc") +- (UNSPEC_LASX_XVFRINT "rint") +- (UNSPEC_LASX_XVFRINTRM_D "floor")]) +- +-(define_int_attr frint256_suffix +- [(UNSPEC_LASX_XVFRINTRP_S "rp") +- (UNSPEC_LASX_XVFRINTRP_D "rp") +- (UNSPEC_LASX_XVFRINTRZ_S "rz") +- (UNSPEC_LASX_XVFRINTRZ_D "rz") +- (UNSPEC_LASX_XVFRINT "") +- (UNSPEC_LASX_XVFRINTRM_S "rm") +- (UNSPEC_LASX_XVFRINTRM_D "rm")]) +- + (define_expand "vec_init" + [(match_operand:LASX 0 "register_operand") + (match_operand:LASX 1 "")] +@@ -1688,15 +1636,6 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + +-(define_insn "lasx_xvfrint_" +- [(set (match_operand:FLASX 0 "register_operand" "=f") +- (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] +- UNSPEC_LASX_XVFRINT))] +- "ISA_HAS_LASX" +- "xvfrint.\t%u0,%u1" +- [(set_attr "type" "simd_fcvt") +- (set_attr "mode" "")]) +- + (define_insn "lasx_xvfrsqrt_" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] +@@ -1706,16 +1645,6 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + +-(define_insn "lasx_xvftint_s__" +- [(set (match_operand: 0 "register_operand" "=f") +- (unspec: [(match_operand:FLASX 1 "register_operand" "f")] +- UNSPEC_LASX_XVFTINT_S))] +- "ISA_HAS_LASX" +- "xvftint..\t%u0,%u1" +- [(set_attr "type" 
"simd_fcvt") +- (set_attr "cnv_mode" "") +- (set_attr "mode" "")]) +- + (define_insn "lasx_xvftint_u__" + [(set (match_operand: 0 "register_operand" "=f") + (unspec: [(match_operand:FLASX 1 "register_operand" "f")] +@@ -1726,18 +1655,6 @@ + (set_attr "cnv_mode" "") + (set_attr "mode" "")]) + +- +- +-(define_insn "fix_trunc2" +- [(set (match_operand: 0 "register_operand" "=f") +- (fix: (match_operand:FLASX 1 "register_operand" "f")))] +- "ISA_HAS_LASX" +- "xvftintrz..\t%u0,%u1" +- [(set_attr "type" "simd_fcvt") +- (set_attr "cnv_mode" "") +- (set_attr "mode" "")]) +- +- + (define_insn "fixuns_trunc2" + [(set (match_operand: 0 "register_operand" "=f") + (unsigned_fix: (match_operand:FLASX 1 "register_operand" "f")))] +@@ -3245,60 +3162,6 @@ + [(set_attr "type" "simd_fmadd") + (set_attr "mode" "")]) + +-(define_insn "lasx_xvftintrne_w_s" +- [(set (match_operand:V8SI 0 "register_operand" "=f") +- (unspec:V8SI [(match_operand:V8SF 1 "register_operand" "f")] +- UNSPEC_LASX_XVFTINTRNE_W_S))] +- "ISA_HAS_LASX" +- "xvftintrne.w.s\t%u0,%u1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V8SF")]) +- +-(define_insn "lasx_xvftintrne_l_d" +- [(set (match_operand:V4DI 0 "register_operand" "=f") +- (unspec:V4DI [(match_operand:V4DF 1 "register_operand" "f")] +- UNSPEC_LASX_XVFTINTRNE_L_D))] +- "ISA_HAS_LASX" +- "xvftintrne.l.d\t%u0,%u1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V4DF")]) +- +-(define_insn "lasx_xvftintrp_w_s" +- [(set (match_operand:V8SI 0 "register_operand" "=f") +- (unspec:V8SI [(match_operand:V8SF 1 "register_operand" "f")] +- UNSPEC_LASX_XVFTINTRP_W_S))] +- "ISA_HAS_LASX" +- "xvftintrp.w.s\t%u0,%u1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V8SF")]) +- +-(define_insn "lasx_xvftintrp_l_d" +- [(set (match_operand:V4DI 0 "register_operand" "=f") +- (unspec:V4DI [(match_operand:V4DF 1 "register_operand" "f")] +- UNSPEC_LASX_XVFTINTRP_L_D))] +- "ISA_HAS_LASX" +- "xvftintrp.l.d\t%u0,%u1" +- [(set_attr "type" "simd_shift") +- 
(set_attr "mode" "V4DF")]) +- +-(define_insn "lasx_xvftintrm_w_s" +- [(set (match_operand:V8SI 0 "register_operand" "=f") +- (unspec:V8SI [(match_operand:V8SF 1 "register_operand" "f")] +- UNSPEC_LASX_XVFTINTRM_W_S))] +- "ISA_HAS_LASX" +- "xvftintrm.w.s\t%u0,%u1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V8SF")]) +- +-(define_insn "lasx_xvftintrm_l_d" +- [(set (match_operand:V4DI 0 "register_operand" "=f") +- (unspec:V4DI [(match_operand:V4DF 1 "register_operand" "f")] +- UNSPEC_LASX_XVFTINTRM_L_D))] +- "ISA_HAS_LASX" +- "xvftintrm.l.d\t%u0,%u1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V4DF")]) +- + (define_insn "lasx_xvftint_w_d" + [(set (match_operand:V8SI 0 "register_operand" "=f") + (unspec:V8SI [(match_operand:V4DF 1 "register_operand" "f") +@@ -3467,108 +3330,6 @@ + [(set_attr "type" "simd_shift") + (set_attr "mode" "V8SF")]) + +-(define_insn "lasx_xvfrintrne_s" +- [(set (match_operand:V8SF 0 "register_operand" "=f") +- (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")] +- UNSPEC_LASX_XVFRINTRNE_S))] +- "ISA_HAS_LASX" +- "xvfrintrne.s\t%u0,%u1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V8SF")]) +- +-(define_insn "lasx_xvfrintrne_d" +- [(set (match_operand:V4DF 0 "register_operand" "=f") +- (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")] +- UNSPEC_LASX_XVFRINTRNE_D))] +- "ISA_HAS_LASX" +- "xvfrintrne.d\t%u0,%u1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V4DF")]) +- +-(define_insn "lasx_xvfrintrz_s" +- [(set (match_operand:V8SF 0 "register_operand" "=f") +- (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")] +- UNSPEC_LASX_XVFRINTRZ_S))] +- "ISA_HAS_LASX" +- "xvfrintrz.s\t%u0,%u1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V8SF")]) +- +-(define_insn "lasx_xvfrintrz_d" +- [(set (match_operand:V4DF 0 "register_operand" "=f") +- (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")] +- UNSPEC_LASX_XVFRINTRZ_D))] +- "ISA_HAS_LASX" +- "xvfrintrz.d\t%u0,%u1" +- 
[(set_attr "type" "simd_shift") +- (set_attr "mode" "V4DF")]) +- +-(define_insn "lasx_xvfrintrp_s" +- [(set (match_operand:V8SF 0 "register_operand" "=f") +- (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")] +- UNSPEC_LASX_XVFRINTRP_S))] +- "ISA_HAS_LASX" +- "xvfrintrp.s\t%u0,%u1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V8SF")]) +- +-(define_insn "lasx_xvfrintrp_d" +- [(set (match_operand:V4DF 0 "register_operand" "=f") +- (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")] +- UNSPEC_LASX_XVFRINTRP_D))] +- "ISA_HAS_LASX" +- "xvfrintrp.d\t%u0,%u1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V4DF")]) +- +-(define_insn "lasx_xvfrintrm_s" +- [(set (match_operand:V8SF 0 "register_operand" "=f") +- (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")] +- UNSPEC_LASX_XVFRINTRM_S))] +- "ISA_HAS_LASX" +- "xvfrintrm.s\t%u0,%u1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V8SF")]) +- +-(define_insn "lasx_xvfrintrm_d" +- [(set (match_operand:V4DF 0 "register_operand" "=f") +- (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")] +- UNSPEC_LASX_XVFRINTRM_D))] +- "ISA_HAS_LASX" +- "xvfrintrm.d\t%u0,%u1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V4DF")]) +- +-;; Vector versions of the floating-point frint patterns. +-;; Expands to btrunc, ceil, floor, rint. +-(define_insn "v8sf2" +- [(set (match_operand:V8SF 0 "register_operand" "=f") +- (unspec:V8SF [(match_operand:V8SF 1 "register_operand" "f")] +- FRINT256_S))] +- "ISA_HAS_LASX" +- "xvfrint.s\t%u0,%u1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V8SF")]) +- +-(define_insn "v4df2" +- [(set (match_operand:V4DF 0 "register_operand" "=f") +- (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f")] +- FRINT256_D))] +- "ISA_HAS_LASX" +- "xvfrint.d\t%u0,%u1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V4DF")]) +- +-;; Expands to round. 
+-(define_insn "round2" +- [(set (match_operand:FLASX 0 "register_operand" "=f") +- (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] +- UNSPEC_LASX_XVFRINT))] +- "ISA_HAS_LASX" +- "xvfrint.\t%u0,%u1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "")]) +- + ;; Offset load and broadcast + (define_expand "lasx_xvldrepl_" + [(match_operand:LASX 0 "register_operand") +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index 2d9743d86..fb458feac 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -419,8 +419,6 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lsx_vabsd_hu CODE_FOR_lsx_vabsd_u_hu + #define CODE_FOR_lsx_vabsd_wu CODE_FOR_lsx_vabsd_u_wu + #define CODE_FOR_lsx_vabsd_du CODE_FOR_lsx_vabsd_u_du +-#define CODE_FOR_lsx_vftint_w_s CODE_FOR_lsx_vftint_s_w_s +-#define CODE_FOR_lsx_vftint_l_d CODE_FOR_lsx_vftint_s_l_d + #define CODE_FOR_lsx_vftint_wu_s CODE_FOR_lsx_vftint_u_wu_s + #define CODE_FOR_lsx_vftint_lu_d CODE_FOR_lsx_vftint_u_lu_d + #define CODE_FOR_lsx_vandn_v CODE_FOR_vandnv16qi3 +@@ -725,8 +723,6 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lasx_xvssrlrn_bu_h CODE_FOR_lasx_xvssrlrn_u_bu_h + #define CODE_FOR_lasx_xvssrlrn_hu_w CODE_FOR_lasx_xvssrlrn_u_hu_w + #define CODE_FOR_lasx_xvssrlrn_wu_d CODE_FOR_lasx_xvssrlrn_u_wu_d +-#define CODE_FOR_lasx_xvftint_w_s CODE_FOR_lasx_xvftint_s_w_s +-#define CODE_FOR_lasx_xvftint_l_d CODE_FOR_lasx_xvftint_s_l_d + #define CODE_FOR_lasx_xvftint_wu_s CODE_FOR_lasx_xvftint_u_wu_s + #define CODE_FOR_lasx_xvftint_lu_d CODE_FOR_lasx_xvftint_u_lu_d + #define CODE_FOR_lasx_xvsllwil_h_b CODE_FOR_lasx_xvsllwil_s_h_b +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index c4e7af107..d1c766cbf 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -4026,11 +4026,8 @@ + (include "generic.md") + (include "la464.md") + 
+-; The LoongArch SX Instructions. +-(include "lsx.md") +- +-; The LoongArch ASX Instructions. +-(include "lasx.md") ++; The LoongArch SIMD Instructions. ++(include "simd.md") + + (define_c_enum "unspec" [ + UNSPEC_ADDRESS_FIRST +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 5e8d8d74b..c1c3719e3 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -55,7 +55,6 @@ + UNSPEC_LSX_VFCMP_SULT + UNSPEC_LSX_VFCMP_SUN + UNSPEC_LSX_VFCMP_SUNE +- UNSPEC_LSX_VFTINT_S + UNSPEC_LSX_VFTINT_U + UNSPEC_LSX_VSAT_S + UNSPEC_LSX_VSAT_U +@@ -89,9 +88,6 @@ + UNSPEC_LSX_VEXTRINS + UNSPEC_LSX_VMSKLTZ + UNSPEC_LSX_VSIGNCOV +- UNSPEC_LSX_VFTINTRNE +- UNSPEC_LSX_VFTINTRP +- UNSPEC_LSX_VFTINTRM + UNSPEC_LSX_VFTINT_W_D + UNSPEC_LSX_VFFINT_S_L + UNSPEC_LSX_VFTINTRZ_W_D +@@ -110,14 +106,6 @@ + UNSPEC_LSX_VFTINTRNEL_L_S + UNSPEC_LSX_VFTINTRNEH_L_S + UNSPEC_LSX_VFTINTH_L_H +- UNSPEC_LSX_VFRINTRNE_S +- UNSPEC_LSX_VFRINTRNE_D +- UNSPEC_LSX_VFRINTRZ_S +- UNSPEC_LSX_VFRINTRZ_D +- UNSPEC_LSX_VFRINTRP_S +- UNSPEC_LSX_VFRINTRP_D +- UNSPEC_LSX_VFRINTRM_S +- UNSPEC_LSX_VFRINTRM_D + UNSPEC_LSX_VSSRARN_S + UNSPEC_LSX_VSSRARN_U + UNSPEC_LSX_VSSRLN_U +@@ -221,9 +209,6 @@ + ;; Only used for copy_{u,s}.w and vilvh. + (define_mode_iterator LSX_W [V4SI V4SF]) + +-;; Only integer modes. +-(define_mode_iterator ILSX [V2DI V4SI V8HI V16QI]) +- + ;; As ILSX but excludes V16QI. + (define_mode_iterator ILSX_DWH [V2DI V4SI V8HI]) + +@@ -242,21 +227,9 @@ + ;;;; Only integer modes for fixed-point madd_q/maddr_q. + ;;(define_mode_iterator ILSX_WH [V4SI V8HI]) + +-;; Only floating-point modes. +-(define_mode_iterator FLSX [V2DF V4SF]) +- + ;; Only used for immediate set shuffle elements instruction. + (define_mode_iterator LSX_WHB_W [V4SI V8HI V16QI V4SF]) + +-;; The attribute gives the integer vector mode with same size. 
+-(define_mode_attr VIMODE +- [(V2DF "V2DI") +- (V4SF "V4SI") +- (V2DI "V2DI") +- (V4SI "V4SI") +- (V8HI "V8HI") +- (V16QI "V16QI")]) +- + ;; The attribute gives half modes for vector modes. + (define_mode_attr VHMODE + [(V8HI "V16QI") +@@ -400,38 +373,6 @@ + (V4SI "uimm5") + (V2DI "uimm6")]) + +- +-(define_int_iterator FRINT_S [UNSPEC_LSX_VFRINTRP_S +- UNSPEC_LSX_VFRINTRZ_S +- UNSPEC_LSX_VFRINT +- UNSPEC_LSX_VFRINTRM_S]) +- +-(define_int_iterator FRINT_D [UNSPEC_LSX_VFRINTRP_D +- UNSPEC_LSX_VFRINTRZ_D +- UNSPEC_LSX_VFRINT +- UNSPEC_LSX_VFRINTRM_D]) +- +-(define_int_attr frint_pattern_s +- [(UNSPEC_LSX_VFRINTRP_S "ceil") +- (UNSPEC_LSX_VFRINTRZ_S "btrunc") +- (UNSPEC_LSX_VFRINT "rint") +- (UNSPEC_LSX_VFRINTRM_S "floor")]) +- +-(define_int_attr frint_pattern_d +- [(UNSPEC_LSX_VFRINTRP_D "ceil") +- (UNSPEC_LSX_VFRINTRZ_D "btrunc") +- (UNSPEC_LSX_VFRINT "rint") +- (UNSPEC_LSX_VFRINTRM_D "floor")]) +- +-(define_int_attr frint_suffix +- [(UNSPEC_LSX_VFRINTRP_S "rp") +- (UNSPEC_LSX_VFRINTRP_D "rp") +- (UNSPEC_LSX_VFRINTRZ_S "rz") +- (UNSPEC_LSX_VFRINTRZ_D "rz") +- (UNSPEC_LSX_VFRINT "") +- (UNSPEC_LSX_VFRINTRM_S "rm") +- (UNSPEC_LSX_VFRINTRM_D "rm")]) +- + (define_expand "vec_init" + [(match_operand:LSX 0 "register_operand") + (match_operand:LSX 1 "")] +@@ -1616,15 +1557,6 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + +-(define_insn "lsx_vfrint_" +- [(set (match_operand:FLSX 0 "register_operand" "=f") +- (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] +- UNSPEC_LSX_VFRINT))] +- "ISA_HAS_LSX" +- "vfrint.\t%w0,%w1" +- [(set_attr "type" "simd_fcvt") +- (set_attr "mode" "")]) +- + (define_insn "lsx_vfrsqrt_" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] +@@ -1634,16 +1566,6 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + +-(define_insn "lsx_vftint_s__" +- [(set (match_operand: 0 "register_operand" "=f") +- (unspec: [(match_operand:FLSX 1 "register_operand" 
"f")] +- UNSPEC_LSX_VFTINT_S))] +- "ISA_HAS_LSX" +- "vftint..\t%w0,%w1" +- [(set_attr "type" "simd_fcvt") +- (set_attr "cnv_mode" "") +- (set_attr "mode" "")]) +- + (define_insn "lsx_vftint_u__" + [(set (match_operand: 0 "register_operand" "=f") + (unspec: [(match_operand:FLSX 1 "register_operand" "f")] +@@ -1654,15 +1576,6 @@ + (set_attr "cnv_mode" "") + (set_attr "mode" "")]) + +-(define_insn "fix_trunc2" +- [(set (match_operand: 0 "register_operand" "=f") +- (fix: (match_operand:FLSX 1 "register_operand" "f")))] +- "ISA_HAS_LSX" +- "vftintrz..\t%w0,%w1" +- [(set_attr "type" "simd_fcvt") +- (set_attr "cnv_mode" "") +- (set_attr "mode" "")]) +- + (define_insn "fixuns_trunc2" + [(set (match_operand: 0 "register_operand" "=f") + (unsigned_fix: (match_operand:FLSX 1 "register_operand" "f")))] +@@ -2965,60 +2878,6 @@ + [(set_attr "type" "simd_fmadd") + (set_attr "mode" "")]) + +-(define_insn "lsx_vftintrne_w_s" +- [(set (match_operand:V4SI 0 "register_operand" "=f") +- (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "f")] +- UNSPEC_LSX_VFTINTRNE))] +- "ISA_HAS_LSX" +- "vftintrne.w.s\t%w0,%w1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V4SF")]) +- +-(define_insn "lsx_vftintrne_l_d" +- [(set (match_operand:V2DI 0 "register_operand" "=f") +- (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "f")] +- UNSPEC_LSX_VFTINTRNE))] +- "ISA_HAS_LSX" +- "vftintrne.l.d\t%w0,%w1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V2DF")]) +- +-(define_insn "lsx_vftintrp_w_s" +- [(set (match_operand:V4SI 0 "register_operand" "=f") +- (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "f")] +- UNSPEC_LSX_VFTINTRP))] +- "ISA_HAS_LSX" +- "vftintrp.w.s\t%w0,%w1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V4SF")]) +- +-(define_insn "lsx_vftintrp_l_d" +- [(set (match_operand:V2DI 0 "register_operand" "=f") +- (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "f")] +- UNSPEC_LSX_VFTINTRP))] +- "ISA_HAS_LSX" +- "vftintrp.l.d\t%w0,%w1" +- 
[(set_attr "type" "simd_shift") +- (set_attr "mode" "V2DF")]) +- +-(define_insn "lsx_vftintrm_w_s" +- [(set (match_operand:V4SI 0 "register_operand" "=f") +- (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "f")] +- UNSPEC_LSX_VFTINTRM))] +- "ISA_HAS_LSX" +- "vftintrm.w.s\t%w0,%w1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V4SF")]) +- +-(define_insn "lsx_vftintrm_l_d" +- [(set (match_operand:V2DI 0 "register_operand" "=f") +- (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "f")] +- UNSPEC_LSX_VFTINTRM))] +- "ISA_HAS_LSX" +- "vftintrm.l.d\t%w0,%w1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V2DF")]) +- + (define_insn "lsx_vftint_w_d" + [(set (match_operand:V4SI 0 "register_operand" "=f") + (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "f") +@@ -3187,108 +3046,6 @@ + [(set_attr "type" "simd_shift") + (set_attr "mode" "V4SF")]) + +-(define_insn "lsx_vfrintrne_s" +- [(set (match_operand:V4SF 0 "register_operand" "=f") +- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "f")] +- UNSPEC_LSX_VFRINTRNE_S))] +- "ISA_HAS_LSX" +- "vfrintrne.s\t%w0,%w1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V4SF")]) +- +-(define_insn "lsx_vfrintrne_d" +- [(set (match_operand:V2DF 0 "register_operand" "=f") +- (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "f")] +- UNSPEC_LSX_VFRINTRNE_D))] +- "ISA_HAS_LSX" +- "vfrintrne.d\t%w0,%w1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V2DF")]) +- +-(define_insn "lsx_vfrintrz_s" +- [(set (match_operand:V4SF 0 "register_operand" "=f") +- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "f")] +- UNSPEC_LSX_VFRINTRZ_S))] +- "ISA_HAS_LSX" +- "vfrintrz.s\t%w0,%w1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V4SF")]) +- +-(define_insn "lsx_vfrintrz_d" +- [(set (match_operand:V2DF 0 "register_operand" "=f") +- (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "f")] +- UNSPEC_LSX_VFRINTRZ_D))] +- "ISA_HAS_LSX" +- "vfrintrz.d\t%w0,%w1" +- [(set_attr 
"type" "simd_shift") +- (set_attr "mode" "V2DF")]) +- +-(define_insn "lsx_vfrintrp_s" +- [(set (match_operand:V4SF 0 "register_operand" "=f") +- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "f")] +- UNSPEC_LSX_VFRINTRP_S))] +- "ISA_HAS_LSX" +- "vfrintrp.s\t%w0,%w1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V4SF")]) +- +-(define_insn "lsx_vfrintrp_d" +- [(set (match_operand:V2DF 0 "register_operand" "=f") +- (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "f")] +- UNSPEC_LSX_VFRINTRP_D))] +- "ISA_HAS_LSX" +- "vfrintrp.d\t%w0,%w1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V2DF")]) +- +-(define_insn "lsx_vfrintrm_s" +- [(set (match_operand:V4SF 0 "register_operand" "=f") +- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "f")] +- UNSPEC_LSX_VFRINTRM_S))] +- "ISA_HAS_LSX" +- "vfrintrm.s\t%w0,%w1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V4SF")]) +- +-(define_insn "lsx_vfrintrm_d" +- [(set (match_operand:V2DF 0 "register_operand" "=f") +- (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "f")] +- UNSPEC_LSX_VFRINTRM_D))] +- "ISA_HAS_LSX" +- "vfrintrm.d\t%w0,%w1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V2DF")]) +- +-;; Vector versions of the floating-point frint patterns. +-;; Expands to btrunc, ceil, floor, rint. +-(define_insn "v4sf2" +- [(set (match_operand:V4SF 0 "register_operand" "=f") +- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "f")] +- FRINT_S))] +- "ISA_HAS_LSX" +- "vfrint.s\t%w0,%w1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V4SF")]) +- +-(define_insn "v2df2" +- [(set (match_operand:V2DF 0 "register_operand" "=f") +- (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "f")] +- FRINT_D))] +- "ISA_HAS_LSX" +- "vfrint.d\t%w0,%w1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "V2DF")]) +- +-;; Expands to round. 
+-(define_insn "round2" +- [(set (match_operand:FLSX 0 "register_operand" "=f") +- (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] +- UNSPEC_LSX_VFRINT))] +- "ISA_HAS_LSX" +- "vfrint.\t%w0,%w1" +- [(set_attr "type" "simd_shift") +- (set_attr "mode" "")]) +- + ;; Offset load and broadcast + (define_expand "lsx_vldrepl_" + [(match_operand:LSX 0 "register_operand") +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +new file mode 100644 +index 000000000..27d1ffecd +--- /dev/null ++++ b/gcc/config/loongarch/simd.md +@@ -0,0 +1,213 @@ ++;; Machine Description for LoongArch SIMD instructions for GNU compiler. ++;; Copyright (C) 2023 Free Software Foundation, Inc. ++ ++;; This file is part of GCC. ++ ++;; GCC is free software; you can redistribute it and/or modify ++;; it under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++ ++;; GCC is distributed in the hope that it will be useful, ++;; but WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++;; GNU General Public License for more details. ++ ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . ++ ++;; Integer modes supported by LSX. ++(define_mode_iterator ILSX [V2DI V4SI V8HI V16QI]) ++ ++;; Integer modes supported by LASX. ++(define_mode_iterator ILASX [V4DI V8SI V16HI V32QI]) ++ ++;; FP modes supported by LSX ++(define_mode_iterator FLSX [V2DF V4SF]) ++ ++;; FP modes supported by LASX ++(define_mode_iterator FLASX [V4DF V8SF]) ++ ++;; All integer modes available ++(define_mode_iterator IVEC [(ILSX "ISA_HAS_LSX") (ILASX "ISA_HAS_LASX")]) ++ ++;; All FP modes available ++(define_mode_iterator FVEC [(FLSX "ISA_HAS_LSX") (FLASX "ISA_HAS_LASX")]) ++ ++;; Mnemonic prefix, "x" for LASX modes. 
++(define_mode_attr x [(V2DI "") (V4SI "") (V8HI "") (V16QI "") ++ (V2DF "") (V4SF "") ++ (V4DI "x") (V8SI "x") (V16HI "x") (V32QI "x") ++ (V4DF "x") (V8SF "x")]) ++ ++;; Modifier for vector register, "w" for LSX modes, "u" for LASX modes. ++(define_mode_attr wu [(V2DI "w") (V4SI "w") (V8HI "w") (V16QI "w") ++ (V2DF "w") (V4SF "w") ++ (V4DI "u") (V8SI "u") (V16HI "u") (V32QI "u") ++ (V4DF "u") (V8SF "u")]) ++ ++;; define_insn name prefix, "lsx" or "lasx" ++(define_mode_attr simd_isa ++ [(V2DI "lsx") (V4SI "lsx") (V8HI "lsx") (V16QI "lsx") ++ (V2DF "lsx") (V4SF "lsx") ++ (V4DI "lasx") (V8SI "lasx") (V16HI "lasx") (V32QI "lasx") ++ (V4DF "lasx") (V8SF "lasx")]) ++ ++;; Widen integer modes for intermediate values in RTX pattern. ++(define_mode_attr WVEC [(V2DI "V2TI") (V4DI "V4TI") ++ (V4SI "V4DI") (V8SI "V8DI") ++ (V8HI "V8SI") (V16HI "V16SI") ++ (V16QI "V16HI") (V32QI "V32HI")]) ++ ++;; Integer vector modes with the same length and unit size as a mode. ++(define_mode_attr VIMODE [(V2DI "V2DI") (V4SI "V4SI") ++ (V8HI "V8HI") (V16QI "V16QI") ++ (V2DF "V2DI") (V4SF "V4SI") ++ (V4DI "V4DI") (V8SI "V8SI") ++ (V16HI "V16HI") (V32QI "V32QI") ++ (V4DF "V4DI") (V8SF "V8SI")]) ++ ++;; Lower-case version. ++(define_mode_attr vimode [(V2DF "v2di") (V4SF "v4si") ++ (V4DF "v4di") (V8SF "v8si")]) ++ ++;; Suffix for LSX or LASX instructions. ++(define_mode_attr simdfmt [(V2DF "d") (V4DF "d") ++ (V4SF "s") (V8SF "s") ++ (V2DI "d") (V4DI "d") ++ (V4SI "w") (V8SI "w") ++ (V8HI "h") (V16HI "h") ++ (V16QI "b") (V32QI "b")]) ++ ++;; Suffix for integer mode in LSX or LASX instructions with FP input but ++;; integer output. ++(define_mode_attr simdifmt_for_f [(V2DF "l") (V4DF "l") ++ (V4SF "w") (V8SF "w")]) ++ ++;; Size of vector elements in bits. 
++(define_mode_attr elmbits [(V2DI "64") (V4DI "64") ++ (V4SI "32") (V8SI "32") ++ (V8HI "16") (V16HI "16") ++ (V16QI "8") (V32QI "8")]) ++ ++;; ======================================================================= ++;; For many LASX instructions, the only difference of it from the LSX ++;; counterpart is the length of vector operands. Describe these LSX/LASX ++;; instruction here so we can avoid duplicating logics. ++;; ======================================================================= ++ ++;; ++;; FP vector rounding instructions ++;; ++ ++(define_c_enum "unspec" ++ [UNSPEC_SIMD_FRINTRP ++ UNSPEC_SIMD_FRINTRZ ++ UNSPEC_SIMD_FRINT ++ UNSPEC_SIMD_FRINTRM ++ UNSPEC_SIMD_FRINTRNE]) ++ ++(define_int_iterator SIMD_FRINT ++ [UNSPEC_SIMD_FRINTRP ++ UNSPEC_SIMD_FRINTRZ ++ UNSPEC_SIMD_FRINT ++ UNSPEC_SIMD_FRINTRM ++ UNSPEC_SIMD_FRINTRNE]) ++ ++(define_int_attr simd_frint_rounding ++ [(UNSPEC_SIMD_FRINTRP "rp") ++ (UNSPEC_SIMD_FRINTRZ "rz") ++ (UNSPEC_SIMD_FRINT "") ++ (UNSPEC_SIMD_FRINTRM "rm") ++ (UNSPEC_SIMD_FRINTRNE "rne")]) ++ ++;; All these, but rint, are controlled by -ffp-int-builtin-inexact. ++;; Note: nearbyint is NOT allowed to raise FE_INEXACT even if ++;; -ffp-int-builtin-inexact, but rint is ALLOWED to raise it even if ++;; -fno-fp-int-builtin-inexact. ++(define_int_attr simd_frint_pattern ++ [(UNSPEC_SIMD_FRINTRP "ceil") ++ (UNSPEC_SIMD_FRINTRZ "btrunc") ++ (UNSPEC_SIMD_FRINT "rint") ++ (UNSPEC_SIMD_FRINTRNE "roundeven") ++ (UNSPEC_SIMD_FRINTRM "floor")]) ++ ++;; vfrint.{/rp/rz/rm} ++(define_insn "_vfrint_" ++ [(set (match_operand:FVEC 0 "register_operand" "=f") ++ (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")] ++ SIMD_FRINT))] ++ "" ++ "vfrint.\t%0,%1" ++ [(set_attr "type" "simd_fcvt") ++ (set_attr "mode" "")]) ++ ++;; Expand the standard-named patterns to vfrint instructions if ++;; raising inexact exception is allowed. 
++ ++(define_expand "2" ++ [(set (match_operand:FVEC 0 "register_operand" "=f") ++ (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")] ++ SIMD_FRINT))] ++ " == UNSPEC_SIMD_FRINT || ++ flag_fp_int_builtin_inexact || ++ !flag_trapping_math") ++ ++;; ftrunc is like btrunc, but it's allowed to raise inexact exception ++;; even if -fno-fp-int-builtin-inexact. ++(define_expand "ftrunc2" ++ [(set (match_operand:FVEC 0 "register_operand" "=f") ++ (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")] ++ UNSPEC_SIMD_FRINTRZ))] ++ "") ++ ++;; vftint.{/rp/rz/rm} ++(define_insn ++ "_vftint__" ++ [(set (match_operand: 0 "register_operand" "=f") ++ (fix: ++ (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")] ++ SIMD_FRINT)))] ++ "" ++ "vftint..\t%0,%1" ++ [(set_attr "type" "simd_fcvt") ++ (set_attr "mode" "")]) ++ ++;; Expand the standard-named patterns to vftint instructions if ++;; raising inexact exception is allowed. ++ ++(define_expand "l2" ++ [(set (match_operand: 0 "register_operand" "=f") ++ (fix: ++ (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")] ++ SIMD_FRINT)))] ++ " == UNSPEC_SIMD_FRINT || ++ flag_fp_int_builtin_inexact || ++ !flag_trapping_math") ++ ++;; fix_trunc is allowed to raise inexact exception even if ++;; -fno-fp-int-builtin-inexact. Because the middle end tries to match ++;; (FIX x) and it does not know (FIX (UNSPEC_SIMD_FRINTRZ x)), we need ++;; to use define_insn_and_split instead of define_expand (expanders are ++;; not considered during matching). ++(define_insn_and_split "fix_trunc2" ++ [(set (match_operand: 0 "register_operand" "=f") ++ (fix: (match_operand:FVEC 1 "register_operand" "f")))] ++ "" ++ "#" ++ "" ++ [(const_int 0)] ++ { ++ emit_insn (gen__vftintrz__ ( ++ operands[0], operands[1])); ++ DONE; ++ } ++ [(set_attr "type" "simd_fcvt") ++ (set_attr "mode" "")]) ++ ++; The LoongArch SX Instructions. ++(include "lsx.md") ++ ++; The LoongArch ASX Instructions. 
++(include "lasx.md") +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-no-inexact.c +new file mode 100644 +index 000000000..7bbaf1fba +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-no-inexact.c +@@ -0,0 +1,48 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -fno-fp-int-builtin-inexact -mlasx" } */ ++ ++#include "vect-frint.c" ++ ++/* ceil */ ++/* { dg-final { scan-assembler "bl\t%plt\\(ceil\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(ceilf\\)" } } */ ++/* { dg-final { scan-assembler-not "\tvfrintrp\.s" } } */ ++/* { dg-final { scan-assembler-not "\tvfrintrp\.d" } } */ ++/* { dg-final { scan-assembler-not "\txvfrintrp\.s" } } */ ++/* { dg-final { scan-assembler-not "\txvfrintrp\.d" } } */ ++ ++/* floor */ ++/* { dg-final { scan-assembler "bl\t%plt\\(floor\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(floorf\\)" } } */ ++/* { dg-final { scan-assembler-not "\tvfrintrm\.s" } } */ ++/* { dg-final { scan-assembler-not "\tvfrintrm\.d" } } */ ++/* { dg-final { scan-assembler-not "\txvfrintrm\.s" } } */ ++/* { dg-final { scan-assembler-not "\txvfrintrm\.d" } } */ ++ ++/* nearbyint + rint: Only rint is allowed */ ++/* { dg-final { scan-assembler "bl\t%plt\\(nearbyint\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(nearbyintf\\)" } } */ ++/* { dg-final { scan-assembler-times "\tvfrint\.s" 1 } } */ ++/* { dg-final { scan-assembler-times "\tvfrint\.d" 1 } } */ ++/* { dg-final { scan-assembler-times "\txvfrint\.s" 1 } } */ ++/* { dg-final { scan-assembler-times "\txvfrint\.d" 1 } } */ ++ ++/* round: we don't have a corresponding instruction */ ++/* { dg-final { scan-assembler "bl\t%plt\\(round\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(roundf\\)" } } */ ++ ++/* roundeven */ ++/* { dg-final { scan-assembler "bl\t%plt\\(roundeven\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(roundevenf\\)" } 
} */ ++/* { dg-final { scan-assembler-not "\tvfrintrne\.s" } } */ ++/* { dg-final { scan-assembler-not "\tvfrintrne\.d" } } */ ++/* { dg-final { scan-assembler-not "\txvfrintrne\.s" } } */ ++/* { dg-final { scan-assembler-not "\txvfrintrne\.d" } } */ ++ ++/* trunc */ ++/* { dg-final { scan-assembler "bl\t%plt\\(trunc\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(truncf\\)" } } */ ++/* { dg-final { scan-assembler-not "\tvfrintrz\.s" } } */ ++/* { dg-final { scan-assembler-not "\tvfrintrz\.d" } } */ ++/* { dg-final { scan-assembler-not "\txvfrintrz\.s" } } */ ++/* { dg-final { scan-assembler-not "\txvfrintrz\.d" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint.c b/gcc/testsuite/gcc.target/loongarch/vect-frint.c +new file mode 100644 +index 000000000..6bf211e7e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-frint.c +@@ -0,0 +1,85 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -ffp-int-builtin-inexact -mlasx" } */ ++ ++float out_x[8]; ++double out_y[4]; ++ ++float x[8]; ++double y[4]; ++ ++#define TEST(op, N, func) \ ++void \ ++test_##op##_##N##_##func () \ ++{ \ ++ for (int i = 0; i < N; i++) \ ++ out_##op[i] = __builtin_##func (op[i]); \ ++} ++ ++TEST(x, 4, ceilf); ++TEST(x, 4, floorf); ++TEST(x, 4, nearbyintf); ++TEST(x, 4, rintf); ++TEST(x, 4, roundf); ++TEST(x, 4, roundevenf); ++TEST(x, 4, truncf); ++ ++TEST(x, 8, ceilf); ++TEST(x, 8, floorf); ++TEST(x, 8, nearbyintf); ++TEST(x, 8, rintf); ++TEST(x, 8, roundf); ++TEST(x, 8, roundevenf); ++TEST(x, 8, truncf); ++ ++TEST(y, 2, ceil); ++TEST(y, 2, floor); ++TEST(y, 2, nearbyint); ++TEST(y, 2, rint); ++TEST(y, 2, round); ++TEST(y, 2, roundeven); ++TEST(y, 2, trunc); ++ ++TEST(y, 4, ceil); ++TEST(y, 4, floor); ++TEST(y, 4, nearbyint); ++TEST(y, 4, rint); ++TEST(y, 4, round); ++TEST(y, 4, roundeven); ++TEST(y, 4, trunc); ++ ++/* ceil */ ++/* { dg-final { scan-assembler "\tvfrintrp\.s" } } */ ++/* { dg-final { scan-assembler 
"\tvfrintrp\.d" } } */ ++/* { dg-final { scan-assembler "\txvfrintrp\.s" } } */ ++/* { dg-final { scan-assembler "\txvfrintrp\.d" } } */ ++ ++/* floor */ ++/* { dg-final { scan-assembler "\tvfrintrm\.s" } } */ ++/* { dg-final { scan-assembler "\tvfrintrm\.d" } } */ ++/* { dg-final { scan-assembler "\txvfrintrm\.s" } } */ ++/* { dg-final { scan-assembler "\txvfrintrm\.d" } } */ ++ ++/* rint and nearbyint ++ nearbyint has been disallowed to raise FE_INEXACT for decades. */ ++/* { dg-final { scan-assembler-times "\tvfrint\.s" 1 } } */ ++/* { dg-final { scan-assembler-times "\tvfrint\.d" 1 } } */ ++/* { dg-final { scan-assembler-times "\txvfrint\.s" 1 } } */ ++/* { dg-final { scan-assembler-times "\txvfrint\.d" 1 } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(nearbyint\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(nearbyintf\\)" } } */ ++ ++/* round: we don't have a corresponding instruction */ ++/* { dg-final { scan-assembler "bl\t%plt\\(round\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(roundf\\)" } } */ ++ ++/* roundeven */ ++/* { dg-final { scan-assembler "\tvfrintrne\.s" } } */ ++/* { dg-final { scan-assembler "\tvfrintrne\.d" } } */ ++/* { dg-final { scan-assembler "\txvfrintrne\.s" } } */ ++/* { dg-final { scan-assembler "\txvfrintrne\.d" } } */ ++ ++/* trunc */ ++/* { dg-final { scan-assembler "\tvfrintrz\.s" } } */ ++/* { dg-final { scan-assembler "\tvfrintrz\.d" } } */ ++/* { dg-final { scan-assembler "\txvfrintrz\.s" } } */ ++/* { dg-final { scan-assembler "\txvfrintrz\.d" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c +new file mode 100644 +index 000000000..83d268099 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -fno-fp-int-builtin-inexact -mlasx" } */ ++ ++#include "vect-ftint.c" ++ ++/* ceil */ 
++/* { dg-final { scan-assembler "bl\t%plt\\(ceil\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(ceilf\\)" } } */ ++/* { dg-final { scan-assembler-not "\tvftintrp\.w\.s" } } */ ++/* { dg-final { scan-assembler-not "\tvftintrp\.l\.d" } } */ ++/* { dg-final { scan-assembler-not "\txvftintrp\.w\.s" } } */ ++/* { dg-final { scan-assembler-not "\txvftintrp\.l\.d" } } */ ++ ++/* floor */ ++/* { dg-final { scan-assembler "bl\t%plt\\(floor\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(floorf\\)" } } */ ++/* { dg-final { scan-assembler-not "\tvftintrm\.w\.s" } } */ ++/* { dg-final { scan-assembler-not "\tvftintrm\.l\.d" } } */ ++/* { dg-final { scan-assembler-not "\txvftintrm\.w\.s" } } */ ++/* { dg-final { scan-assembler-not "\txvftintrm\.l\.d" } } */ ++ ++/* nearbyint + rint */ ++/* { dg-final { scan-assembler "bl\t%plt\\(floor\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(floorf\\)" } } */ ++/* { dg-final { scan-assembler-times "\tvftint\.w\.s" 1 } } */ ++/* { dg-final { scan-assembler-times "\tvftint\.l\.d" 1 } } */ ++/* { dg-final { scan-assembler-times "\txvftint\.w\.s" 1 } } */ ++/* { dg-final { scan-assembler-times "\txvftint\.l\.d" 1 } } */ ++ ++/* round: we don't have a corresponding instruction */ ++/* { dg-final { scan-assembler "bl\t%plt\\(lround\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(roundf\\)" } } */ ++ ++/* roundeven */ ++/* { dg-final { scan-assembler "bl\t%plt\\(roundeven\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(roundevenf\\)" } } */ ++/* { dg-final { scan-assembler-not "\tvftintrne\.w\.s" } } */ ++/* { dg-final { scan-assembler-not "\tvftintrne\.l\.d" } } */ ++/* { dg-final { scan-assembler-not "\txvftintrne\.w\.s" } } */ ++/* { dg-final { scan-assembler-not "\txvftintrne\.l\.d" } } */ ++ ++/* trunc: XFAIL due to PR 107723 */ ++/* { dg-final { scan-assembler "bl\t%plt\\(trunc\\)" { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(truncf\\)" } } */ +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vect-ftint.c b/gcc/testsuite/gcc.target/loongarch/vect-ftint.c +new file mode 100644 +index 000000000..c4962ed17 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-ftint.c +@@ -0,0 +1,83 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -ffp-int-builtin-inexact -mlasx" } */ ++ ++int out_x[8]; ++long out_y[4]; ++ ++float x[8]; ++double y[4]; ++ ++#define TEST(op, N, func) \ ++void \ ++test_##op##_##N##_##func () \ ++{ \ ++ for (int i = 0; i < N; i++) \ ++ out_##op[i] = __builtin_##func (op[i]); \ ++} ++ ++TEST(x, 4, ceilf); ++TEST(x, 4, floorf); ++TEST(x, 4, nearbyintf); ++TEST(x, 4, rintf); ++TEST(x, 4, roundf); ++TEST(x, 4, roundevenf); ++TEST(x, 4, truncf); ++ ++TEST(x, 8, ceilf); ++TEST(x, 8, floorf); ++TEST(x, 8, nearbyintf); ++TEST(x, 8, rintf); ++TEST(x, 8, roundf); ++TEST(x, 8, roundevenf); ++TEST(x, 8, truncf); ++ ++TEST(y, 2, ceil); ++TEST(y, 2, floor); ++TEST(y, 2, nearbyint); ++TEST(y, 2, rint); ++TEST(y, 2, round); ++TEST(y, 2, roundeven); ++TEST(y, 2, trunc); ++ ++TEST(y, 4, ceil); ++TEST(y, 4, floor); ++TEST(y, 4, nearbyint); ++TEST(y, 4, rint); ++TEST(y, 4, round); ++TEST(y, 4, roundeven); ++TEST(y, 4, trunc); ++ ++/* ceil */ ++/* { dg-final { scan-assembler "\tvftintrp\.w\.s" } } */ ++/* { dg-final { scan-assembler "\tvftintrp\.l\.d" } } */ ++/* { dg-final { scan-assembler "\txvftintrp\.w\.s" } } */ ++/* { dg-final { scan-assembler "\txvftintrp\.l\.d" } } */ ++ ++/* floor */ ++/* { dg-final { scan-assembler "\tvftintrm\.w\.s" } } */ ++/* { dg-final { scan-assembler "\tvftintrm\.l\.d" } } */ ++/* { dg-final { scan-assembler "\txvftintrm\.w\.s" } } */ ++/* { dg-final { scan-assembler "\txvftintrm\.l\.d" } } */ ++ ++/* rint and nearbyint ++ nearbyint has been disallowed to raise FE_INEXACT for decades. 
*/ ++/* { dg-final { scan-assembler-times "\tvftint\.w\.s" 1 } } */ ++/* { dg-final { scan-assembler-times "\tvftint\.l\.d" 1 } } */ ++/* { dg-final { scan-assembler-times "\txvftint\.w\.s" 1 } } */ ++/* { dg-final { scan-assembler-times "\txvftint\.l\.d" 1 } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(nearbyint\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(nearbyintf\\)" } } */ ++ ++/* round: we don't have a corresponding instruction */ ++/* { dg-final { scan-assembler "bl\t%plt\\(lround\\)" } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(roundf\\)" } } */ ++ ++/* roundeven */ ++/* { dg-final { scan-assembler "\tvftintrne\.w\.s" } } */ ++/* { dg-final { scan-assembler "\tvftintrne\.l\.d" } } */ ++/* { dg-final { scan-assembler "\txvftintrne\.w\.s" } } */ ++/* { dg-final { scan-assembler "\txvftintrne\.l\.d" } } */ ++ ++/* trunc */ ++/* { dg-final { scan-assembler-not "bl\t%plt\\(trunc\\)" } } */ ++/* { dg-final { scan-assembler-not "bl\t%plt\\(truncf\\)" } } */ +-- +2.43.0 + diff --git a/0047-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch b/0047-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch new file mode 100644 index 0000000..4768ab9 --- /dev/null +++ b/0047-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch @@ -0,0 +1,268 @@ +From 4c13256ea34b4169ceb3f9c7826843b754c6a6e0 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 19 Nov 2023 16:28:59 +0800 +Subject: [PATCH 047/188] LoongArch: Use standard pattern name and RTX code for + LSX/LASX muh instructions + +Removes unnecessary UNSPECs and make the muh instructions useful with +GNU vectors or auto vectorization. + +gcc/ChangeLog: + + * config/loongarch/simd.md (muh): New code attribute mapping + any_extend to smul_highpart or umul_highpart. + (mul3_highpart): New define_insn. + * config/loongarch/lsx.md (UNSPEC_LSX_VMUH_S): Remove. + (UNSPEC_LSX_VMUH_U): Remove. + (lsx_vmuh_s_): Remove. + (lsx_vmuh_u_): Remove. 
+ * config/loongarch/lasx.md (UNSPEC_LASX_XVMUH_S): Remove. + (UNSPEC_LASX_XVMUH_U): Remove. + (lasx_xvmuh_s_): Remove. + (lasx_xvmuh_u_): Remove. + * config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vmuh_b): + Redefine to standard pattern name. + (CODE_FOR_lsx_vmuh_h): Likewise. + (CODE_FOR_lsx_vmuh_w): Likewise. + (CODE_FOR_lsx_vmuh_d): Likewise. + (CODE_FOR_lsx_vmuh_bu): Likewise. + (CODE_FOR_lsx_vmuh_hu): Likewise. + (CODE_FOR_lsx_vmuh_wu): Likewise. + (CODE_FOR_lsx_vmuh_du): Likewise. + (CODE_FOR_lasx_xvmuh_b): Likewise. + (CODE_FOR_lasx_xvmuh_h): Likewise. + (CODE_FOR_lasx_xvmuh_w): Likewise. + (CODE_FOR_lasx_xvmuh_d): Likewise. + (CODE_FOR_lasx_xvmuh_bu): Likewise. + (CODE_FOR_lasx_xvmuh_hu): Likewise. + (CODE_FOR_lasx_xvmuh_wu): Likewise. + (CODE_FOR_lasx_xvmuh_du): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-muh.c: New test. +--- + gcc/config/loongarch/lasx.md | 22 ------------ + gcc/config/loongarch/loongarch-builtins.cc | 32 ++++++++--------- + gcc/config/loongarch/lsx.md | 22 ------------ + gcc/config/loongarch/simd.md | 16 +++++++++ + gcc/testsuite/gcc.target/loongarch/vect-muh.c | 36 +++++++++++++++++++ + 5 files changed, 68 insertions(+), 60 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-muh.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index d4a56c307..023a023b4 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -68,8 +68,6 @@ + UNSPEC_LASX_BRANCH + UNSPEC_LASX_BRANCH_V + +- UNSPEC_LASX_XVMUH_S +- UNSPEC_LASX_XVMUH_U + UNSPEC_LASX_MXVEXTW_U + UNSPEC_LASX_XVSLLWIL_S + UNSPEC_LASX_XVSLLWIL_U +@@ -2823,26 +2821,6 @@ + [(set_attr "type" "simd_logic") + (set_attr "mode" "")]) + +-(define_insn "lasx_xvmuh_s_" +- [(set (match_operand:ILASX 0 "register_operand" "=f") +- (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f") +- (match_operand:ILASX 2 "register_operand" "f")] +- UNSPEC_LASX_XVMUH_S))] +- "ISA_HAS_LASX" +- 
"xvmuh.\t%u0,%u1,%u2" +- [(set_attr "type" "simd_int_arith") +- (set_attr "mode" "")]) +- +-(define_insn "lasx_xvmuh_u_" +- [(set (match_operand:ILASX 0 "register_operand" "=f") +- (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f") +- (match_operand:ILASX 2 "register_operand" "f")] +- UNSPEC_LASX_XVMUH_U))] +- "ISA_HAS_LASX" +- "xvmuh.\t%u0,%u1,%u2" +- [(set_attr "type" "simd_int_arith") +- (set_attr "mode" "")]) +- + (define_insn "lasx_xvsllwil_s__" + [(set (match_operand: 0 "register_operand" "=f") + (unspec: [(match_operand:ILASX_WHB 1 "register_operand" "f") +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index fb458feac..41ea357cf 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -319,6 +319,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lsx_vmod_hu CODE_FOR_umodv8hi3 + #define CODE_FOR_lsx_vmod_wu CODE_FOR_umodv4si3 + #define CODE_FOR_lsx_vmod_du CODE_FOR_umodv2di3 ++#define CODE_FOR_lsx_vmuh_b CODE_FOR_smulv16qi3_highpart ++#define CODE_FOR_lsx_vmuh_h CODE_FOR_smulv8hi3_highpart ++#define CODE_FOR_lsx_vmuh_w CODE_FOR_smulv4si3_highpart ++#define CODE_FOR_lsx_vmuh_d CODE_FOR_smulv2di3_highpart ++#define CODE_FOR_lsx_vmuh_bu CODE_FOR_umulv16qi3_highpart ++#define CODE_FOR_lsx_vmuh_hu CODE_FOR_umulv8hi3_highpart ++#define CODE_FOR_lsx_vmuh_wu CODE_FOR_umulv4si3_highpart ++#define CODE_FOR_lsx_vmuh_du CODE_FOR_umulv2di3_highpart + #define CODE_FOR_lsx_vmul_b CODE_FOR_mulv16qi3 + #define CODE_FOR_lsx_vmul_h CODE_FOR_mulv8hi3 + #define CODE_FOR_lsx_vmul_w CODE_FOR_mulv4si3 +@@ -439,14 +447,6 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lsx_vfnmsub_s CODE_FOR_vfnmsubv4sf4_nmsub4 + #define CODE_FOR_lsx_vfnmsub_d CODE_FOR_vfnmsubv2df4_nmsub4 + +-#define CODE_FOR_lsx_vmuh_b CODE_FOR_lsx_vmuh_s_b +-#define CODE_FOR_lsx_vmuh_h CODE_FOR_lsx_vmuh_s_h +-#define CODE_FOR_lsx_vmuh_w CODE_FOR_lsx_vmuh_s_w +-#define CODE_FOR_lsx_vmuh_d 
CODE_FOR_lsx_vmuh_s_d +-#define CODE_FOR_lsx_vmuh_bu CODE_FOR_lsx_vmuh_u_bu +-#define CODE_FOR_lsx_vmuh_hu CODE_FOR_lsx_vmuh_u_hu +-#define CODE_FOR_lsx_vmuh_wu CODE_FOR_lsx_vmuh_u_wu +-#define CODE_FOR_lsx_vmuh_du CODE_FOR_lsx_vmuh_u_du + #define CODE_FOR_lsx_vsllwil_h_b CODE_FOR_lsx_vsllwil_s_h_b + #define CODE_FOR_lsx_vsllwil_w_h CODE_FOR_lsx_vsllwil_s_w_h + #define CODE_FOR_lsx_vsllwil_d_w CODE_FOR_lsx_vsllwil_s_d_w +@@ -588,6 +588,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lasx_xvmul_h CODE_FOR_mulv16hi3 + #define CODE_FOR_lasx_xvmul_w CODE_FOR_mulv8si3 + #define CODE_FOR_lasx_xvmul_d CODE_FOR_mulv4di3 ++#define CODE_FOR_lasx_xvmuh_b CODE_FOR_smulv32qi3_highpart ++#define CODE_FOR_lasx_xvmuh_h CODE_FOR_smulv16hi3_highpart ++#define CODE_FOR_lasx_xvmuh_w CODE_FOR_smulv8si3_highpart ++#define CODE_FOR_lasx_xvmuh_d CODE_FOR_smulv4di3_highpart ++#define CODE_FOR_lasx_xvmuh_bu CODE_FOR_umulv32qi3_highpart ++#define CODE_FOR_lasx_xvmuh_hu CODE_FOR_umulv16hi3_highpart ++#define CODE_FOR_lasx_xvmuh_wu CODE_FOR_umulv8si3_highpart ++#define CODE_FOR_lasx_xvmuh_du CODE_FOR_umulv4di3_highpart + #define CODE_FOR_lasx_xvclz_b CODE_FOR_clzv32qi2 + #define CODE_FOR_lasx_xvclz_h CODE_FOR_clzv16hi2 + #define CODE_FOR_lasx_xvclz_w CODE_FOR_clzv8si2 +@@ -697,14 +705,6 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lasx_xvavgr_hu CODE_FOR_lasx_xvavgr_u_hu + #define CODE_FOR_lasx_xvavgr_wu CODE_FOR_lasx_xvavgr_u_wu + #define CODE_FOR_lasx_xvavgr_du CODE_FOR_lasx_xvavgr_u_du +-#define CODE_FOR_lasx_xvmuh_b CODE_FOR_lasx_xvmuh_s_b +-#define CODE_FOR_lasx_xvmuh_h CODE_FOR_lasx_xvmuh_s_h +-#define CODE_FOR_lasx_xvmuh_w CODE_FOR_lasx_xvmuh_s_w +-#define CODE_FOR_lasx_xvmuh_d CODE_FOR_lasx_xvmuh_s_d +-#define CODE_FOR_lasx_xvmuh_bu CODE_FOR_lasx_xvmuh_u_bu +-#define CODE_FOR_lasx_xvmuh_hu CODE_FOR_lasx_xvmuh_u_hu +-#define CODE_FOR_lasx_xvmuh_wu CODE_FOR_lasx_xvmuh_u_wu +-#define CODE_FOR_lasx_xvmuh_du CODE_FOR_lasx_xvmuh_u_du + #define CODE_FOR_lasx_xvssran_b_h 
CODE_FOR_lasx_xvssran_s_b_h + #define CODE_FOR_lasx_xvssran_h_w CODE_FOR_lasx_xvssran_s_h_w + #define CODE_FOR_lasx_xvssran_w_d CODE_FOR_lasx_xvssran_s_w_d +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index c1c3719e3..537afaf96 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -64,8 +64,6 @@ + UNSPEC_LSX_VSRLR + UNSPEC_LSX_VSRLRI + UNSPEC_LSX_VSHUF +- UNSPEC_LSX_VMUH_S +- UNSPEC_LSX_VMUH_U + UNSPEC_LSX_VEXTW_S + UNSPEC_LSX_VEXTW_U + UNSPEC_LSX_VSLLWIL_S +@@ -2506,26 +2504,6 @@ + [(set_attr "type" "simd_logic") + (set_attr "mode" "")]) + +-(define_insn "lsx_vmuh_s_" +- [(set (match_operand:ILSX 0 "register_operand" "=f") +- (unspec:ILSX [(match_operand:ILSX 1 "register_operand" "f") +- (match_operand:ILSX 2 "register_operand" "f")] +- UNSPEC_LSX_VMUH_S))] +- "ISA_HAS_LSX" +- "vmuh.\t%w0,%w1,%w2" +- [(set_attr "type" "simd_int_arith") +- (set_attr "mode" "")]) +- +-(define_insn "lsx_vmuh_u_" +- [(set (match_operand:ILSX 0 "register_operand" "=f") +- (unspec:ILSX [(match_operand:ILSX 1 "register_operand" "f") +- (match_operand:ILSX 2 "register_operand" "f")] +- UNSPEC_LSX_VMUH_U))] +- "ISA_HAS_LSX" +- "vmuh.\t%w0,%w1,%w2" +- [(set_attr "type" "simd_int_arith") +- (set_attr "mode" "")]) +- + (define_insn "lsx_vextw_s_d" + [(set (match_operand:V2DI 0 "register_operand" "=f") + (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "f")] +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 27d1ffecd..a0e8db3c0 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -206,6 +206,22 @@ + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "")]) + ++;; vmuh.{b/h/w/d} ++ ++(define_code_attr muh ++ [(sign_extend "smul_highpart") ++ (zero_extend "umul_highpart")]) ++ ++(define_insn "mul3_highpart" ++ [(set (match_operand:IVEC 0 "register_operand" "=f") ++ (:IVEC (match_operand:IVEC 1 "register_operand" "f") ++ (match_operand:IVEC 2 "register_operand" "f"))) ++ 
(any_extend (const_int 0))] ++ "" ++ "vmuh.\t%0,%1,%2" ++ [(set_attr "type" "simd_int_arith") ++ (set_attr "mode" "")]) ++ + ; The LoongArch SX Instructions. + (include "lsx.md") + +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-muh.c b/gcc/testsuite/gcc.target/loongarch/vect-muh.c +new file mode 100644 +index 000000000..a788840b2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-muh.c +@@ -0,0 +1,36 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mlasx -O3" } */ ++/* { dg-final { scan-assembler "\tvmuh\.w\t" } } */ ++/* { dg-final { scan-assembler "\tvmuh\.wu\t" } } */ ++/* { dg-final { scan-assembler "\txvmuh\.w\t" } } */ ++/* { dg-final { scan-assembler "\txvmuh\.wu\t" } } */ ++ ++int a[8], b[8], c[8]; ++ ++void ++test1 (void) ++{ ++ for (int i = 0; i < 4; i++) ++ c[i] = ((long)a[i] * (long)b[i]) >> 32; ++} ++ ++void ++test2 (void) ++{ ++ for (int i = 0; i < 4; i++) ++ c[i] = ((long)(unsigned)a[i] * (long)(unsigned)b[i]) >> 32; ++} ++ ++void ++test3 (void) ++{ ++ for (int i = 0; i < 8; i++) ++ c[i] = ((long)a[i] * (long)b[i]) >> 32; ++} ++ ++void ++test4 (void) ++{ ++ for (int i = 0; i < 8; i++) ++ c[i] = ((long)(unsigned)a[i] * (long)(unsigned)b[i]) >> 32; ++} +-- +2.43.0 + diff --git a/0048-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch b/0048-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch new file mode 100644 index 0000000..fbb44a4 --- /dev/null +++ b/0048-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch @@ -0,0 +1,285 @@ +From 9dde2178e64893e4c46b1c375a658f8ab6d34fdd Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 19 Nov 2023 17:28:06 +0800 +Subject: [PATCH 048/188] LoongArch: Use standard pattern name and RTX code for + LSX/LASX rotate shift + +Remove unnecessary UNSPECs and make the [x]vrotr[i] instructions useful +with GNU vectors and auto vectorization. + +gcc/ChangeLog: + + * config/loongarch/lsx.md (bitimm): Move to ... + (UNSPEC_LSX_VROTR): Remove. + (lsx_vrotr_): Remove. 
+ (lsx_vrotri_): Remove. + * config/loongarch/lasx.md (UNSPEC_LASX_XVROTR): Remove. + (lasx_xvrotr_): Remove. + (lasx_xvrotri_): Remove. + * config/loongarch/simd.md (bitimm): ... here. Expand it to + cover LASX modes. + (vrotr3): New define_insn. + (rotr3): New define_insn. + * config/loongarch/loongarch-builtins.cc: + (CODE_FOR_lsx_vrotr_b): Use standard pattern name. + (CODE_FOR_lsx_vrotr_h): Likewise. + (CODE_FOR_lsx_vrotr_w): Likewise. + (CODE_FOR_lsx_vrotr_d): Likewise. + (CODE_FOR_lasx_xvrotr_b): Likewise. + (CODE_FOR_lasx_xvrotr_h): Likewise. + (CODE_FOR_lasx_xvrotr_w): Likewise. + (CODE_FOR_lasx_xvrotr_d): Likewise. + (CODE_FOR_lsx_vrotri_b): Define to standard pattern name. + (CODE_FOR_lsx_vrotri_h): Likewise. + (CODE_FOR_lsx_vrotri_w): Likewise. + (CODE_FOR_lsx_vrotri_d): Likewise. + (CODE_FOR_lasx_xvrotri_b): Likewise. + (CODE_FOR_lasx_xvrotri_h): Likewise. + (CODE_FOR_lasx_xvrotri_w): Likewise. + (CODE_FOR_lasx_xvrotri_d): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-rotr.c: New test. 
+--- + gcc/config/loongarch/lasx.md | 22 ------------ + gcc/config/loongarch/loongarch-builtins.cc | 16 +++++++++ + gcc/config/loongarch/lsx.md | 28 --------------- + gcc/config/loongarch/simd.md | 29 +++++++++++++++ + .../gcc.target/loongarch/vect-rotr.c | 36 +++++++++++++++++++ + 5 files changed, 81 insertions(+), 50 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-rotr.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 023a023b4..116b30c07 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -138,7 +138,6 @@ + UNSPEC_LASX_XVHSUBW_Q_D + UNSPEC_LASX_XVHADDW_QU_DU + UNSPEC_LASX_XVHSUBW_QU_DU +- UNSPEC_LASX_XVROTR + UNSPEC_LASX_XVADD_Q + UNSPEC_LASX_XVSUB_Q + UNSPEC_LASX_XVREPLVE +@@ -4232,18 +4231,6 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "V4DI")]) + +-;;XVROTR.B XVROTR.H XVROTR.W XVROTR.D +-;;TODO-478 +-(define_insn "lasx_xvrotr_" +- [(set (match_operand:ILASX 0 "register_operand" "=f") +- (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f") +- (match_operand:ILASX 2 "register_operand" "f")] +- UNSPEC_LASX_XVROTR))] +- "ISA_HAS_LASX" +- "xvrotr.\t%u0,%u1,%u2" +- [(set_attr "type" "simd_int_arith") +- (set_attr "mode" "")]) +- + ;;XVADD.Q + ;;TODO2 + (define_insn "lasx_xvadd_q" +@@ -4426,15 +4413,6 @@ + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "V4DI")]) + +-(define_insn "lasx_xvrotri_" +- [(set (match_operand:ILASX 0 "register_operand" "=f") +- (rotatert:ILASX (match_operand:ILASX 1 "register_operand" "f") +- (match_operand 2 "const__operand" "")))] +- "ISA_HAS_LASX" +- "xvrotri.\t%u0,%u1,%2" +- [(set_attr "type" "simd_shf") +- (set_attr "mode" "")]) +- + (define_insn "lasx_xvextl_q_d" + [(set (match_operand:V4DI 0 "register_operand" "=f") + (unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")] +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index 41ea357cf..f4523c8bf 100644 +--- 
a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -369,6 +369,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lsx_vsrli_h CODE_FOR_vlshrv8hi3 + #define CODE_FOR_lsx_vsrli_w CODE_FOR_vlshrv4si3 + #define CODE_FOR_lsx_vsrli_d CODE_FOR_vlshrv2di3 ++#define CODE_FOR_lsx_vrotr_b CODE_FOR_vrotrv16qi3 ++#define CODE_FOR_lsx_vrotr_h CODE_FOR_vrotrv8hi3 ++#define CODE_FOR_lsx_vrotr_w CODE_FOR_vrotrv4si3 ++#define CODE_FOR_lsx_vrotr_d CODE_FOR_vrotrv2di3 ++#define CODE_FOR_lsx_vrotri_b CODE_FOR_rotrv16qi3 ++#define CODE_FOR_lsx_vrotri_h CODE_FOR_rotrv8hi3 ++#define CODE_FOR_lsx_vrotri_w CODE_FOR_rotrv4si3 ++#define CODE_FOR_lsx_vrotri_d CODE_FOR_rotrv2di3 + #define CODE_FOR_lsx_vsub_b CODE_FOR_subv16qi3 + #define CODE_FOR_lsx_vsub_h CODE_FOR_subv8hi3 + #define CODE_FOR_lsx_vsub_w CODE_FOR_subv4si3 +@@ -634,6 +642,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + #define CODE_FOR_lasx_xvsrli_h CODE_FOR_vlshrv16hi3 + #define CODE_FOR_lasx_xvsrli_w CODE_FOR_vlshrv8si3 + #define CODE_FOR_lasx_xvsrli_d CODE_FOR_vlshrv4di3 ++#define CODE_FOR_lasx_xvrotr_b CODE_FOR_vrotrv32qi3 ++#define CODE_FOR_lasx_xvrotr_h CODE_FOR_vrotrv16hi3 ++#define CODE_FOR_lasx_xvrotr_w CODE_FOR_vrotrv8si3 ++#define CODE_FOR_lasx_xvrotr_d CODE_FOR_vrotrv4di3 ++#define CODE_FOR_lasx_xvrotri_b CODE_FOR_rotrv32qi3 ++#define CODE_FOR_lasx_xvrotri_h CODE_FOR_rotrv16hi3 ++#define CODE_FOR_lasx_xvrotri_w CODE_FOR_rotrv8si3 ++#define CODE_FOR_lasx_xvrotri_d CODE_FOR_rotrv4di3 + #define CODE_FOR_lasx_xvsub_b CODE_FOR_subv32qi3 + #define CODE_FOR_lasx_xvsub_h CODE_FOR_subv16hi3 + #define CODE_FOR_lasx_xvsub_w CODE_FOR_subv8si3 +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 537afaf96..232399934 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -141,7 +141,6 @@ + UNSPEC_LSX_VMADDWOD + UNSPEC_LSX_VMADDWOD2 + UNSPEC_LSX_VMADDWOD3 +- UNSPEC_LSX_VROTR + UNSPEC_LSX_VADD_Q + UNSPEC_LSX_VSUB_Q + UNSPEC_LSX_VEXTH_Q_D 
+@@ -363,14 +362,6 @@ + (V8HI "exp_8") + (V16QI "exp_16")]) + +-;; This attribute is used to form an immediate operand constraint using +-;; "const__operand". +-(define_mode_attr bitimm +- [(V16QI "uimm3") +- (V8HI "uimm4") +- (V4SI "uimm5") +- (V2DI "uimm6")]) +- + (define_expand "vec_init" + [(match_operand:LSX 0 "register_operand") + (match_operand:LSX 1 "")] +@@ -4152,16 +4143,6 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "V2DI")]) + +-(define_insn "lsx_vrotr_" +- [(set (match_operand:ILSX 0 "register_operand" "=f") +- (unspec:ILSX [(match_operand:ILSX 1 "register_operand" "f") +- (match_operand:ILSX 2 "register_operand" "f")] +- UNSPEC_LSX_VROTR))] +- "ISA_HAS_LSX" +- "vrotr.\t%w0,%w1,%w2" +- [(set_attr "type" "simd_int_arith") +- (set_attr "mode" "")]) +- + (define_insn "lsx_vadd_q" + [(set (match_operand:V2DI 0 "register_operand" "=f") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "f") +@@ -4255,15 +4236,6 @@ + [(set_attr "type" "simd_fcvt") + (set_attr "mode" "V2DI")]) + +-(define_insn "lsx_vrotri_" +- [(set (match_operand:ILSX 0 "register_operand" "=f") +- (rotatert:ILSX (match_operand:ILSX 1 "register_operand" "f") +- (match_operand 2 "const__operand" "")))] +- "ISA_HAS_LSX" +- "vrotri.\t%w0,%w1,%2" +- [(set_attr "type" "simd_shf") +- (set_attr "mode" "")]) +- + (define_insn "lsx_vextl_q_d" + [(set (match_operand:V2DI 0 "register_operand" "=f") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "f")] +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index a0e8db3c0..4ecf7a55e 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -91,6 +91,13 @@ + (V8HI "16") (V16HI "16") + (V16QI "8") (V32QI "8")]) + ++;; This attribute is used to form an immediate operand constraint using ++;; "const__operand". 
++(define_mode_attr bitimm [(V16QI "uimm3") (V32QI "uimm3") ++ (V8HI "uimm4") (V16HI "uimm4") ++ (V4SI "uimm5") (V8SI "uimm5") ++ (V2DI "uimm6") (V4DI "uimm6")]) ++ + ;; ======================================================================= + ;; For many LASX instructions, the only difference of it from the LSX + ;; counterpart is the length of vector operands. Describe these LSX/LASX +@@ -222,6 +229,28 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + ++;; vrotr.{b/h/w/d} ++ ++(define_insn "vrotr3" ++ [(set (match_operand:IVEC 0 "register_operand" "=f") ++ (rotatert:IVEC (match_operand:IVEC 1 "register_operand" "f") ++ (match_operand:IVEC 2 "register_operand" "f")))] ++ "" ++ "vrotr.\t%0,%1,%2" ++ [(set_attr "type" "simd_int_arith") ++ (set_attr "mode" "")]) ++ ++;; vrotri.{b/h/w/d} ++ ++(define_insn "rotr3" ++ [(set (match_operand:IVEC 0 "register_operand" "=f") ++ (rotatert:IVEC (match_operand:IVEC 1 "register_operand" "f") ++ (match_operand:SI 2 "const__operand")))] ++ "" ++ "vrotri.\t%0,%1,%2"; ++ [(set_attr "type" "simd_int_arith") ++ (set_attr "mode" "")]) ++ + ; The LoongArch SX Instructions. 
+ (include "lsx.md") + +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-rotr.c b/gcc/testsuite/gcc.target/loongarch/vect-rotr.c +new file mode 100644 +index 000000000..733c36334 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-rotr.c +@@ -0,0 +1,36 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx" } */ ++/* { dg-final { scan-assembler "\tvrotr\.w\t" } } */ ++/* { dg-final { scan-assembler "\txvrotr\.w\t" } } */ ++/* { dg-final { scan-assembler "\tvrotri\.w\t\[^\n\]*7\n" } } */ ++/* { dg-final { scan-assembler "\txvrotri\.w\t\[^\n\]*7\n" } } */ ++ ++unsigned int a[8], b[8]; ++ ++void ++test1 (void) ++{ ++ for (int i = 0; i < 4; i++) ++ a[i] = a[i] >> b[i] | a[i] << (32 - b[i]); ++} ++ ++void ++test2 (void) ++{ ++ for (int i = 0; i < 8; i++) ++ a[i] = a[i] >> b[i] | a[i] << (32 - b[i]); ++} ++ ++void ++test3 (void) ++{ ++ for (int i = 0; i < 4; i++) ++ a[i] = a[i] >> 7 | a[i] << 25; ++} ++ ++void ++test4 (void) ++{ ++ for (int i = 0; i < 8; i++) ++ a[i] = a[i] >> 7 | a[i] << 25; ++} +-- +2.43.0 + diff --git a/0049-LoongArch-Remove-lrint_allow_inexact.patch b/0049-LoongArch-Remove-lrint_allow_inexact.patch new file mode 100644 index 0000000..870f1d5 --- /dev/null +++ b/0049-LoongArch-Remove-lrint_allow_inexact.patch @@ -0,0 +1,42 @@ +From c898e4a85c04a72f08db9ba2a454130f15f6f280 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Mon, 20 Nov 2023 01:34:26 +0800 +Subject: [PATCH 049/188] LoongArch: Remove lrint_allow_inexact + +No functional change, just a cleanup. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (lrint_allow_inexact): Remove. + (2): Check if + == UNSPEC_FTINT instead of . 
+--- + gcc/config/loongarch/loongarch.md | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index d1c766cbf..11577f407 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -585,9 +585,6 @@ + (define_int_attr lrint_submenmonic [(UNSPEC_FTINT "") + (UNSPEC_FTINTRM "rm") + (UNSPEC_FTINTRP "rp")]) +-(define_int_attr lrint_allow_inexact [(UNSPEC_FTINT "1") +- (UNSPEC_FTINTRM "0") +- (UNSPEC_FTINTRP "0")]) + + ;; Iterator and attributes for bytepick.d + (define_int_iterator bytepick_w_ashift_amount [8 16 24]) +@@ -2384,7 +2381,7 @@ + (unspec:ANYFI [(match_operand:ANYF 1 "register_operand" "f")] + LRINT))] + "TARGET_HARD_FLOAT && +- ( ++ ( == UNSPEC_FTINT + || flag_fp_int_builtin_inexact + || !flag_trapping_math)" + "ftint.. %0,%1" +-- +2.43.0 + diff --git a/0050-LoongArch-Use-LSX-for-scalar-FP-rounding-with-explic.patch b/0050-LoongArch-Use-LSX-for-scalar-FP-rounding-with-explic.patch new file mode 100644 index 0000000..4847ba9 --- /dev/null +++ b/0050-LoongArch-Use-LSX-for-scalar-FP-rounding-with-explic.patch @@ -0,0 +1,150 @@ +From 05fafb78b301ce9a545e0dad896b19339f716eaf Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Mon, 20 Nov 2023 03:51:56 +0800 +Subject: [PATCH 050/188] LoongArch: Use LSX for scalar FP rounding with + explicit rounding mode + +In LoongArch FP base ISA there is only the frint.{s/d} instruction which +reads the global rounding mode. Utilize LSX for explicit rounding mode +even if the operand is scalar. It seems to waste CPU power, but it is +still much faster than calling the library function. + +gcc/ChangeLog: + + * config/loongarch/simd.md (LSX_SCALAR_FRINT): New int iterator. + (VLSX_FOR_FMODE): New mode attribute. + (2): New expander, + expanding to vreplvei.{w/d} + vfrint{rp/rz/rm/rne}.{s/d}. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-frint-scalar.c: New test. 
+ * gcc.target/loongarch/vect-frint-scalar-no-inexact.c: New test. +--- + gcc/config/loongarch/simd.md | 28 ++++++++++++ + .../loongarch/vect-frint-scalar-no-inexact.c | 23 ++++++++++ + .../gcc.target/loongarch/vect-frint-scalar.c | 43 +++++++++++++++++++ + 3 files changed, 94 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c + +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 4ecf7a55e..843b1a41f 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -169,6 +169,34 @@ + UNSPEC_SIMD_FRINTRZ))] + "") + ++;; Use LSX for scalar ceil/floor/trunc/roundeven when -mlsx and -ffp-int- ++;; builtin-inexact. The base FP instruction set lacks these operations. ++;; Yes we are wasting 50% or even 75% of the CPU horsepower, but it's still ++;; much faster than calling a libc function: on LA464 and LA664 there is a ++;; 3x ~ 5x speed up. ++;; ++;; Note that a vreplvei instruction is needed or we'll also operate on the ++;; junk in high bits of the vector register and produce random FP exceptions. 
++ ++(define_int_iterator LSX_SCALAR_FRINT ++ [UNSPEC_SIMD_FRINTRP ++ UNSPEC_SIMD_FRINTRZ ++ UNSPEC_SIMD_FRINTRM ++ UNSPEC_SIMD_FRINTRNE]) ++ ++(define_mode_attr VLSX_FOR_FMODE [(DF "V2DF") (SF "V4SF")]) ++ ++(define_expand "2" ++ [(set (match_dup 2) ++ (vec_duplicate: ++ (match_operand:ANYF 1 "register_operand"))) ++ (set (match_dup 2) ++ (unspec: [(match_dup 2)] LSX_SCALAR_FRINT)) ++ (set (match_operand:ANYF 0 "register_operand") ++ (vec_select:ANYF (match_dup 2) (parallel [(const_int 0)])))] ++ "ISA_HAS_LSX && (flag_fp_int_builtin_inexact || !flag_trapping_math)" ++ "operands[2] = gen_reg_rtx (mode);") ++ + ;; vftint.{/rp/rz/rm} + (define_insn + "_vftint__" +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c +new file mode 100644 +index 000000000..002e3b92d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -fno-fp-int-builtin-inexact" } */ ++ ++#include "vect-frint-scalar.c" ++ ++/* cannot use LSX for these with -fno-fp-int-builtin-inexact, ++ call library function. 
*/ ++/* { dg-final { scan-assembler "\tb\t%plt\\(ceil\\)" } } */ ++/* { dg-final { scan-assembler "\tb\t%plt\\(ceilf\\)" } } */ ++/* { dg-final { scan-assembler "\tb\t%plt\\(floor\\)" } } */ ++/* { dg-final { scan-assembler "\tb\t%plt\\(floorf\\)" } } */ ++/* { dg-final { scan-assembler "\tb\t%plt\\(trunc\\)" } } */ ++/* { dg-final { scan-assembler "\tb\t%plt\\(truncf\\)" } } */ ++/* { dg-final { scan-assembler "\tb\t%plt\\(roundeven\\)" } } */ ++/* { dg-final { scan-assembler "\tb\t%plt\\(roundevenf\\)" } } */ ++ ++/* nearbyint is not allowed to rasie FE_INEXACT for decades */ ++/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyint\\)" } } */ ++/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyintf\\)" } } */ ++ ++/* rint should just use basic FP operation */ ++/* { dg-final { scan-assembler "\tfrint\.s" } } */ ++/* { dg-final { scan-assembler "\tfrint\.d" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c +new file mode 100644 +index 000000000..c7cb40be7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c +@@ -0,0 +1,43 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx" } */ ++ ++#define test(func, suffix) \ ++__typeof__ (1.##suffix) \ ++_##func##suffix (__typeof__ (1.##suffix) x) \ ++{ \ ++ return __builtin_##func##suffix (x); \ ++} ++ ++test (ceil, f) ++test (ceil, ) ++test (floor, f) ++test (floor, ) ++test (trunc, f) ++test (trunc, ) ++test (roundeven, f) ++test (roundeven, ) ++test (nearbyint, f) ++test (nearbyint, ) ++test (rint, f) ++test (rint, ) ++ ++/* { dg-final { scan-assembler "\tvfrintrp\.s" } } */ ++/* { dg-final { scan-assembler "\tvfrintrm\.s" } } */ ++/* { dg-final { scan-assembler "\tvfrintrz\.s" } } */ ++/* { dg-final { scan-assembler "\tvfrintrne\.s" } } */ ++/* { dg-final { scan-assembler "\tvfrintrp\.d" } } */ ++/* { dg-final { scan-assembler "\tvfrintrm\.d" } } */ ++/* { dg-final { scan-assembler "\tvfrintrz\.d" } } */ ++/* { 
dg-final { scan-assembler "\tvfrintrne\.d" } } */ ++ ++/* must do vreplvei first */ ++/* { dg-final { scan-assembler-times "\tvreplvei\.w\t\\\$vr0,\\\$vr0,0" 4 } } */ ++/* { dg-final { scan-assembler-times "\tvreplvei\.d\t\\\$vr0,\\\$vr0,0" 4 } } */ ++ ++/* nearbyint is not allowed to rasie FE_INEXACT for decades */ ++/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyint\\)" } } */ ++/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyintf\\)" } } */ ++ ++/* rint should just use basic FP operation */ ++/* { dg-final { scan-assembler "\tfrint\.s" } } */ ++/* { dg-final { scan-assembler "\tfrint\.d" } } */ +-- +2.43.0 + diff --git a/0051-LoongArch-Remove-duplicate-definition-of-CLZ_DEFINED.patch b/0051-LoongArch-Remove-duplicate-definition-of-CLZ_DEFINED.patch new file mode 100644 index 0000000..d3bcb95 --- /dev/null +++ b/0051-LoongArch-Remove-duplicate-definition-of-CLZ_DEFINED.patch @@ -0,0 +1,49 @@ +From 21bb4f07db53df717d02e9115dcdb7b5475ede2a Mon Sep 17 00:00:00 2001 +From: Li Wei +Date: Tue, 28 Nov 2023 15:56:35 +0800 +Subject: [PATCH 051/188] LoongArch: Remove duplicate definition of + CLZ_DEFINED_VALUE_AT_ZERO. + +In the r14-5547 commit, C[LT]Z_DEFINED_VALUE_AT_ZERO were defined at +the same time, but in fact, CLZ_DEFINED_VALUE_AT_ZERO has already been +defined, so remove the duplicate definition. + +gcc/ChangeLog: + + * config/loongarch/loongarch.h (CTZ_DEFINED_VALUE_AT_ZERO): Add + description. + (CLZ_DEFINED_VALUE_AT_ZERO): Remove duplicate definition. +--- + gcc/config/loongarch/loongarch.h | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index 19cf6fd33..8b28be0e4 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -288,10 +288,12 @@ along with GCC; see the file COPYING3. If not see + /* Define if loading short immediate values into registers sign extends. 
*/ + #define SHORT_IMMEDIATES_SIGN_EXTEND 1 + +-/* The clz.{w/d} instructions have the natural values at 0. */ ++/* The clz.{w/d}, ctz.{w/d} instructions have the natural values at 0. */ + + #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) ++#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ ++ ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) + + /* Standard register usage. */ + +@@ -1239,8 +1241,3 @@ struct GTY (()) machine_function + + #define TARGET_EXPLICIT_RELOCS \ + (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS) +- +-#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ +- ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) +-#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ +- ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) +-- +2.43.0 + diff --git a/0052-LoongArch-Added-vectorized-hardware-inspection-for-t.patch b/0052-LoongArch-Added-vectorized-hardware-inspection-for-t.patch new file mode 100644 index 0000000..768de3c --- /dev/null +++ b/0052-LoongArch-Added-vectorized-hardware-inspection-for-t.patch @@ -0,0 +1,4375 @@ +From 8d5c983efc35804f98823e203eada6263dd1604e Mon Sep 17 00:00:00 2001 +From: chenxiaolong +Date: Tue, 28 Nov 2023 16:23:53 +0800 +Subject: [PATCH 052/188] LoongArch: Added vectorized hardware inspection for + testsuite. + +When GCC regression tests are executed on a cpu that does not support +vectorization, the loongarch/vector directory will have some FAIL entries for +all test cases related to vectorization runs. In order to solve this kind +of problem, a vectorized hardware detection function was added to the code, +which can only be compiled but not run. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xvabsd-1.c:Remove + the default Settings to run the behavior. + * gcc.target/loongarch/vector/lasx/lasx-xvabsd-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvadd.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvadda.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddi.c:Dito. 
+ * gcc.target/loongarch/vector/lasx/lasx-xvaddwev-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddwev-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddwev-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddwod-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddwod-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvaddwod-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvand.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvandi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvandn.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvavg-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvavg-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvavgr-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvavgr-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitclr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitclri.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitrev.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitrevi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitsel.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitseli.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitset.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbitseti.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbsll_v.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvbsrl_v.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvclo.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvclz.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvdiv-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvdiv-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvext2xv-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvext2xv-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvexth-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvexth-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvextl-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvextl-2.c:Dito. 
+ * gcc.target/loongarch/vector/lasx/lasx-xvextrins.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfadd_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfadd_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfclass_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfclass_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_caf_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_ceq_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cle_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_clt_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cne_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cor_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cun_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_saf_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_seq_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sle_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_slt_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sne_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sor_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sun_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcvt.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfcvth.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvffint-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvffint-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvffinth.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvflogb_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvflogb_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmadd_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmadd_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmax_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmax_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_s.c:Dito. 
+ * gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfrint_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfrstp.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfrstpi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_d.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_s.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvftint-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvftint-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvftint-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvftintl.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvhaddw-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvhaddw-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvhsubw-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvhsubw-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvilvh.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvilvl.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvinsgr2vr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvinsve0.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvld.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvldi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmadd.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmax-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmax-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaxi-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmaxi-2.c:Dito. 
+ * gcc.target/loongarch/vector/lasx/lasx-xvmin-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmin-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmini-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmini-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmod-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmod-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmskgez.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmskltz.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmsknz.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmsub.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmuh-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmuh-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmul.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwev-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwev-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwev-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwod-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwod-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvmulwod-3.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvneg.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvnor.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvnori.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvor.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvori.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvorn.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpackev.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpackod.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpcnt.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpickev.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpickod.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpickve.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpickve2gr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvprem.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvpremi.c:Dito. 
+ * gcc.target/loongarch/vector/lasx/lasx-xvreplgr2vr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvreplve.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvreplve0.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvreplvei.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvrotr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvrotri.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsadd-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsadd-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsat-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsat-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvseq.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvseqi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvshuf4i_b.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsigncov.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsle-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsle-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslei-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslei-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsll.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslli.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsllwil-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsllwil-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslt-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslt-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslti-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvslti-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsra.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrai.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsran.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrani.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrar.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrari.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrarn.c:Dito. 
+ * gcc.target/loongarch/vector/lasx/lasx-xvsrarni.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrl.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrli.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrln.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrlni.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrlr.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrlri.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrlrn.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsrlrni.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvssran.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvssrani.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvssrarn.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvssrarni.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvssrln.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvssrlni.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvssrlrn.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvssrlrni.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvssub-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvssub-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvst.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsub.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsubi.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsubwev-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsubwev-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsubwod-1.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvsubwod-2.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvxor.c:Dito. + * gcc.target/loongarch/vector/lasx/lasx-xvxori.c:Dito. + * gcc.target/loongarch/vector/loongarch-vector.exp:Added hardware + detection to set the behavior of program execution based on the + characteristics of the hardware. + * gcc.target/loongarch/vector/lsx/lsx-vabsd-1.c:Remove the default + Settings to run the behavior. + * gcc.target/loongarch/vector/lsx/lsx-vabsd-2.c:Dito. 
+ * gcc.target/loongarch/vector/lsx/lsx-vadd.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vadda.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vaddi.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vaddwev-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vaddwev-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vaddwev-3.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vaddwod-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vaddwod-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vaddwod-3.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vand.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vandi.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vandn.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vavg-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vavg-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vavgr-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vavgr-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vbitclr.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vbitclri.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vbitrev.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vbitrevi.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vbitsel.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vbitseli.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vbitset.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vbitseti.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vbsll.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vbsrl.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vclo.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vclz.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vdiv-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vdiv-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vexth-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vexth-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vextl-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vextl-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vextrins.c:Dito. 
+ * gcc.target/loongarch/vector/lsx/lsx-vfadd_d.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfadd_s.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfclass_d.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfclass_s.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfcmp_caf.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfcmp_ceq.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfcmp_cle.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfcmp_clt.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfcmp_cne.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfcmp_cor.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfcmp_cun.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfcmp_saf.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfcmp_seq.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfcmp_sle.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfcmp_slt.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfcmp_sne.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfcmp_sor.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfcmp_sun.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfcvt-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfcvt-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vffint-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vffint-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vffint-3.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vflogb_d.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vflogb_s.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfmadd_d.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfmadd_s.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfmax_d.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfmax_s.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfmaxa_d.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfmaxa_s.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfnmadd_d.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfnmadd_s.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfrint_d.c:Dito. 
+ * gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfrstp.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfrstpi.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfsqrt_d.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vfsqrt_s.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vftint-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vftint-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vftint-3.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vftint-4.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vhaddw-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vhaddw-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vhsubw-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vhsubw-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vilvh.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vilvl.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vinsgr2vr.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vld.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vldi.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmadd.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmaddwev-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmaddwev-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmaddwev-3.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmaddwod-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmaddwod-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmaddwod-3.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmax-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmax-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmaxi-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmaxi-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmin-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmin-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmini-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmini-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmod-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmod-2.c:Dito. 
+ * gcc.target/loongarch/vector/lsx/lsx-vmskgez.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmskltz.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmsknz.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmsub.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmuh-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmuh-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmul.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmulwev-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmulwev-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmulwev-3.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmulwod-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmulwod-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vmulwod-3.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vneg.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vnor.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vnori.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vor.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vori.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vorn.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vpackev.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vpackod.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vpcnt.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vpickev.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vpickod.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vpickve2gr.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vpremi.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vreplgr2vr.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vreplve.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vreplvei.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vrotr.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vrotri.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsat-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsat-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vseq.c:Dito. 
+ * gcc.target/loongarch/vector/lsx/lsx-vseqi.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vshuf.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vshuf4i.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsigncov.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsle-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsle-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vslei-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vslei-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsll.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vslli.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsllwil-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsllwil-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vslt-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vslt-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vslti-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vslti-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsra.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsrai.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsran.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsrani.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsrar.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsrari.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsrarn.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsrarni.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsrl.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsrli.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsrln.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsrlni.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsrlr.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsrlri.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsrlrn.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsrlrni.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vssran.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vssrani.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vssrarn.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vssrarni.c:Dito. 
+ * gcc.target/loongarch/vector/lsx/lsx-vssrln.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vssrlni.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vssrlrn.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vssrlrni.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vssub-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vssub-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vst.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsub.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsubi.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsubwev-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsubwev-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsubwod-1.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vsubwod-2.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vxor.c:Dito. + * gcc.target/loongarch/vector/lsx/lsx-vxori.c:Dito. +--- + .../loongarch/vector/lasx/lasx-xvabsd-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvabsd-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvadd.c | 1 - + .../loongarch/vector/lasx/lasx-xvadda.c | 1 - + .../loongarch/vector/lasx/lasx-xvaddi.c | 1 - + .../loongarch/vector/lasx/lasx-xvaddwev-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvaddwev-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvaddwev-3.c | 1 - + .../loongarch/vector/lasx/lasx-xvaddwod-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvaddwod-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvaddwod-3.c | 1 - + .../loongarch/vector/lasx/lasx-xvand.c | 1 - + .../loongarch/vector/lasx/lasx-xvandi.c | 1 - + .../loongarch/vector/lasx/lasx-xvandn.c | 1 - + .../loongarch/vector/lasx/lasx-xvavg-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvavg-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvavgr-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvavgr-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvbitclr.c | 1 - + .../loongarch/vector/lasx/lasx-xvbitclri.c | 1 - + .../loongarch/vector/lasx/lasx-xvbitrev.c | 1 - + .../loongarch/vector/lasx/lasx-xvbitrevi.c | 1 - + .../loongarch/vector/lasx/lasx-xvbitsel.c | 1 - + 
.../loongarch/vector/lasx/lasx-xvbitseli.c | 1 - + .../loongarch/vector/lasx/lasx-xvbitset.c | 1 - + .../loongarch/vector/lasx/lasx-xvbitseti.c | 1 - + .../loongarch/vector/lasx/lasx-xvbsll_v.c | 1 - + .../loongarch/vector/lasx/lasx-xvbsrl_v.c | 1 - + .../loongarch/vector/lasx/lasx-xvclo.c | 1 - + .../loongarch/vector/lasx/lasx-xvclz.c | 1 - + .../loongarch/vector/lasx/lasx-xvdiv-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvdiv-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvext2xv-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvext2xv-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvexth-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvexth-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvextl-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvextl-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvextrins.c | 1 - + .../loongarch/vector/lasx/lasx-xvfadd_d.c | 1 - + .../loongarch/vector/lasx/lasx-xvfadd_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfclass_d.c | 1 - + .../loongarch/vector/lasx/lasx-xvfclass_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_caf_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_ceq_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_cle_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_clt_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_cne_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_cor_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_cun_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_saf_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_seq_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_sle_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_slt_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_sne_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_sor_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcmp_sun_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcvt.c | 1 - + .../loongarch/vector/lasx/lasx-xvfcvth.c | 1 - + .../loongarch/vector/lasx/lasx-xvffint-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvffint-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvffinth.c 
| 1 - + .../loongarch/vector/lasx/lasx-xvflogb_d.c | 1 - + .../loongarch/vector/lasx/lasx-xvflogb_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfmadd_d.c | 1 - + .../loongarch/vector/lasx/lasx-xvfmadd_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfmax_d.c | 1 - + .../loongarch/vector/lasx/lasx-xvfmax_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfmaxa_d.c | 1 - + .../loongarch/vector/lasx/lasx-xvfmaxa_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfnmadd_d.c | 1 - + .../loongarch/vector/lasx/lasx-xvfnmadd_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfrint_d.c | 1 - + .../loongarch/vector/lasx/lasx-xvfrint_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvfrstp.c | 1 - + .../loongarch/vector/lasx/lasx-xvfrstpi.c | 1 - + .../loongarch/vector/lasx/lasx-xvfsqrt_d.c | 1 - + .../loongarch/vector/lasx/lasx-xvfsqrt_s.c | 1 - + .../loongarch/vector/lasx/lasx-xvftint-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvftint-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvftint-3.c | 1 - + .../loongarch/vector/lasx/lasx-xvftintl.c | 1 - + .../loongarch/vector/lasx/lasx-xvhaddw-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvhaddw-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvhsubw-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvhsubw-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvilvh.c | 1 - + .../loongarch/vector/lasx/lasx-xvilvl.c | 1 - + .../loongarch/vector/lasx/lasx-xvinsgr2vr.c | 1 - + .../loongarch/vector/lasx/lasx-xvinsve0.c | 1 - + .../loongarch/vector/lasx/lasx-xvld.c | 1 - + .../loongarch/vector/lasx/lasx-xvldi.c | 1 - + .../loongarch/vector/lasx/lasx-xvmadd.c | 1 - + .../loongarch/vector/lasx/lasx-xvmaddwev-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmaddwev-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmaddwev-3.c | 1 - + .../loongarch/vector/lasx/lasx-xvmaddwod-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmaddwod-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmaddwod-3.c | 1 - + .../loongarch/vector/lasx/lasx-xvmax-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmax-2.c | 1 - + 
.../loongarch/vector/lasx/lasx-xvmaxi-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmaxi-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmin-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmin-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmini-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmini-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmod-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmod-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmskgez.c | 1 - + .../loongarch/vector/lasx/lasx-xvmskltz.c | 1 - + .../loongarch/vector/lasx/lasx-xvmsknz.c | 1 - + .../loongarch/vector/lasx/lasx-xvmsub.c | 1 - + .../loongarch/vector/lasx/lasx-xvmuh-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmuh-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmul.c | 1 - + .../loongarch/vector/lasx/lasx-xvmulwev-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmulwev-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmulwev-3.c | 1 - + .../loongarch/vector/lasx/lasx-xvmulwod-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvmulwod-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvmulwod-3.c | 1 - + .../loongarch/vector/lasx/lasx-xvneg.c | 1 - + .../loongarch/vector/lasx/lasx-xvnor.c | 1 - + .../loongarch/vector/lasx/lasx-xvnori.c | 1 - + .../loongarch/vector/lasx/lasx-xvor.c | 1 - + .../loongarch/vector/lasx/lasx-xvori.c | 1 - + .../loongarch/vector/lasx/lasx-xvorn.c | 1 - + .../loongarch/vector/lasx/lasx-xvpackev.c | 1 - + .../loongarch/vector/lasx/lasx-xvpackod.c | 1 - + .../loongarch/vector/lasx/lasx-xvpcnt.c | 1 - + .../loongarch/vector/lasx/lasx-xvpickev.c | 1 - + .../loongarch/vector/lasx/lasx-xvpickod.c | 1 - + .../loongarch/vector/lasx/lasx-xvpickve.c | 1 - + .../loongarch/vector/lasx/lasx-xvpickve2gr.c | 1 - + .../loongarch/vector/lasx/lasx-xvprem.c | 1 - + .../loongarch/vector/lasx/lasx-xvpremi.c | 1 - + .../loongarch/vector/lasx/lasx-xvreplgr2vr.c | 1 - + .../loongarch/vector/lasx/lasx-xvreplve.c | 1 - + .../loongarch/vector/lasx/lasx-xvreplve0.c | 1 - + .../loongarch/vector/lasx/lasx-xvreplvei.c | 1 - + 
.../loongarch/vector/lasx/lasx-xvrotr.c | 1 - + .../loongarch/vector/lasx/lasx-xvrotri.c | 1 - + .../loongarch/vector/lasx/lasx-xvsadd-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvsadd-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvsat-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvsat-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvseq.c | 1 - + .../loongarch/vector/lasx/lasx-xvseqi.c | 1 - + .../loongarch/vector/lasx/lasx-xvshuf4i_b.c | 1 - + .../loongarch/vector/lasx/lasx-xvshuf_b.c | 1 - + .../loongarch/vector/lasx/lasx-xvsigncov.c | 1 - + .../loongarch/vector/lasx/lasx-xvsle-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvsle-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvslei-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvslei-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvsll.c | 1 - + .../loongarch/vector/lasx/lasx-xvslli.c | 1 - + .../loongarch/vector/lasx/lasx-xvsllwil-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvsllwil-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvslt-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvslt-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvslti-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvslti-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvsra.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrai.c | 1 - + .../loongarch/vector/lasx/lasx-xvsran.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrani.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrar.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrari.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrarn.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrarni.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrl.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrli.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrln.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrlni.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrlr.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrlri.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrlrn.c | 1 - + .../loongarch/vector/lasx/lasx-xvsrlrni.c | 1 - + .../loongarch/vector/lasx/lasx-xvssran.c | 1 - + 
.../loongarch/vector/lasx/lasx-xvssrani.c | 1 - + .../loongarch/vector/lasx/lasx-xvssrarn.c | 1 - + .../loongarch/vector/lasx/lasx-xvssrarni.c | 1 - + .../loongarch/vector/lasx/lasx-xvssrln.c | 1 - + .../loongarch/vector/lasx/lasx-xvssrlni.c | 1 - + .../loongarch/vector/lasx/lasx-xvssrlrn.c | 1 - + .../loongarch/vector/lasx/lasx-xvssrlrni.c | 1 - + .../loongarch/vector/lasx/lasx-xvssub-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvssub-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvst.c | 1 - + .../loongarch/vector/lasx/lasx-xvsub.c | 1 - + .../loongarch/vector/lasx/lasx-xvsubi.c | 1 - + .../loongarch/vector/lasx/lasx-xvsubwev-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvsubwev-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvsubwod-1.c | 1 - + .../loongarch/vector/lasx/lasx-xvsubwod-2.c | 1 - + .../loongarch/vector/lasx/lasx-xvxor.c | 1 - + .../loongarch/vector/lasx/lasx-xvxori.c | 1 - + .../loongarch/vector/loongarch-vector.exp | 23 +++++++++++++++++++ + .../loongarch/vector/lsx/lsx-vabsd-1.c | 1 - + .../loongarch/vector/lsx/lsx-vabsd-2.c | 1 - + .../loongarch/vector/lsx/lsx-vadd.c | 1 - + .../loongarch/vector/lsx/lsx-vadda.c | 1 - + .../loongarch/vector/lsx/lsx-vaddi.c | 1 - + .../loongarch/vector/lsx/lsx-vaddwev-1.c | 1 - + .../loongarch/vector/lsx/lsx-vaddwev-2.c | 1 - + .../loongarch/vector/lsx/lsx-vaddwev-3.c | 1 - + .../loongarch/vector/lsx/lsx-vaddwod-1.c | 1 - + .../loongarch/vector/lsx/lsx-vaddwod-2.c | 1 - + .../loongarch/vector/lsx/lsx-vaddwod-3.c | 1 - + .../loongarch/vector/lsx/lsx-vand.c | 1 - + .../loongarch/vector/lsx/lsx-vandi.c | 1 - + .../loongarch/vector/lsx/lsx-vandn.c | 1 - + .../loongarch/vector/lsx/lsx-vavg-1.c | 1 - + .../loongarch/vector/lsx/lsx-vavg-2.c | 1 - + .../loongarch/vector/lsx/lsx-vavgr-1.c | 1 - + .../loongarch/vector/lsx/lsx-vavgr-2.c | 1 - + .../loongarch/vector/lsx/lsx-vbitclr.c | 1 - + .../loongarch/vector/lsx/lsx-vbitclri.c | 1 - + .../loongarch/vector/lsx/lsx-vbitrev.c | 1 - + .../loongarch/vector/lsx/lsx-vbitrevi.c | 1 - + 
.../loongarch/vector/lsx/lsx-vbitsel.c | 1 - + .../loongarch/vector/lsx/lsx-vbitseli.c | 1 - + .../loongarch/vector/lsx/lsx-vbitset.c | 1 - + .../loongarch/vector/lsx/lsx-vbitseti.c | 1 - + .../loongarch/vector/lsx/lsx-vbsll.c | 1 - + .../loongarch/vector/lsx/lsx-vbsrl.c | 1 - + .../loongarch/vector/lsx/lsx-vclo.c | 1 - + .../loongarch/vector/lsx/lsx-vclz.c | 1 - + .../loongarch/vector/lsx/lsx-vdiv-1.c | 1 - + .../loongarch/vector/lsx/lsx-vdiv-2.c | 1 - + .../loongarch/vector/lsx/lsx-vexth-1.c | 1 - + .../loongarch/vector/lsx/lsx-vexth-2.c | 1 - + .../loongarch/vector/lsx/lsx-vextl-1.c | 1 - + .../loongarch/vector/lsx/lsx-vextl-2.c | 1 - + .../loongarch/vector/lsx/lsx-vextrins.c | 1 - + .../loongarch/vector/lsx/lsx-vfadd_d.c | 1 - + .../loongarch/vector/lsx/lsx-vfadd_s.c | 1 - + .../loongarch/vector/lsx/lsx-vfclass_d.c | 1 - + .../loongarch/vector/lsx/lsx-vfclass_s.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_caf.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_ceq.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_cle.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_clt.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_cne.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_cor.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_cun.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_saf.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_seq.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_sle.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_slt.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_sne.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_sor.c | 1 - + .../loongarch/vector/lsx/lsx-vfcmp_sun.c | 1 - + .../loongarch/vector/lsx/lsx-vfcvt-1.c | 1 - + .../loongarch/vector/lsx/lsx-vfcvt-2.c | 1 - + .../loongarch/vector/lsx/lsx-vffint-1.c | 1 - + .../loongarch/vector/lsx/lsx-vffint-2.c | 1 - + .../loongarch/vector/lsx/lsx-vffint-3.c | 1 - + .../loongarch/vector/lsx/lsx-vflogb_d.c | 1 - + .../loongarch/vector/lsx/lsx-vflogb_s.c | 1 - + .../loongarch/vector/lsx/lsx-vfmadd_d.c | 1 - + .../loongarch/vector/lsx/lsx-vfmadd_s.c | 1 
- + .../loongarch/vector/lsx/lsx-vfmax_d.c | 1 - + .../loongarch/vector/lsx/lsx-vfmax_s.c | 1 - + .../loongarch/vector/lsx/lsx-vfmaxa_d.c | 1 - + .../loongarch/vector/lsx/lsx-vfmaxa_s.c | 1 - + .../loongarch/vector/lsx/lsx-vfnmadd_d.c | 1 - + .../loongarch/vector/lsx/lsx-vfnmadd_s.c | 1 - + .../loongarch/vector/lsx/lsx-vfrint_d.c | 1 - + .../loongarch/vector/lsx/lsx-vfrint_s.c | 1 - + .../loongarch/vector/lsx/lsx-vfrstp.c | 1 - + .../loongarch/vector/lsx/lsx-vfrstpi.c | 1 - + .../loongarch/vector/lsx/lsx-vfsqrt_d.c | 1 - + .../loongarch/vector/lsx/lsx-vfsqrt_s.c | 1 - + .../loongarch/vector/lsx/lsx-vftint-1.c | 1 - + .../loongarch/vector/lsx/lsx-vftint-2.c | 1 - + .../loongarch/vector/lsx/lsx-vftint-3.c | 1 - + .../loongarch/vector/lsx/lsx-vftint-4.c | 1 - + .../loongarch/vector/lsx/lsx-vhaddw-1.c | 1 - + .../loongarch/vector/lsx/lsx-vhaddw-2.c | 1 - + .../loongarch/vector/lsx/lsx-vhsubw-1.c | 1 - + .../loongarch/vector/lsx/lsx-vhsubw-2.c | 1 - + .../loongarch/vector/lsx/lsx-vilvh.c | 1 - + .../loongarch/vector/lsx/lsx-vilvl.c | 1 - + .../loongarch/vector/lsx/lsx-vinsgr2vr.c | 1 - + .../gcc.target/loongarch/vector/lsx/lsx-vld.c | 1 - + .../loongarch/vector/lsx/lsx-vldi.c | 1 - + .../loongarch/vector/lsx/lsx-vmadd.c | 1 - + .../loongarch/vector/lsx/lsx-vmaddwev-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmaddwev-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmaddwev-3.c | 1 - + .../loongarch/vector/lsx/lsx-vmaddwod-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmaddwod-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmaddwod-3.c | 1 - + .../loongarch/vector/lsx/lsx-vmax-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmax-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmaxi-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmaxi-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmin-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmin-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmini-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmini-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmod-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmod-2.c 
| 1 - + .../loongarch/vector/lsx/lsx-vmskgez.c | 1 - + .../loongarch/vector/lsx/lsx-vmskltz.c | 1 - + .../loongarch/vector/lsx/lsx-vmsknz.c | 1 - + .../loongarch/vector/lsx/lsx-vmsub.c | 1 - + .../loongarch/vector/lsx/lsx-vmuh-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmuh-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmul.c | 1 - + .../loongarch/vector/lsx/lsx-vmulwev-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmulwev-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmulwev-3.c | 1 - + .../loongarch/vector/lsx/lsx-vmulwod-1.c | 1 - + .../loongarch/vector/lsx/lsx-vmulwod-2.c | 1 - + .../loongarch/vector/lsx/lsx-vmulwod-3.c | 1 - + .../loongarch/vector/lsx/lsx-vneg.c | 1 - + .../loongarch/vector/lsx/lsx-vnor.c | 1 - + .../loongarch/vector/lsx/lsx-vnori.c | 1 - + .../gcc.target/loongarch/vector/lsx/lsx-vor.c | 1 - + .../loongarch/vector/lsx/lsx-vori.c | 1 - + .../loongarch/vector/lsx/lsx-vorn.c | 1 - + .../loongarch/vector/lsx/lsx-vpackev.c | 1 - + .../loongarch/vector/lsx/lsx-vpackod.c | 1 - + .../loongarch/vector/lsx/lsx-vpcnt.c | 1 - + .../loongarch/vector/lsx/lsx-vpickev.c | 1 - + .../loongarch/vector/lsx/lsx-vpickod.c | 1 - + .../loongarch/vector/lsx/lsx-vpickve2gr.c | 1 - + .../loongarch/vector/lsx/lsx-vpremi.c | 1 - + .../loongarch/vector/lsx/lsx-vreplgr2vr.c | 1 - + .../loongarch/vector/lsx/lsx-vreplve.c | 1 - + .../loongarch/vector/lsx/lsx-vreplvei.c | 1 - + .../loongarch/vector/lsx/lsx-vrotr.c | 1 - + .../loongarch/vector/lsx/lsx-vrotri.c | 1 - + .../loongarch/vector/lsx/lsx-vsadd-1.c | 1 - + .../loongarch/vector/lsx/lsx-vsadd-2.c | 1 - + .../loongarch/vector/lsx/lsx-vsat-1.c | 1 - + .../loongarch/vector/lsx/lsx-vsat-2.c | 1 - + .../loongarch/vector/lsx/lsx-vseq.c | 1 - + .../loongarch/vector/lsx/lsx-vseqi.c | 1 - + .../loongarch/vector/lsx/lsx-vshuf.c | 1 - + .../loongarch/vector/lsx/lsx-vshuf4i.c | 1 - + .../loongarch/vector/lsx/lsx-vsigncov.c | 1 - + .../loongarch/vector/lsx/lsx-vsle-1.c | 1 - + .../loongarch/vector/lsx/lsx-vsle-2.c | 1 - + 
.../loongarch/vector/lsx/lsx-vslei-1.c | 1 - + .../loongarch/vector/lsx/lsx-vslei-2.c | 1 - + .../loongarch/vector/lsx/lsx-vsll.c | 1 - + .../loongarch/vector/lsx/lsx-vslli.c | 1 - + .../loongarch/vector/lsx/lsx-vsllwil-1.c | 1 - + .../loongarch/vector/lsx/lsx-vsllwil-2.c | 1 - + .../loongarch/vector/lsx/lsx-vslt-1.c | 1 - + .../loongarch/vector/lsx/lsx-vslt-2.c | 1 - + .../loongarch/vector/lsx/lsx-vslti-1.c | 1 - + .../loongarch/vector/lsx/lsx-vslti-2.c | 1 - + .../loongarch/vector/lsx/lsx-vsra.c | 1 - + .../loongarch/vector/lsx/lsx-vsrai.c | 1 - + .../loongarch/vector/lsx/lsx-vsran.c | 1 - + .../loongarch/vector/lsx/lsx-vsrani.c | 1 - + .../loongarch/vector/lsx/lsx-vsrar.c | 1 - + .../loongarch/vector/lsx/lsx-vsrari.c | 1 - + .../loongarch/vector/lsx/lsx-vsrarn.c | 1 - + .../loongarch/vector/lsx/lsx-vsrarni.c | 1 - + .../loongarch/vector/lsx/lsx-vsrl.c | 1 - + .../loongarch/vector/lsx/lsx-vsrli.c | 1 - + .../loongarch/vector/lsx/lsx-vsrln.c | 1 - + .../loongarch/vector/lsx/lsx-vsrlni.c | 1 - + .../loongarch/vector/lsx/lsx-vsrlr.c | 1 - + .../loongarch/vector/lsx/lsx-vsrlri.c | 1 - + .../loongarch/vector/lsx/lsx-vsrlrn.c | 1 - + .../loongarch/vector/lsx/lsx-vsrlrni.c | 1 - + .../loongarch/vector/lsx/lsx-vssran.c | 1 - + .../loongarch/vector/lsx/lsx-vssrani.c | 1 - + .../loongarch/vector/lsx/lsx-vssrarn.c | 1 - + .../loongarch/vector/lsx/lsx-vssrarni.c | 1 - + .../loongarch/vector/lsx/lsx-vssrln.c | 1 - + .../loongarch/vector/lsx/lsx-vssrlni.c | 1 - + .../loongarch/vector/lsx/lsx-vssrlrn.c | 1 - + .../loongarch/vector/lsx/lsx-vssrlrni.c | 1 - + .../loongarch/vector/lsx/lsx-vssub-1.c | 1 - + .../loongarch/vector/lsx/lsx-vssub-2.c | 1 - + .../gcc.target/loongarch/vector/lsx/lsx-vst.c | 1 - + .../loongarch/vector/lsx/lsx-vsub.c | 1 - + .../loongarch/vector/lsx/lsx-vsubi.c | 1 - + .../loongarch/vector/lsx/lsx-vsubwev-1.c | 1 - + .../loongarch/vector/lsx/lsx-vsubwev-2.c | 1 - + .../loongarch/vector/lsx/lsx-vsubwod-1.c | 1 - + .../loongarch/vector/lsx/lsx-vsubwod-2.c | 1 
- + .../loongarch/vector/lsx/lsx-vxor.c | 1 - + .../loongarch/vector/lsx/lsx-vxori.c | 1 - + 393 files changed, 23 insertions(+), 392 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvabsd-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvabsd-1.c +index 41fae32df..5e15a12cb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvabsd-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvabsd-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvabsd-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvabsd-2.c +index bd7a9069d..fa0f9f6b5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvabsd-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvabsd-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvadd.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvadd.c +index 293295723..82da73440 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvadd.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvadd.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvadda.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvadda.c +index d6b57d1cd..2c2701dc2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvadda.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvadda.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddi.c +index 054bf6e55..064b26fb6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-1.c +index 70f3bf783..160073927 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-2.c +index 22528a14f..c45840ea2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-3.c +index 38a0a53d7..567bc1faf 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwev-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-1.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-1.c +index a4dc565e9..775b90547 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-2.c +index a2fbe9ed0..34721ad56 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-3.c +index 8c98fc4be..30d52b01c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvaddwod-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvand.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvand.c +index e485786dd..96ad473a3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvand.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvand.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvandi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvandi.c +index 26cddc53a..59d6a14ab 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvandi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvandi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvandn.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvandn.c +index bc3590c21..b2809d369 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvandn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvandn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavg-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavg-1.c +index 5ce31ebbd..18d186280 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavg-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavg-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavg-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavg-2.c +index d04e42753..4a79277b4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavg-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavg-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavgr-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavgr-1.c +index 37b78aa1b..7e6a244e7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavgr-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavgr-1.c +@@ -1,4 +1,3 @@ +-/* { 
dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavgr-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavgr-2.c +index 3944a6ac0..f020cbeea 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavgr-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvavgr-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitclr.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitclr.c +index def7b588e..70c928886 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitclr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitclr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitclri.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitclri.c +index 713eb19d5..7eee98f40 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitclri.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitclri.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitrev.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitrev.c +index 2b0e7f8d1..a4f104e8e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitrev.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitrev.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitrevi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitrevi.c +index 2b8327d91..967a01f6d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitrevi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitrevi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitsel.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitsel.c +index c9847a615..414080540 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitsel.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitsel.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitseli.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitseli.c +index 1edb4fca2..b2532f5eb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitseli.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitseli.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitset.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitset.c +index c195cd91c..ff9d030f0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitset.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitset.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitseti.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitseti.c 
+index 47f37e4b3..9081443bc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitseti.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbitseti.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbsll_v.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbsll_v.c +index 3c1a8b8e6..7110423fc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbsll_v.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbsll_v.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbsrl_v.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbsrl_v.c +index 340f7691b..236b5b28e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbsrl_v.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvbsrl_v.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvclo.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvclo.c +index dbc52f92b..927fa16fe 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvclo.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvclo.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvclz.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvclz.c +index 89191c467..3e39c212a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvclz.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvclz.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvdiv-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvdiv-1.c +index 0d7c67703..e3cfe283e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvdiv-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvdiv-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvdiv-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvdiv-2.c +index fd8b6d38c..71543290a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvdiv-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvdiv-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvext2xv-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvext2xv-1.c +index 94f31019c..2e9e4b03d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvext2xv-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvext2xv-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvext2xv-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvext2xv-2.c +index d93201bc4..f6a098d96 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvext2xv-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvext2xv-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w 
-fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvexth-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvexth-1.c +index 9fb4e3ff0..c64e6cadf 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvexth-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvexth-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvexth-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvexth-2.c +index fe6ff15d8..33ede4dab 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvexth-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvexth-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextl-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextl-1.c +index c0d3e8e75..7f59c765d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextl-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextl-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextl-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextl-2.c +index 8c7ab4ed3..d9eee597c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextl-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextl-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextrins.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextrins.c +index 8e61f1c6d..e4dc8bf10 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextrins.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvextrins.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfadd_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfadd_d.c +index 657a19e58..7cd7ad8a3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfadd_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfadd_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfadd_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfadd_s.c +index 4002c4074..62ca8c9c3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfadd_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfadd_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfclass_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfclass_d.c +index 5d5b4c43c..5a2733075 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfclass_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfclass_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + /* { dg-timeout 500 } */ + #include "../simd_correctness_check.h" +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfclass_s.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfclass_s.c +index 888e85b6e..cae82f6cb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfclass_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfclass_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + /* { dg-timeout 500 } */ + #include "../simd_correctness_check.h" +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_caf_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_caf_s.c +index fa3372358..1fe7c8bc5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_caf_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_caf_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_ceq_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_ceq_s.c +index 6d6649f6f..d4c4aa150 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_ceq_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_ceq_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cle_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cle_s.c +index a64dd7598..1ca2fbd91 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cle_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cle_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_clt_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_clt_s.c 
+index 733cc00ee..0dffd68e7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_clt_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_clt_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cne_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cne_s.c +index 190741070..77ba5fca4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cne_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cne_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cor_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cor_s.c +index 8dd58f228..954c7575c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cor_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cor_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cun_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cun_s.c +index 3230c101d..98eb38573 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cun_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_cun_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_saf_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_saf_s.c +index 23cbc4bf0..1427165fd 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_saf_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_saf_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_seq_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_seq_s.c +index 6641d2c58..e61e0e655 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_seq_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_seq_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sle_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sle_s.c +index d25fc25da..24f4f2054 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sle_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sle_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_slt_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_slt_s.c +index 8210f749b..f468d93c6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_slt_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_slt_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sne_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sne_s.c +index 9d015a5c8..29c128e79 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sne_s.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sne_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sor_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sor_s.c +index a61681073..29c080c50 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sor_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sor_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sun_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sun_s.c +index 41f274920..eee56168b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sun_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcmp_sun_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcvt.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcvt.c +index 116399a7c..8b6225d06 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcvt.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcvt.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcvth.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcvth.c +index 001ce1c69..7933ec580 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcvth.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfcvth.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { 
dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffint-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffint-1.c +index dd04fd788..e0240cb5c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffint-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffint-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffint-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffint-2.c +index 3e2b15507..c6f4aeaa6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffint-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffint-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffinth.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffinth.c +index e310ff5ee..4d8e71bd2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffinth.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvffinth.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvflogb_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvflogb_d.c +index bba1a06f3..57a4cd2b9 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvflogb_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvflogb_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvflogb_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvflogb_s.c +index b641c733f..798c75280 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvflogb_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvflogb_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmadd_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmadd_d.c +index c85c94bf6..f5c49f982 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmadd_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmadd_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmadd_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmadd_s.c +index bde41dd5c..d25bbe6dd 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmadd_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmadd_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmax_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmax_d.c +index 207ba167f..eefa1e5ac 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmax_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmax_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmax_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmax_s.c 
+index 9b7703231..a9271e60d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmax_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmax_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_d.c +index 96bbb942d..63605b85c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_s.c +index c73a8a74a..4b59e3403 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfmaxa_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_d.c +index d161c850c..0f6c5e4cc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_s.c +index c5e9576ea..3f4540425 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_s.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfnmadd_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_d.c +index 4babf1638..e65ded196 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + /* { dg-timeout 500 } */ + #include "../simd_correctness_check.h" +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c +index 9f2fa6747..fbfe300ea 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + /* { dg-timeout 500 } */ + #include "../simd_correctness_check.h" +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrstp.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrstp.c +index 557f9f8b5..72b3fe08d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrstp.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrstp.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrstpi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrstpi.c +index cdb7b11aa..cbb23e0a8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrstpi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrstpi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + 
/* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_d.c +index 18d5c51de..21f617231 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_s.c +index 27df4a27d..0a28716bc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfsqrt_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-1.c +index c75468d42..24b21ef8a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-2.c +index ad72f7596..5a72994d5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-3.c +index 19db4e192..c02e00bdd 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftint-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftintl.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftintl.c +index b0fdf7e0b..f20ec5b83 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftintl.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvftintl.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhaddw-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhaddw-1.c +index 1cf0ec698..03a885648 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhaddw-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhaddw-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhaddw-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhaddw-2.c +index 14ec081a4..9ee92aa85 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhaddw-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhaddw-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhsubw-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhsubw-1.c 
+index fa4d5fd6f..e5101a857 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhsubw-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhsubw-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhsubw-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhsubw-2.c +index 87c3e25b1..685b76e7e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhsubw-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvhsubw-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvilvh.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvilvh.c +index 5a047a508..cbadbd3d6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvilvh.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvilvh.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvilvl.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvilvl.c +index 4393045c3..c78eb7fce 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvilvl.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvilvl.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvinsgr2vr.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvinsgr2vr.c +index ce28c4857..9e3cd7087 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvinsgr2vr.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvinsgr2vr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvinsve0.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvinsve0.c +index 644d2ce4b..b356dd1bf 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvinsve0.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvinsve0.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvld.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvld.c +index c1eda6c6c..f39a94ab0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvld.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvld.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldi.c +index 84b3c6599..51e4661d5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmadd.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmadd.c +index f9634b128..6a04e7268 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmadd.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmadd.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include 
"../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-1.c +index 6238685bc..5e5b35de5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-2.c +index 5fa080375..bfa095dc8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-3.c +index 40549448e..6a4704583 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwev-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-1.c +index 683876933..d456cbfff 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-2.c +index f9f88b654..7f1c40c00 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-3.c +index 5210e4cf9..abe92a605 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaddwod-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmax-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmax-1.c +index 96c6671f2..4b8932ab0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmax-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmax-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmax-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmax-2.c +index 38f2c0afe..561d964b1 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmax-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmax-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaxi-1.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaxi-1.c +index e804a0a45..cc52343ec 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaxi-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaxi-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaxi-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaxi-2.c +index b6b34063c..2373c96ef 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaxi-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmaxi-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmin-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmin-1.c +index 7dbf335c1..9df0af7ed 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmin-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmin-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmin-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmin-2.c +index 9eaa0e9e7..0eb03acbe 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmin-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmin-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmini-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmini-1.c +index 01aabada8..6579978b7 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmini-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmini-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmini-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmini-2.c +index 8eb7d9355..7402ff6f0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmini-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmini-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmod-1.c +index 6f34f6ffc..fd052cd81 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmod-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmod-2.c +index d0a9e9d2f..cb39dbbad 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmskgez.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmskgez.c +index 15e66ae38..952725afc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmskgez.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmskgez.c +@@ -1,4 +1,3 @@ 
+-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmskltz.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmskltz.c +index 53b21f98b..22aa6ab0a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmskltz.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmskltz.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmsknz.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmsknz.c +index 81865fd32..6b48f8ab8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmsknz.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmsknz.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmsub.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmsub.c +index 8c8d4996b..4e13f34dd 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmsub.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmsub.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmuh-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmuh-1.c +index 58ad8bfcd..2e42c1d64 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmuh-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmuh-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmuh-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmuh-2.c +index 85d24fe44..2d420c280 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmuh-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmuh-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmul.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmul.c +index be3c8e718..f14aa47ca 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmul.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmul.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-1.c +index 01ff71649..e09174d08 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-2.c +index 32088f4ae..2a4c09c52 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-3.c +index 
19157f682..7afa6ad94 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwev-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-1.c +index 80fdcda63..ad69c1e47 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-2.c +index 1a4b221fe..27a7fdd67 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-3.c +index 9fcd3ce0c..c55d20d45 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvmulwod-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvneg.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvneg.c +index 3cd1626d4..fe17ef13f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvneg.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvneg.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvnor.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvnor.c +index 3a491ecab..2b8e6228b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvnor.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvnor.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvnori.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvnori.c +index 995a34c18..8a8062a99 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvnori.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvnori.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvor.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvor.c +index 27eef710d..11643896c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvor.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvor.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvori.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvori.c +index ee91af95f..0341bde95 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvori.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvori.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include 
"../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvorn.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvorn.c +index fa6cdff31..de7a208c3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvorn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvorn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpackev.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpackev.c +index 33b96d657..e83957070 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpackev.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpackev.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpackod.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpackod.c +index cdd20e881..ee335779f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpackod.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpackod.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpcnt.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpcnt.c +index d2e742e81..7d6be3664 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpcnt.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpcnt.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickev.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickev.c +index 66faa74d0..831247beb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickev.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickev.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickod.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickod.c +index a9778809f..65188ad41 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickod.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickod.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickve.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickve.c +index a2edbb80a..d23406674 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickve.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickve.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickve2gr.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickve2gr.c +index 8bd3a8273..2e18db108 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickve2gr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpickve2gr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvprem.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvprem.c +index 9346f9bfb..e9fc1d7d3 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvprem.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvprem.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c +index 9346f9bfb..e9fc1d7d3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplgr2vr.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplgr2vr.c +index 81456bc1b..1685747c0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplgr2vr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplgr2vr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplve.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplve.c +index 7aa76c2ba..beeee765f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplve.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplve.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplve0.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplve0.c +index a2bc2da52..5643b913f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplve0.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplve0.c 
+@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplvei.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplvei.c +index 9346f9bfb..e9fc1d7d3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplvei.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvreplvei.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvrotr.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvrotr.c +index 21446e55e..49439865c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvrotr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvrotr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvrotri.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvrotri.c +index c1b8e1752..24d508f81 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvrotri.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvrotri.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsadd-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsadd-1.c +index 2a4f29b50..cecac6173 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsadd-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsadd-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include 
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsadd-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsadd-2.c +index a3afc9811..6cd4e0503 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsadd-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsadd-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsat-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsat-1.c +index b4ac50271..29a4f5ae2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsat-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsat-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsat-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsat-2.c +index e5ee89deb..571145b84 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsat-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsat-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvseq.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvseq.c +index 2a42386ce..41b9470c1 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvseq.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvseq.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvseqi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvseqi.c +index 
5478d19c1..6c9b96460 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvseqi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvseqi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf4i_b.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf4i_b.c +index c8a00ca89..600168127 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf4i_b.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf4i_b.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c +index 03c479a08..b8ab38711 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsigncov.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsigncov.c +index 2a6eee0fd..5137f5de6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsigncov.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsigncov.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsle-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsle-1.c +index ed752df00..13f8c8c4f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsle-1.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsle-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsle-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsle-2.c +index bc98b41af..ef1784f67 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsle-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsle-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslei-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslei-1.c +index 06717802c..21f68132b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslei-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslei-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslei-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslei-2.c +index 093d5640e..0adadaa39 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslei-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslei-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsll.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsll.c +index 7179e715c..4a2927624 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsll.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsll.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ 
+ #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslli.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslli.c +index 003e29b67..50e9a9f53 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslli.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslli.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsllwil-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsllwil-1.c +index ef3a47da5..22a7a31a9 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsllwil-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsllwil-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsllwil-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsllwil-2.c +index 76651af63..4b68aeb18 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsllwil-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsllwil-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslt-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslt-1.c +index ca1f5e94f..f44f083b7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslt-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslt-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslt-2.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslt-2.c +index 6864f5eb8..60278e22b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslt-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslt-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslti-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslti-1.c +index 7dd2778a5..87d069d1f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslti-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslti-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslti-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslti-2.c +index d93e4314e..9eefa782b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslti-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvslti-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsra.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsra.c +index 2bf9ae9c3..b4bda4dab 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsra.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsra.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrai.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrai.c +index a51be899b..871d0241b 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrai.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrai.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsran.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsran.c +index e08934b12..eba7c1164 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsran.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsran.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrani.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrani.c +index 44c20a954..96382483e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrani.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrani.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrar.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrar.c +index fb47385c0..542b6fd3a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrar.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrar.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrari.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrari.c +index 63ba92ead..cfd61ba40 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrari.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrari.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } 
*/ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrarn.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrarn.c +index c145f7ff3..c847e2812 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrarn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrarn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrarni.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrarni.c +index b5c0fca74..c0ce0dd88 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrarni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrarni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrl.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrl.c +index 1d591c35c..8ac09a026 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrl.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrl.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrli.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrli.c +index e8696701f..dd0a09c4e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrli.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrli.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrln.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrln.c +index d54991051..42a695875 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrln.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrln.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlni.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlni.c +index 0fb6483cf..a7acf351d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlr.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlr.c +index 22e62a3e7..c4e1e14e0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlri.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlri.c +index 71f770aff..4a2e14712 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlri.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlri.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlrn.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlrn.c +index cbc1de371..b17c7c4b3 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlrn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlrn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlrni.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlrni.c +index 8fc7a0029..bfca007d7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlrni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsrlrni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssran.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssran.c +index fdb0c25f1..4648f751a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssran.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssran.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrani.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrani.c +index dd3c2c6f6..25482aebc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrani.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrani.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrarn.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrarn.c +index 7848ddd41..c284254ab 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrarn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrarn.c +@@ -1,4 +1,3 
@@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrarni.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrarni.c +index b1c16baf4..c39002ed5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrarni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrarni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrln.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrln.c +index 356eb2182..09313d03c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrln.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrln.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlni.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlni.c +index 116bebbb6..6d53719a6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlrn.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlrn.c +index 977061097..c812a1b0c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlrn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlrn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff 
--git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlrni.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlrni.c +index b55e388b1..2683355fe 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlrni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssrlrni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssub-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssub-1.c +index ada72a16a..dc187aa2c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssub-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssub-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssub-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssub-2.c +index f42523850..b13ea88a7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssub-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvssub-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvst.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvst.c +index 3c5e775ff..68a2cac21 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvst.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvst.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsub.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsub.c +index 
c1de1e8d3..e940491a6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsub.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsub.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubi.c +index a3c0de6d3..8a1272685 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwev-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwev-1.c +index caa72ca61..ba535d1e0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwev-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwev-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwev-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwev-2.c +index 57d883c04..0a1d0277c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwev-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwev-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwod-1.c +index 1687729d3..660c20da8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwod-1.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwod-2.c +index 8d6ed92a1..9710d128c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvsubwod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvxor.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvxor.c +index 18b36c873..506e983da 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvxor.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvxor.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvxori.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvxori.c +index 8fd6298f7..da7203af7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvxori.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvxori.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlasx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp +index 2c37aa91d..d53bee52a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp ++++ b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp +@@ -33,9 +33,32 @@ if ![info exists DEFAULT_CFLAGS] then { + #Initialize `dg'. 
+ dg-init + ++# If the target hardware supports LSX, the default action is "run", otherwise ++# just "compile". ++global dg-do-what-default ++if {[check_effective_target_loongarch_sx_hw]} then { ++ set dg-do-what-default run ++} else { ++ set dg-do-what-default compile ++} ++ + #Main loop. + dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lsx/*.\[cS\]]] \ + " -mlsx" $DEFAULT_CFLAGS ++ ++dg-finish ++ ++dg-init ++# If the target hardware supports LASX, the default action is "run", otherwise ++# just "compile". ++ ++global dg-do-what-default ++if {[check_effective_target_loongarch_asx_hw]} then { ++ set dg-do-what-default run ++} else { ++ set dg-do-what-default compile ++} ++ + dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lasx/*.\[cS\]]] \ + " -mlasx" $DEFAULT_CFLAGS + # All done. +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-1.c +index e336581f3..8790470a4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-2.c +index c1af80e14..77e027bdb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vabsd-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vadd.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vadd.c +index 7cfb989e4..e2c4f3ad3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vadd.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vadd.c +@@ 
-1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vadda.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vadda.c +index 4bb699eab..c7ce0a75b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vadda.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vadda.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddi.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddi.c +index 77afabe92..23f28bc34 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-1.c +index b7b16a325..54503e22b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-2.c +index a407cadfb..0b1e90959 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-3.c +index 4d5c60998..eefd0be2a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwev-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-1.c +index 0ebe8c8a9..1016afe21 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-2.c +index 379517f39..befbf7049 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-3.c +index 30dc83518..9365d242d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vaddwod-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vand.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vand.c +index 1597749b5..374b8b035 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vand.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vand.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vandi.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vandi.c +index 906da69ca..ad4b5d307 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vandi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vandi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vandn.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vandn.c +index 3ae2d7694..e645b9475 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vandn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vandn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-1.c +index 2177ca3f6..0d7463eda 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-2.c +index 1b0d879e4..bc16057ff 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavg-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + 
#include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavgr-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavgr-1.c +index 4b7262537..e494870bc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavgr-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavgr-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavgr-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavgr-2.c +index 22908b1ea..ff9907dd8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavgr-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vavgr-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitclr.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitclr.c +index 411dcaa40..d663653a0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitclr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitclr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitclri.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitclri.c +index 5d7d66e06..9017d1541 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitclri.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitclri.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitrev.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitrev.c 
+index ba4f4b6dc..5d6d1ef4b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitrev.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitrev.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitrevi.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitrevi.c +index 9739182cd..1f730a688 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitrevi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitrevi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitsel.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitsel.c +index 52ac9939f..2239b3740 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitsel.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitsel.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitseli.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitseli.c +index f2d6fb042..d5818879f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitseli.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitseli.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitset.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitset.c +index e05af675e..a1737c51b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitset.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitset.c +@@ -1,4 +1,3 @@ 
+-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitseti.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitseti.c +index 540a724a7..577fbeb4a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitseti.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbitseti.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbsll.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbsll.c +index 34246c551..d60d8434f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbsll.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbsll.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbsrl.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbsrl.c +index 986b7d566..a8d0e0fe2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbsrl.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vbsrl.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vclo.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vclo.c +index 2c1099a04..c386ed74c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vclo.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vclo.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vclz.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vclz.c +index 12df2c670..aa3e54a8d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vclz.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vclz.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-1.c +index cb4be0475..36ee4b83b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-2.c +index f2bc7df27..7cf31e21c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vdiv-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-1.c +index f6390800d..32db7a9c7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-2.c +index 6ab217e97..78afaa8bb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-2.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vexth-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextl-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextl-1.c +index 99854dbd8..998596169 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextl-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextl-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextl-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextl-2.c +index 73bb530c9..31a3b5e42 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextl-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextl-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextrins.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextrins.c +index 8d4158b57..e9187db90 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextrins.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vextrins.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_d.c +index 7ffbd385e..b4d65d678 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include 
"../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_s.c +index 388430278..83b013b95 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfadd_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfclass_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfclass_d.c +index 9706d7adc..d570dcd24 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfclass_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfclass_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfclass_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfclass_s.c +index 7166f954b..a3a5f44d8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfclass_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfclass_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_caf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_caf.c +index b448c2076..d38b6ab9d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_caf.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_caf.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_ceq.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_ceq.c +index 98941b47d..74ff46f89 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_ceq.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_ceq.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cle.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cle.c +index 409bce0ec..a40019e39 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cle.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cle.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_clt.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_clt.c +index 39c9cf7a7..934169c6e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_clt.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_clt.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cne.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cne.c +index c3da43bb4..c351daac0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cne.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cne.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cor.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cor.c +index 5228dbede..8ca078c9e 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cor.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cor.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cun.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cun.c +index a2beff53f..b57cf604c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cun.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_cun.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_saf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_saf.c +index bfa4914be..6d35a4a30 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_saf.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_saf.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_seq.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_seq.c +index bc573936d..07101104f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_seq.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_seq.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sle.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sle.c +index 87cb8da7c..dd418110c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sle.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sle.c +@@ -1,4 +1,3 @@ +-/* { dg-do 
run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_slt.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_slt.c +index 3845e8ec3..5b2e8d6a4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_slt.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_slt.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sne.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sne.c +index 964eff79f..98a798c5f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sne.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sne.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sor.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sor.c +index ea47baf40..413a81cb7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sor.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sor.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sun.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sun.c +index 68cb5a52f..78c8f19a5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sun.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcmp_sun.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcvt-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcvt-1.c +index d4a86e262..4d71b07ec 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcvt-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcvt-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcvt-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcvt-2.c +index e8f4f12b9..476782ce4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcvt-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfcvt-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-1.c +index 85db95762..4a54fe133 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-2.c +index f8839cfcd..bb4ac9dfc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-3.c +index 9150e27ca..e12e95367 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vffint-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vflogb_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vflogb_d.c +index cc36bf136..de5c46167 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vflogb_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vflogb_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vflogb_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vflogb_s.c +index 624589620..3556daa72 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vflogb_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vflogb_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_d.c +index c5de1ac7a..fa6ee6fd2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_s.c +index 6b85e87bd..22a8f6b91 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmadd_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { 
dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmax_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmax_d.c +index 442473fb4..bd942da1c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmax_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmax_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmax_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmax_s.c +index 876588827..a5e513c73 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmax_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmax_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmaxa_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmaxa_d.c +index c2766d5c6..ab8265bc2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmaxa_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmaxa_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmaxa_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmaxa_s.c +index 5fcdedd3f..8a09f61fe 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmaxa_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfmaxa_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfnmadd_d.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfnmadd_d.c +index 96b14aad6..0d0475a44 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfnmadd_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfnmadd_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfnmadd_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfnmadd_s.c +index bf8414b49..58470aef1 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfnmadd_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfnmadd_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_d.c +index c60ff2b46..0b1074016 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + /* { dg-timeout 500 } */ + #include "../simd_correctness_check.h" +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c +index 12cb02303..61f28325a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + /* { dg-timeout 500 } */ + #include "../simd_correctness_check.h" +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrstp.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrstp.c +index ac0ade8b1..30d6ed51c 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrstp.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrstp.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrstpi.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrstpi.c +index a2b110f21..e74dfb0d5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrstpi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrstpi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfsqrt_d.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfsqrt_d.c +index 8a35dfe24..5bae5a67f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfsqrt_d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfsqrt_d.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfsqrt_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfsqrt_s.c +index ffd80540b..4a76ee69f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfsqrt_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfsqrt_s.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-1.c +index 8d0d56632..5bf753662 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options 
"-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-2.c +index 5dba807f6..ffbdb0069 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-3.c +index 7f6d2f4d1..d13f7d0d9 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-4.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-4.c +index 9c5bb9131..2d6b92375 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-4.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vftint-4.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhaddw-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhaddw-1.c +index af75f8e4e..ab3abf2a3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhaddw-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhaddw-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhaddw-2.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhaddw-2.c +index 37c769a2d..078d229da 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhaddw-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhaddw-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhsubw-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhsubw-1.c +index 0b51cb8cf..1999543f4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhsubw-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhsubw-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhsubw-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhsubw-2.c +index 26b51ee14..3d9b1a817 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhsubw-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vhsubw-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vilvh.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vilvh.c +index aa802b295..aefcdb960 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vilvh.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vilvh.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vilvl.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vilvl.c +index 88c66f220..4226f8683 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vilvl.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vilvl.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vinsgr2vr.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vinsgr2vr.c +index 2b9dcc0b5..c45d72667 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vinsgr2vr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vinsgr2vr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vld.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vld.c +index 7cd9abb7c..815ca0cdb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vld.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vld.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vldi.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vldi.c +index 089500ea9..6ba93f73c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vldi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vldi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmadd.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmadd.c +index 3fade5157..33369303f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmadd.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmadd.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-1.c +index d3fd83da7..2f55309ce 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-2.c +index 839285685..0a48f655a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-3.c +index bab2c6cf3..091343e82 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwev-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-1.c +index 5875aa597..42d873b4c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-2.c +index 
4be7fce82..9f6aa3d12 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-3.c +index 8a4c39502..6b06e204e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaddwod-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-1.c +index b0e22f955..c96462994 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-2.c +index 51a9a92e8..96db676e7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmax-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaxi-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaxi-1.c +index 7cff1d848..64c61f0a1 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaxi-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaxi-1.c +@@ -1,4 +1,3 @@ +-/* { 
dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaxi-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaxi-2.c +index b79af2228..27c50bdbb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaxi-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmaxi-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmin-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmin-1.c +index b2a7a35bd..d076ae8f2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmin-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmin-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmin-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmin-2.c +index c90cae75e..c6e183fd4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmin-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmin-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmini-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmini-1.c +index 772d040c3..e1e10cb60 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmini-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmini-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmini-2.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmini-2.c +index 6eaae2134..c0e9a1a96 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmini-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmini-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmod-1.c +index 5470d40dd..cade92d25 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmod-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmod-2.c +index 8deb04427..4ecfff10c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmskgez.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmskgez.c +index 64a950f81..717305270 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmskgez.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmskgez.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmskltz.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmskltz.c +index 8f743ec2e..cfccbb7e6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmskltz.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmskltz.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmsknz.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmsknz.c +index d547af0d3..1cd2e7cdc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmsknz.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmsknz.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmsub.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmsub.c +index 47cf33cfd..b4f171d20 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmsub.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmsub.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmuh-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmuh-1.c +index ab650a024..8f630371e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmuh-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmuh-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmuh-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmuh-2.c +index 60b6e3503..78b745a38 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmuh-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmuh-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff 
--git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmul.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmul.c +index 8ba666275..5f3c049a1 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmul.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmul.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-1.c +index 8357f4e80..9a949ef18 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-2.c +index e4afc8247..a16b518af 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-3.c +index 346f0316a..5fbb48e81 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwev-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-1.c +index 6eea49a61..570bd1d13 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-2.c +index f3e4e0390..522f07950 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-3.c +index 9f5702e2c..62d1e3420 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vmulwod-3.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vneg.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vneg.c +index 9441ba50e..e077ce7d0 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vneg.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vneg.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vnor.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vnor.c +index a7a3acce9..80b2da43d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vnor.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vnor.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w 
-fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vnori.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vnori.c +index a07a02ab2..fb43da265 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vnori.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vnori.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vor.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vor.c +index 537a1bb3b..7686bcb5f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vor.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vor.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vori.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vori.c +index 8a6e035c9..d40b093e6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vori.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vori.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vorn.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vorn.c +index bb59bc312..6eb69cbf5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vorn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vorn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpackev.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpackev.c +index 030e87fd8..17a43bbc5 
100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpackev.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpackev.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpackod.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpackod.c +index 783eedae1..85ae43e63 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpackod.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpackod.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpcnt.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpcnt.c +index 66982d89f..0b0200ed6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpcnt.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpcnt.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickev.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickev.c +index 58591f1bb..5fd4af833 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickev.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickev.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickod.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickod.c +index 74269e319..e41c2f8f2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickod.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickod.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options 
"-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickve2gr.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickve2gr.c +index acca2bee9..5ec0a4d2a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickve2gr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpickve2gr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpremi.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpremi.c +index ef0ad676e..36c9bf336 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpremi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vpremi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplgr2vr.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplgr2vr.c +index a5f02b1b1..2f16a3483 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplgr2vr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplgr2vr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplve.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplve.c +index 463adb48e..6634b3a9f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplve.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplve.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplvei.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplvei.c +index a81be76f1..157132c28 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplvei.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vreplvei.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vrotr.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vrotr.c +index c42440cea..286fe935a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vrotr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vrotr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vrotri.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vrotri.c +index 4ae4dbf8b..81b16542f 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vrotri.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vrotri.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c +index 1bc27c983..3eda1f166 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c +index 67d189991..d08f84481 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsat-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsat-1.c +index cd8eefb47..0cf4c664b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsat-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsat-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsat-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsat-2.c +index 31e3919bf..d709dbdb7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsat-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsat-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseq.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseq.c +index 4362941ab..a031aaeb3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseq.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseq.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseqi.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseqi.c +index c16a291de..f33c4a8b7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseqi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vseqi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c +index 646935c92..f3b800f88 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf4i.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf4i.c +index cd441b841..ee4a7e5b7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf4i.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf4i.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsigncov.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsigncov.c +index 0fb1bc18f..933cb3b0b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsigncov.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsigncov.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsle-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsle-1.c +index a26eb0a3d..febb6345a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsle-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsle-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsle-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsle-2.c +index 15c6cedc2..80b2db335 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsle-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsle-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslei-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslei-1.c +index 0e72a33dd..e78a8b07c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslei-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslei-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslei-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslei-2.c +index 685a1bb36..361d41a04 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslei-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslei-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsll.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsll.c +index 7b8ad7d5a..169627dd3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsll.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsll.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslli.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslli.c +index 7a77e80c0..6a3978317 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslli.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslli.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" 
} */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsllwil-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsllwil-1.c +index 796e88cad..985e32a24 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsllwil-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsllwil-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsllwil-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsllwil-2.c +index 5f46293dc..b20f92ef8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsllwil-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsllwil-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslt-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslt-1.c +index 15c96ccfe..8ce161e92 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslt-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslt-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslt-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslt-2.c +index e8d69f0e9..6f8ddd219 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslt-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslt-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslti-1.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslti-1.c +index 5bf3ce6e8..442abf65b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslti-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslti-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslti-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslti-2.c +index 768df528f..8dbba943a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslti-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vslti-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsra.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsra.c +index fd7c22a82..1285aa86a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsra.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsra.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrai.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrai.c +index 2ca4f0b7a..efccd1822 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrai.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrai.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsran.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsran.c +index 4e7c7ab7e..ad6dd0908 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsran.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsran.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrani.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrani.c +index 92988035d..6cfec397d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrani.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrani.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrar.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrar.c +index 6a842d9ce..b4ff77206 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrar.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrar.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrari.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrari.c +index 2a353d65a..8ddc6157d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrari.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrari.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrarn.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrarn.c +index 60d474203..a0ecbc7dd 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrarn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrarn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff 
--git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrarni.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrarni.c +index 3aa23bdc8..6abc66b89 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrarni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrarni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrl.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrl.c +index f9c789855..9f59d5ea4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrl.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrl.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrli.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrli.c +index 7b5e9a7bf..29e51a34d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrli.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrli.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrln.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrln.c +index 5a8f4f70a..28e8a3ff2 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrln.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrln.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlni.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlni.c +index ca462c834..94b58e65c 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlr.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlr.c +index 211339bb8..ae9d88518 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlr.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlr.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlri.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlri.c +index 2c3a53416..d18448ea7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlri.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlri.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlrn.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlrn.c +index c630b4261..639361d7b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlrn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlrn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlrni.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlrni.c +index 468a17c15..11f19c249 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlrni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsrlrni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w 
-fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssran.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssran.c +index e45ca36f0..5ab683fd5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssran.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssran.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrani.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrani.c +index 7ffcecde7..526fb15dc 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrani.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrani.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrarn.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrarn.c +index a23ad7cd2..b3c0c37c3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrarn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrarn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrarni.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrarni.c +index 76fac97be..7785e9f59 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrarni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrarni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrln.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrln.c +index ed600c72d..a07d5c541 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrln.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrln.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlni.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlni.c +index 613668143..2189b8167 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlrn.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlrn.c +index ec688bb12..e1a633096 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlrn.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlrn.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlrni.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlrni.c +index 02f7ca08b..7035d256e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlrni.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssrlrni.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssub-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssub-1.c +index fc4cbb4e5..d7a5d7f30 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssub-1.c ++++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssub-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssub-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssub-2.c +index 0d5987567..028664bd8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssub-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vssub-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vst.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vst.c +index 8afdffa50..ad0eef8d3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vst.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vst.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsub.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsub.c +index f5c82bc74..01907dcee 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsub.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsub.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubi.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubi.c +index 37e0ccf4d..35cd761ee 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubi.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubi.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwev-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwev-1.c +index f0d391a09..358775ed3 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwev-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwev-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwev-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwev-2.c +index 3b18bc13c..986ead074 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwev-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwev-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwod-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwod-1.c +index 39ebff154..9c8688432 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwod-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwod-1.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwod-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwod-2.c +index 62837f1ac..c762b88f8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwod-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsubwod-2.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vxor.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vxor.c +index 72fa97174..0b9ba4709 100644 +--- 
a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vxor.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vxor.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vxori.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vxori.c +index cc823d4ba..08ceab6ee 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vxori.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vxori.c +@@ -1,4 +1,3 @@ +-/* { dg-do run } */ + /* { dg-options "-mlsx -w -fno-strict-aliasing" } */ + #include "../simd_correctness_check.h" + #include +-- +2.43.0 + diff --git a/0053-LoongArch-Accelerate-optimization-of-scalar-signed-u.patch b/0053-LoongArch-Accelerate-optimization-of-scalar-signed-u.patch new file mode 100644 index 0000000..57c2817 --- /dev/null +++ b/0053-LoongArch-Accelerate-optimization-of-scalar-signed-u.patch @@ -0,0 +1,148 @@ +From 87230032bc7fbcec1e3927b2b4a6aeba78040cc6 Mon Sep 17 00:00:00 2001 +From: Li Wei +Date: Tue, 28 Nov 2023 15:38:37 +0800 +Subject: [PATCH 053/188] LoongArch: Accelerate optimization of scalar + signed/unsigned popcount. + +In LoongArch, the vector popcount has corresponding instructions, while +the scalar does not. Currently, the scalar popcount is calculated +through a loop, and the value of a non-power of two needs to be iterated +several times, so the vector popcount instruction is considered for +optimization. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (v2di): Used to simplify the + following templates. + (popcount2): New. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/popcnt.c: New test. + * gcc.target/loongarch/popcount.c: New test. 
+--- + gcc/config/loongarch/loongarch.md | 27 +++++++++++- + gcc/testsuite/gcc.target/loongarch/popcnt.c | 41 +++++++++++++++++++ + gcc/testsuite/gcc.target/loongarch/popcount.c | 17 ++++++++ + 3 files changed, 83 insertions(+), 2 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/popcnt.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/popcount.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 11577f407..cfd7a8ec6 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1512,7 +1512,30 @@ + (set_attr "cnv_mode" "D2S") + (set_attr "mode" "SF")]) + +- ++;; In vector registers, popcount can be implemented directly through ++;; the vector instruction [X]VPCNT. For GP registers, we can implement ++;; it through the following method. Compared with loop implementation ++;; of popcount, the following method has better performance. ++ ++;; This attribute used for get connection of scalar mode and corresponding ++;; vector mode. ++(define_mode_attr cntmap [(SI "v4si") (DI "v2di")]) ++ ++(define_expand "popcount<mode>2" ++ [(set (match_operand:GPR 0 "register_operand") ++ (popcount:GPR (match_operand:GPR 1 "register_operand")))] ++ "ISA_HAS_LSX" ++{ ++ rtx in = operands[1]; ++ rtx out = operands[0]; ++ rtx vreg = <MODE>mode == SImode ? gen_reg_rtx (V4SImode) : ++ gen_reg_rtx (V2DImode); ++ emit_insn (gen_lsx_vinsgr2vr_<size> (vreg, in, vreg, GEN_INT (1))); ++ emit_insn (gen_popcount<cntmap>2 (vreg, vreg)); ++ emit_insn (gen_lsx_vpickve2gr_<size> (out, vreg, GEN_INT (0))); ++ DONE; ++}) ++ + ;; + ;; .................... 
+ ;; +@@ -3879,7 +3902,7 @@ + (any_extend:SI (match_dup 3)))])] + "") + +- ++ + + (define_mode_iterator QHSD [QI HI SI DI]) + +diff --git a/gcc/testsuite/gcc.target/loongarch/popcnt.c b/gcc/testsuite/gcc.target/loongarch/popcnt.c +new file mode 100644 +index 000000000..a10fca420 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/popcnt.c +@@ -0,0 +1,41 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx" } */ ++/* { dg-final { scan-assembler-not {popcount} } } */ ++/* { dg-final { scan-assembler-times "vpcnt.d" 2 { target { loongarch64*-*-* } } } } */ ++/* { dg-final { scan-assembler-times "vpcnt.w" 4 { target { loongarch64*-*-* } } } } */ ++ ++int ++foo (int x) ++{ ++ return __builtin_popcount (x); ++} ++ ++long ++foo1 (long x) ++{ ++ return __builtin_popcountl (x); ++} ++ ++long long ++foo2 (long long x) ++{ ++ return __builtin_popcountll (x); ++} ++ ++int ++foo3 (int *p) ++{ ++ return __builtin_popcount (*p); ++} ++ ++unsigned ++foo4 (int x) ++{ ++ return __builtin_popcount (x); ++} ++ ++unsigned long ++foo5 (int x) ++{ ++ return __builtin_popcount (x); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/popcount.c b/gcc/testsuite/gcc.target/loongarch/popcount.c +new file mode 100644 +index 000000000..390ff0676 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/popcount.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -fdump-tree-optimized" } */ ++/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 1 "optimized" } } */ ++ ++int ++PopCount (long b) ++{ ++ int c = 0; ++ ++ while (b) ++ { ++ b &= b - 1; ++ c++; ++ } ++ ++ return c; ++} +-- +2.43.0 + diff --git a/0054-LoongArch-Optimize-vector-constant-extract-even-odd-.patch b/0054-LoongArch-Optimize-vector-constant-extract-even-odd-.patch new file mode 100644 index 0000000..5dc5e27 --- /dev/null +++ b/0054-LoongArch-Optimize-vector-constant-extract-even-odd-.patch @@ -0,0 +1,163 @@ +From 19282fbb0dab42c3553326a1ed01ad9a599622dd Mon Sep 17 00:00:00 2001 
+From: Li Wei +Date: Tue, 28 Nov 2023 15:39:00 +0800 +Subject: [PATCH 054/188] LoongArch: Optimize vector constant + extract-{even/odd} permutation. + +For vector constant extract-{even/odd} permutation replace the default +[x]vshuf instruction combination with [x]vilv{l/h} instruction, which +can reduce instructions and improves performance. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_is_odd_extraction): + Supplementary function prototype. + (loongarch_is_even_extraction): Adjust. + (loongarch_try_expand_lsx_vshuf_const): Adjust. + (loongarch_is_extraction_permutation): Adjust. + (loongarch_expand_vec_perm_const_2): Adjust. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/lasx-extract-even_odd-opt.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 33 +++++++++++- + .../loongarch/lasx-extract-even_odd-opt.c | 54 +++++++++++++++++++ + 2 files changed, 85 insertions(+), 2 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index ecceca22d..3ef7e3605 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -8668,6 +8668,12 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) + } + } + ++static bool ++loongarch_is_odd_extraction (struct expand_vec_perm_d *); ++ ++static bool ++loongarch_is_even_extraction (struct expand_vec_perm_d *); ++ + static bool + loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) + { +@@ -8690,6 +8696,24 @@ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) + if (d->testing_p) + return true; + ++ /* If match extract-even and extract-odd permutations pattern, use ++ * vselect much better than vshuf. 
*/ ++ if (loongarch_is_odd_extraction (d) ++ || loongarch_is_even_extraction (d)) ++ { ++ if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, ++ d->perm, d->nelt)) ++ return true; ++ ++ unsigned char perm2[MAX_VECT_LEN]; ++ for (i = 0; i < d->nelt; ++i) ++ perm2[i] = (d->perm[i] + d->nelt) & (2 * d->nelt - 1); ++ ++ if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, ++ perm2, d->nelt)) ++ return true; ++ } ++ + for (i = 0; i < d->nelt; i += 1) + { + rperm[i] = GEN_INT (d->perm[i]); +@@ -8874,7 +8898,7 @@ loongarch_is_even_extraction (struct expand_vec_perm_d *d) + result = false; + break; + } +- buf += 1; ++ buf += 2; + } + + return result; +@@ -8896,7 +8920,7 @@ loongarch_is_extraction_permutation (struct expand_vec_perm_d *d) + result = false; + break; + } +- buf += 2; ++ buf += 1; + } + + return result; +@@ -9373,6 +9397,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + Selector after: { 1, 3, 1, 3 }. + Even extraction selector sample: E_V4DImode, { 0, 2, 4, 6 } + Selector after: { 0, 2, 0, 2 }. */ ++ ++ /* Better implement of extract-even and extract-odd permutations. 
*/ ++ if (loongarch_expand_vec_perm_even_odd (d)) ++ return true; ++ + for (i = 0; i < d->nelt / 2; i += 1) + { + idx = d->perm[i]; +diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c b/gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c +new file mode 100644 +index 000000000..515f0c862 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c +@@ -0,0 +1,54 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mlasx" } */ ++/* { dg-final { scan-assembler "xvilvl.d" } } */ ++/* { dg-final { scan-assembler "xvilvh.d" } } */ ++ ++#define CMUL(a, b, c) \ ++ { \ ++ (c).ai = (a).ai * (b).ai - (a).bi * (b).bi; \ ++ (c).bi = (a).ai * (b).bi + (a).bi * (b).ai; \ ++ (c).ci = (a).ci * (b).ci - (a).di * (b).di; \ ++ (c).di = (a).ci * (b).di + (a).di * (b).ci; \ ++ } ++#define CSUM(a, b) \ ++ { \ ++ (a).ai += (b).ai; \ ++ (a).bi += (b).bi; \ ++ (a).ci += (b).ci; \ ++ (a).di += (b).di; \ ++ } ++ ++typedef struct ++{ ++ double ai; ++ double bi; ++ double ci; ++ double di; ++} complex; ++ ++typedef struct ++{ ++ complex e[6][6]; ++} matrix; ++ ++typedef struct ++{ ++ complex c[6]; ++} vector; ++ ++void ++mult_adj_mat_vec (matrix *a, vector *b, vector *c) ++{ ++ register int i, j; ++ register complex x, y; ++ for (i = 0; i < 6; i++) ++ { ++ x.ai = x.bi = x.ci = x.di = 0.0; ++ for (j = 0; j < 6; j++) ++ { ++ CMUL (a->e[j][i], b->c[j], y); ++ CSUM (x, y); ++ } ++ c->c[i] = x; ++ } ++} +-- +2.43.0 + diff --git a/0055-LoongArch-Add-intrinsic-function-descriptions-for-LS.patch b/0055-LoongArch-Add-intrinsic-function-descriptions-for-LS.patch new file mode 100644 index 0000000..5d29011 --- /dev/null +++ b/0055-LoongArch-Add-intrinsic-function-descriptions-for-LS.patch @@ -0,0 +1,1697 @@ +From 548322a75cdeb96960fb9d324a2abf8735c4d254 Mon Sep 17 00:00:00 2001 +From: chenxiaolong +Date: Tue, 7 Nov 2023 11:53:39 +0800 +Subject: [PATCH 055/188] LoongArch: Add intrinsic function descriptions for + LSX and LASX instructions to doc. 
+ +gcc/ChangeLog: + + * doc/extend.texi: Add information about the intrinsic function of the vector + instruction. +--- + gcc/doc/extend.texi | 1662 +++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 1662 insertions(+) + +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index 497c6de5f..7edd3974d 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -14679,6 +14679,8 @@ instructions, but allow the compiler to schedule those calls. + * BPF Built-in Functions:: + * FR-V Built-in Functions:: + * LoongArch Base Built-in Functions:: ++* LoongArch SX Vector Intrinsics:: ++* LoongArch ASX Vector Intrinsics:: + * MIPS DSP Built-in Functions:: + * MIPS Paired-Single Support:: + * MIPS Loongson Built-in Functions:: +@@ -16262,6 +16264,1666 @@ Returns the value that is currently set in the @samp{tp} register. + void * __builtin_thread_pointer (void) + @end smallexample + ++@node LoongArch SX Vector Intrinsics ++@subsection LoongArch SX Vector Intrinsics ++ ++GCC provides intrinsics to access the LSX (Loongson SIMD Extension) instructions. ++The interface is made available by including @code{<lsxintrin.h>} and using ++@option{-mlsx}. ++ ++The following vectors typedefs are included in @code{lsxintrin.h}: ++ ++@itemize ++@item @code{__m128i}, a 128-bit vector of fixed point; ++@item @code{__m128}, a 128-bit vector of single precision floating point; ++@item @code{__m128d}, a 128-bit vector of double precision floating point. 
++@end itemize ++ ++Instructions and corresponding built-ins may have additional restrictions and/or ++input/output values manipulated: ++@itemize ++@item @code{imm0_1}, an integer literal in range 0 to 1; ++@item @code{imm0_3}, an integer literal in range 0 to 3; ++@item @code{imm0_7}, an integer literal in range 0 to 7; ++@item @code{imm0_15}, an integer literal in range 0 to 15; ++@item @code{imm0_31}, an integer literal in range 0 to 31; ++@item @code{imm0_63}, an integer literal in range 0 to 63; ++@item @code{imm0_127}, an integer literal in range 0 to 127; ++@item @code{imm0_255}, an integer literal in range 0 to 255; ++@item @code{imm_n16_15}, an integer literal in range -16 to 15; ++@item @code{imm_n128_127}, an integer literal in range -128 to 127; ++@item @code{imm_n256_255}, an integer literal in range -256 to 255; ++@item @code{imm_n512_511}, an integer literal in range -512 to 511; ++@item @code{imm_n1024_1023}, an integer literal in range -1024 to 1023; ++@item @code{imm_n2048_2047}, an integer literal in range -2048 to 2047. ++@end itemize ++ ++For convenience, GCC defines functions @code{__lsx_vrepli_@{b/h/w/d@}} and ++@code{__lsx_b[n]z_@{v/b/h/w/d@}}, which are implemented as follows: ++ ++@smallexample ++a. @code{__lsx_vrepli_@{b/h/w/d@}}: Implemented the case where the highest ++ bit of @code{vldi} instruction @code{i13} is 1. ++ ++ i13[12] == 1'b0 ++ case i13[11:10] of : ++ 2'b00: __lsx_vrepli_b (imm_n512_511) ++ 2'b01: __lsx_vrepli_h (imm_n512_511) ++ 2'b10: __lsx_vrepli_w (imm_n512_511) ++ 2'b11: __lsx_vrepli_d (imm_n512_511) ++ ++b. @code{__lsx_b[n]z_@{v/b/h/w/d@}}: Since the @code{vseteqz} class directive ++ cannot be used on its own, this function is defined. 
++ ++ _lsx_bz_v => vseteqz.v + bcnez ++ _lsx_bnz_v => vsetnez.v + bcnez ++ _lsx_bz_b => vsetanyeqz.b + bcnez ++ _lsx_bz_h => vsetanyeqz.h + bcnez ++ _lsx_bz_w => vsetanyeqz.w + bcnez ++ _lsx_bz_d => vsetanyeqz.d + bcnez ++ _lsx_bnz_b => vsetallnez.b + bcnez ++ _lsx_bnz_h => vsetallnez.h + bcnez ++ _lsx_bnz_w => vsetallnez.w + bcnez ++ _lsx_bnz_d => vsetallnez.d + bcnez ++@end smallexample ++ ++@smallexample ++eg: ++ #include <lsxintrin.h> ++ ++ extern __m128i @var{a}; ++ ++ void ++ test (void) ++ @{ ++ if (__lsx_bz_v (@var{a})) ++ printf ("1\n"); ++ else ++ printf ("2\n"); ++ @} ++@end smallexample ++ ++@emph{Note:} For directives where the intent operand is also the source operand ++(modifying only part of the bitfield of the intent register), the first parameter ++in the builtin call function is used as the intent operand. ++ ++@smallexample ++eg: ++ #include <lsxintrin.h> ++ ++ extern __m128i @var{dst}; ++ extern int @var{src}; ++ ++ void ++ test (void) ++ @{ ++ @var{dst} = __lsx_vinsgr2vr_b (@var{dst}, @var{src}, 3); ++ @} ++@end smallexample ++ ++The intrinsics provided are listed below: ++@smallexample ++int __lsx_bnz_b (__m128i); ++int __lsx_bnz_d (__m128i); ++int __lsx_bnz_h (__m128i); ++int __lsx_bnz_v (__m128i); ++int __lsx_bnz_w (__m128i); ++int __lsx_bz_b (__m128i); ++int __lsx_bz_d (__m128i); ++int __lsx_bz_h (__m128i); ++int __lsx_bz_v (__m128i); ++int __lsx_bz_w (__m128i); ++__m128i __lsx_vabsd_b (__m128i, __m128i); ++__m128i __lsx_vabsd_bu (__m128i, __m128i); ++__m128i __lsx_vabsd_di (__m128i, __m128i); ++__m128i __lsx_vabsd_du (__m128i, __m128i); ++__m128i __lsx_vabsd_h (__m128i, __m128i); ++__m128i __lsx_vabsd_hu (__m128i, __m128i); ++__m128i __lsx_vabsd_w (__m128i, __m128i); ++__m128i __lsx_vabsd_wu (__m128i, __m128i); ++__m128i __lsx_vadda_b (__m128i, __m128i); ++__m128i __lsx_vadda_d (__m128i, __m128i); ++__m128i __lsx_vadda_h (__m128i, __m128i); ++__m128i __lsx_vadda_w (__m128i, __m128i); ++__m128i __lsx_vadd_b (__m128i, __m128i); ++__m128i __lsx_vadd_d (__m128i, 
__m128i); ++__m128i __lsx_vadd_h (__m128i, __m128i); ++__m128i __lsx_vaddi_bu (__m128i, imm0_31); ++__m128i __lsx_vaddi_du (__m128i, imm0_31); ++__m128i __lsx_vaddi_hu (__m128i, imm0_31); ++__m128i __lsx_vaddi_wu (__m128i, imm0_31); ++__m128i __lsx_vadd_q (__m128i, __m128i); ++__m128i __lsx_vadd_w (__m128i, __m128i); ++__m128i __lsx_vaddwev_d_w (__m128i, __m128i); ++__m128i __lsx_vaddwev_d_wu (__m128i, __m128i); ++__m128i __lsx_vaddwev_d_wu_w (__m128i, __m128i); ++__m128i __lsx_vaddwev_h_b (__m128i, __m128i); ++__m128i __lsx_vaddwev_h_bu (__m128i, __m128i); ++__m128i __lsx_vaddwev_h_bu_b (__m128i, __m128i); ++__m128i __lsx_vaddwev_q_d (__m128i, __m128i); ++__m128i __lsx_vaddwev_q_du (__m128i, __m128i); ++__m128i __lsx_vaddwev_q_du_d (__m128i, __m128i); ++__m128i __lsx_vaddwev_w_h (__m128i, __m128i); ++__m128i __lsx_vaddwev_w_hu (__m128i, __m128i); ++__m128i __lsx_vaddwev_w_hu_h (__m128i, __m128i); ++__m128i __lsx_vaddwod_d_w (__m128i, __m128i); ++__m128i __lsx_vaddwod_d_wu (__m128i, __m128i); ++__m128i __lsx_vaddwod_d_wu_w (__m128i, __m128i); ++__m128i __lsx_vaddwod_h_b (__m128i, __m128i); ++__m128i __lsx_vaddwod_h_bu (__m128i, __m128i); ++__m128i __lsx_vaddwod_h_bu_b (__m128i, __m128i); ++__m128i __lsx_vaddwod_q_d (__m128i, __m128i); ++__m128i __lsx_vaddwod_q_du (__m128i, __m128i); ++__m128i __lsx_vaddwod_q_du_d (__m128i, __m128i); ++__m128i __lsx_vaddwod_w_h (__m128i, __m128i); ++__m128i __lsx_vaddwod_w_hu (__m128i, __m128i); ++__m128i __lsx_vaddwod_w_hu_h (__m128i, __m128i); ++__m128i __lsx_vandi_b (__m128i, imm0_255); ++__m128i __lsx_vandn_v (__m128i, __m128i); ++__m128i __lsx_vand_v (__m128i, __m128i); ++__m128i __lsx_vavg_b (__m128i, __m128i); ++__m128i __lsx_vavg_bu (__m128i, __m128i); ++__m128i __lsx_vavg_d (__m128i, __m128i); ++__m128i __lsx_vavg_du (__m128i, __m128i); ++__m128i __lsx_vavg_h (__m128i, __m128i); ++__m128i __lsx_vavg_hu (__m128i, __m128i); ++__m128i __lsx_vavgr_b (__m128i, __m128i); ++__m128i __lsx_vavgr_bu (__m128i, __m128i); ++__m128i 
__lsx_vavgr_d (__m128i, __m128i); ++__m128i __lsx_vavgr_du (__m128i, __m128i); ++__m128i __lsx_vavgr_h (__m128i, __m128i); ++__m128i __lsx_vavgr_hu (__m128i, __m128i); ++__m128i __lsx_vavgr_w (__m128i, __m128i); ++__m128i __lsx_vavgr_wu (__m128i, __m128i); ++__m128i __lsx_vavg_w (__m128i, __m128i); ++__m128i __lsx_vavg_wu (__m128i, __m128i); ++__m128i __lsx_vbitclr_b (__m128i, __m128i); ++__m128i __lsx_vbitclr_d (__m128i, __m128i); ++__m128i __lsx_vbitclr_h (__m128i, __m128i); ++__m128i __lsx_vbitclri_b (__m128i, imm0_7); ++__m128i __lsx_vbitclri_d (__m128i, imm0_63); ++__m128i __lsx_vbitclri_h (__m128i, imm0_15); ++__m128i __lsx_vbitclri_w (__m128i, imm0_31); ++__m128i __lsx_vbitclr_w (__m128i, __m128i); ++__m128i __lsx_vbitrev_b (__m128i, __m128i); ++__m128i __lsx_vbitrev_d (__m128i, __m128i); ++__m128i __lsx_vbitrev_h (__m128i, __m128i); ++__m128i __lsx_vbitrevi_b (__m128i, imm0_7); ++__m128i __lsx_vbitrevi_d (__m128i, imm0_63); ++__m128i __lsx_vbitrevi_h (__m128i, imm0_15); ++__m128i __lsx_vbitrevi_w (__m128i, imm0_31); ++__m128i __lsx_vbitrev_w (__m128i, __m128i); ++__m128i __lsx_vbitseli_b (__m128i, __m128i, imm0_255); ++__m128i __lsx_vbitsel_v (__m128i, __m128i, __m128i); ++__m128i __lsx_vbitset_b (__m128i, __m128i); ++__m128i __lsx_vbitset_d (__m128i, __m128i); ++__m128i __lsx_vbitset_h (__m128i, __m128i); ++__m128i __lsx_vbitseti_b (__m128i, imm0_7); ++__m128i __lsx_vbitseti_d (__m128i, imm0_63); ++__m128i __lsx_vbitseti_h (__m128i, imm0_15); ++__m128i __lsx_vbitseti_w (__m128i, imm0_31); ++__m128i __lsx_vbitset_w (__m128i, __m128i); ++__m128i __lsx_vbsll_v (__m128i, imm0_31); ++__m128i __lsx_vbsrl_v (__m128i, imm0_31); ++__m128i __lsx_vclo_b (__m128i); ++__m128i __lsx_vclo_d (__m128i); ++__m128i __lsx_vclo_h (__m128i); ++__m128i __lsx_vclo_w (__m128i); ++__m128i __lsx_vclz_b (__m128i); ++__m128i __lsx_vclz_d (__m128i); ++__m128i __lsx_vclz_h (__m128i); ++__m128i __lsx_vclz_w (__m128i); ++__m128i __lsx_vdiv_b (__m128i, __m128i); ++__m128i __lsx_vdiv_bu 
(__m128i, __m128i); ++__m128i __lsx_vdiv_d (__m128i, __m128i); ++__m128i __lsx_vdiv_du (__m128i, __m128i); ++__m128i __lsx_vdiv_h (__m128i, __m128i); ++__m128i __lsx_vdiv_hu (__m128i, __m128i); ++__m128i __lsx_vdiv_w (__m128i, __m128i); ++__m128i __lsx_vdiv_wu (__m128i, __m128i); ++__m128i __lsx_vexth_du_wu (__m128i); ++__m128i __lsx_vexth_d_w (__m128i); ++__m128i __lsx_vexth_h_b (__m128i); ++__m128i __lsx_vexth_hu_bu (__m128i); ++__m128i __lsx_vexth_q_d (__m128i); ++__m128i __lsx_vexth_qu_du (__m128i); ++__m128i __lsx_vexth_w_h (__m128i); ++__m128i __lsx_vexth_wu_hu (__m128i); ++__m128i __lsx_vextl_q_d (__m128i); ++__m128i __lsx_vextl_qu_du (__m128i); ++__m128i __lsx_vextrins_b (__m128i, __m128i, imm0_255); ++__m128i __lsx_vextrins_d (__m128i, __m128i, imm0_255); ++__m128i __lsx_vextrins_h (__m128i, __m128i, imm0_255); ++__m128i __lsx_vextrins_w (__m128i, __m128i, imm0_255); ++__m128d __lsx_vfadd_d (__m128d, __m128d); ++__m128 __lsx_vfadd_s (__m128, __m128); ++__m128i __lsx_vfclass_d (__m128d); ++__m128i __lsx_vfclass_s (__m128); ++__m128i __lsx_vfcmp_caf_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_caf_s (__m128, __m128); ++__m128i __lsx_vfcmp_ceq_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_ceq_s (__m128, __m128); ++__m128i __lsx_vfcmp_cle_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_cle_s (__m128, __m128); ++__m128i __lsx_vfcmp_clt_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_clt_s (__m128, __m128); ++__m128i __lsx_vfcmp_cne_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_cne_s (__m128, __m128); ++__m128i __lsx_vfcmp_cor_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_cor_s (__m128, __m128); ++__m128i __lsx_vfcmp_cueq_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_cueq_s (__m128, __m128); ++__m128i __lsx_vfcmp_cule_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_cule_s (__m128, __m128); ++__m128i __lsx_vfcmp_cult_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_cult_s (__m128, __m128); ++__m128i __lsx_vfcmp_cun_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_cune_d (__m128d, __m128d); 
++__m128i __lsx_vfcmp_cune_s (__m128, __m128); ++__m128i __lsx_vfcmp_cun_s (__m128, __m128); ++__m128i __lsx_vfcmp_saf_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_saf_s (__m128, __m128); ++__m128i __lsx_vfcmp_seq_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_seq_s (__m128, __m128); ++__m128i __lsx_vfcmp_sle_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_sle_s (__m128, __m128); ++__m128i __lsx_vfcmp_slt_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_slt_s (__m128, __m128); ++__m128i __lsx_vfcmp_sne_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_sne_s (__m128, __m128); ++__m128i __lsx_vfcmp_sor_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_sor_s (__m128, __m128); ++__m128i __lsx_vfcmp_sueq_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_sueq_s (__m128, __m128); ++__m128i __lsx_vfcmp_sule_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_sule_s (__m128, __m128); ++__m128i __lsx_vfcmp_sult_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_sult_s (__m128, __m128); ++__m128i __lsx_vfcmp_sun_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_sune_d (__m128d, __m128d); ++__m128i __lsx_vfcmp_sune_s (__m128, __m128); ++__m128i __lsx_vfcmp_sun_s (__m128, __m128); ++__m128d __lsx_vfcvth_d_s (__m128); ++__m128i __lsx_vfcvt_h_s (__m128, __m128); ++__m128 __lsx_vfcvth_s_h (__m128i); ++__m128d __lsx_vfcvtl_d_s (__m128); ++__m128 __lsx_vfcvtl_s_h (__m128i); ++__m128 __lsx_vfcvt_s_d (__m128d, __m128d); ++__m128d __lsx_vfdiv_d (__m128d, __m128d); ++__m128 __lsx_vfdiv_s (__m128, __m128); ++__m128d __lsx_vffint_d_l (__m128i); ++__m128d __lsx_vffint_d_lu (__m128i); ++__m128d __lsx_vffinth_d_w (__m128i); ++__m128d __lsx_vffintl_d_w (__m128i); ++__m128 __lsx_vffint_s_l (__m128i, __m128i); ++__m128 __lsx_vffint_s_w (__m128i); ++__m128 __lsx_vffint_s_wu (__m128i); ++__m128d __lsx_vflogb_d (__m128d); ++__m128 __lsx_vflogb_s (__m128); ++__m128d __lsx_vfmadd_d (__m128d, __m128d, __m128d); ++__m128 __lsx_vfmadd_s (__m128, __m128, __m128); ++__m128d __lsx_vfmaxa_d (__m128d, __m128d); ++__m128 __lsx_vfmaxa_s (__m128, __m128); 
++__m128d __lsx_vfmax_d (__m128d, __m128d); ++__m128 __lsx_vfmax_s (__m128, __m128); ++__m128d __lsx_vfmina_d (__m128d, __m128d); ++__m128 __lsx_vfmina_s (__m128, __m128); ++__m128d __lsx_vfmin_d (__m128d, __m128d); ++__m128 __lsx_vfmin_s (__m128, __m128); ++__m128d __lsx_vfmsub_d (__m128d, __m128d, __m128d); ++__m128 __lsx_vfmsub_s (__m128, __m128, __m128); ++__m128d __lsx_vfmul_d (__m128d, __m128d); ++__m128 __lsx_vfmul_s (__m128, __m128); ++__m128d __lsx_vfnmadd_d (__m128d, __m128d, __m128d); ++__m128 __lsx_vfnmadd_s (__m128, __m128, __m128); ++__m128d __lsx_vfnmsub_d (__m128d, __m128d, __m128d); ++__m128 __lsx_vfnmsub_s (__m128, __m128, __m128); ++__m128d __lsx_vfrecip_d (__m128d); ++__m128 __lsx_vfrecip_s (__m128); ++__m128d __lsx_vfrint_d (__m128d); ++__m128d __lsx_vfrintrm_d (__m128d); ++__m128 __lsx_vfrintrm_s (__m128); ++__m128d __lsx_vfrintrne_d (__m128d); ++__m128 __lsx_vfrintrne_s (__m128); ++__m128d __lsx_vfrintrp_d (__m128d); ++__m128 __lsx_vfrintrp_s (__m128); ++__m128d __lsx_vfrintrz_d (__m128d); ++__m128 __lsx_vfrintrz_s (__m128); ++__m128 __lsx_vfrint_s (__m128); ++__m128d __lsx_vfrsqrt_d (__m128d); ++__m128 __lsx_vfrsqrt_s (__m128); ++__m128i __lsx_vfrstp_b (__m128i, __m128i, __m128i); ++__m128i __lsx_vfrstp_h (__m128i, __m128i, __m128i); ++__m128i __lsx_vfrstpi_b (__m128i, __m128i, imm0_31); ++__m128i __lsx_vfrstpi_h (__m128i, __m128i, imm0_31); ++__m128d __lsx_vfsqrt_d (__m128d); ++__m128 __lsx_vfsqrt_s (__m128); ++__m128d __lsx_vfsub_d (__m128d, __m128d); ++__m128 __lsx_vfsub_s (__m128, __m128); ++__m128i __lsx_vftinth_l_s (__m128); ++__m128i __lsx_vftint_l_d (__m128d); ++__m128i __lsx_vftintl_l_s (__m128); ++__m128i __lsx_vftint_lu_d (__m128d); ++__m128i __lsx_vftintrmh_l_s (__m128); ++__m128i __lsx_vftintrm_l_d (__m128d); ++__m128i __lsx_vftintrml_l_s (__m128); ++__m128i __lsx_vftintrm_w_d (__m128d, __m128d); ++__m128i __lsx_vftintrm_w_s (__m128); ++__m128i __lsx_vftintrneh_l_s (__m128); ++__m128i __lsx_vftintrne_l_d (__m128d); ++__m128i
__lsx_vftintrnel_l_s (__m128); ++__m128i __lsx_vftintrne_w_d (__m128d, __m128d); ++__m128i __lsx_vftintrne_w_s (__m128); ++__m128i __lsx_vftintrph_l_s (__m128); ++__m128i __lsx_vftintrp_l_d (__m128d); ++__m128i __lsx_vftintrpl_l_s (__m128); ++__m128i __lsx_vftintrp_w_d (__m128d, __m128d); ++__m128i __lsx_vftintrp_w_s (__m128); ++__m128i __lsx_vftintrzh_l_s (__m128); ++__m128i __lsx_vftintrz_l_d (__m128d); ++__m128i __lsx_vftintrzl_l_s (__m128); ++__m128i __lsx_vftintrz_lu_d (__m128d); ++__m128i __lsx_vftintrz_w_d (__m128d, __m128d); ++__m128i __lsx_vftintrz_w_s (__m128); ++__m128i __lsx_vftintrz_wu_s (__m128); ++__m128i __lsx_vftint_w_d (__m128d, __m128d); ++__m128i __lsx_vftint_w_s (__m128); ++__m128i __lsx_vftint_wu_s (__m128); ++__m128i __lsx_vhaddw_du_wu (__m128i, __m128i); ++__m128i __lsx_vhaddw_d_w (__m128i, __m128i); ++__m128i __lsx_vhaddw_h_b (__m128i, __m128i); ++__m128i __lsx_vhaddw_hu_bu (__m128i, __m128i); ++__m128i __lsx_vhaddw_q_d (__m128i, __m128i); ++__m128i __lsx_vhaddw_qu_du (__m128i, __m128i); ++__m128i __lsx_vhaddw_w_h (__m128i, __m128i); ++__m128i __lsx_vhaddw_wu_hu (__m128i, __m128i); ++__m128i __lsx_vhsubw_du_wu (__m128i, __m128i); ++__m128i __lsx_vhsubw_d_w (__m128i, __m128i); ++__m128i __lsx_vhsubw_h_b (__m128i, __m128i); ++__m128i __lsx_vhsubw_hu_bu (__m128i, __m128i); ++__m128i __lsx_vhsubw_q_d (__m128i, __m128i); ++__m128i __lsx_vhsubw_qu_du (__m128i, __m128i); ++__m128i __lsx_vhsubw_w_h (__m128i, __m128i); ++__m128i __lsx_vhsubw_wu_hu (__m128i, __m128i); ++__m128i __lsx_vilvh_b (__m128i, __m128i); ++__m128i __lsx_vilvh_d (__m128i, __m128i); ++__m128i __lsx_vilvh_h (__m128i, __m128i); ++__m128i __lsx_vilvh_w (__m128i, __m128i); ++__m128i __lsx_vilvl_b (__m128i, __m128i); ++__m128i __lsx_vilvl_d (__m128i, __m128i); ++__m128i __lsx_vilvl_h (__m128i, __m128i); ++__m128i __lsx_vilvl_w (__m128i, __m128i); ++__m128i __lsx_vinsgr2vr_b (__m128i, int, imm0_15); ++__m128i __lsx_vinsgr2vr_d (__m128i, long int, imm0_1); ++__m128i __lsx_vinsgr2vr_h 
(__m128i, int, imm0_7); ++__m128i __lsx_vinsgr2vr_w (__m128i, int, imm0_3); ++__m128i __lsx_vld (void *, imm_n2048_2047); ++__m128i __lsx_vldi (imm_n1024_1023); ++__m128i __lsx_vldrepl_b (void *, imm_n2048_2047); ++__m128i __lsx_vldrepl_d (void *, imm_n256_255); ++__m128i __lsx_vldrepl_h (void *, imm_n1024_1023); ++__m128i __lsx_vldrepl_w (void *, imm_n512_511); ++__m128i __lsx_vldx (void *, long int); ++__m128i __lsx_vmadd_b (__m128i, __m128i, __m128i); ++__m128i __lsx_vmadd_d (__m128i, __m128i, __m128i); ++__m128i __lsx_vmadd_h (__m128i, __m128i, __m128i); ++__m128i __lsx_vmadd_w (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_d_w (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_d_wu (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_d_wu_w (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_h_b (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_h_bu (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_h_bu_b (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_q_d (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_q_du (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_q_du_d (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_w_h (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_w_hu (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwev_w_hu_h (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_d_w (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_d_wu (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_d_wu_w (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_h_b (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_h_bu (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_h_bu_b (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_q_d (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_q_du (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_q_du_d (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_w_h (__m128i, __m128i, __m128i); ++__m128i __lsx_vmaddwod_w_hu (__m128i, __m128i, __m128i); ++__m128i
__lsx_vmaddwod_w_hu_h (__m128i, __m128i, __m128i); ++__m128i __lsx_vmax_b (__m128i, __m128i); ++__m128i __lsx_vmax_bu (__m128i, __m128i); ++__m128i __lsx_vmax_d (__m128i, __m128i); ++__m128i __lsx_vmax_du (__m128i, __m128i); ++__m128i __lsx_vmax_h (__m128i, __m128i); ++__m128i __lsx_vmax_hu (__m128i, __m128i); ++__m128i __lsx_vmaxi_b (__m128i, imm_n16_15); ++__m128i __lsx_vmaxi_bu (__m128i, imm0_31); ++__m128i __lsx_vmaxi_d (__m128i, imm_n16_15); ++__m128i __lsx_vmaxi_du (__m128i, imm0_31); ++__m128i __lsx_vmaxi_h (__m128i, imm_n16_15); ++__m128i __lsx_vmaxi_hu (__m128i, imm0_31); ++__m128i __lsx_vmaxi_w (__m128i, imm_n16_15); ++__m128i __lsx_vmaxi_wu (__m128i, imm0_31); ++__m128i __lsx_vmax_w (__m128i, __m128i); ++__m128i __lsx_vmax_wu (__m128i, __m128i); ++__m128i __lsx_vmin_b (__m128i, __m128i); ++__m128i __lsx_vmin_bu (__m128i, __m128i); ++__m128i __lsx_vmin_d (__m128i, __m128i); ++__m128i __lsx_vmin_du (__m128i, __m128i); ++__m128i __lsx_vmin_h (__m128i, __m128i); ++__m128i __lsx_vmin_hu (__m128i, __m128i); ++__m128i __lsx_vmini_b (__m128i, imm_n16_15); ++__m128i __lsx_vmini_bu (__m128i, imm0_31); ++__m128i __lsx_vmini_d (__m128i, imm_n16_15); ++__m128i __lsx_vmini_du (__m128i, imm0_31); ++__m128i __lsx_vmini_h (__m128i, imm_n16_15); ++__m128i __lsx_vmini_hu (__m128i, imm0_31); ++__m128i __lsx_vmini_w (__m128i, imm_n16_15); ++__m128i __lsx_vmini_wu (__m128i, imm0_31); ++__m128i __lsx_vmin_w (__m128i, __m128i); ++__m128i __lsx_vmin_wu (__m128i, __m128i); ++__m128i __lsx_vmod_b (__m128i, __m128i); ++__m128i __lsx_vmod_bu (__m128i, __m128i); ++__m128i __lsx_vmod_d (__m128i, __m128i); ++__m128i __lsx_vmod_du (__m128i, __m128i); ++__m128i __lsx_vmod_h (__m128i, __m128i); ++__m128i __lsx_vmod_hu (__m128i, __m128i); ++__m128i __lsx_vmod_w (__m128i, __m128i); ++__m128i __lsx_vmod_wu (__m128i, __m128i); ++__m128i __lsx_vmskgez_b (__m128i); ++__m128i __lsx_vmskltz_b (__m128i); ++__m128i __lsx_vmskltz_d (__m128i); ++__m128i __lsx_vmskltz_h (__m128i); ++__m128i __lsx_vmskltz_w
(__m128i); ++__m128i __lsx_vmsknz_b (__m128i); ++__m128i __lsx_vmsub_b (__m128i, __m128i, __m128i); ++__m128i __lsx_vmsub_d (__m128i, __m128i, __m128i); ++__m128i __lsx_vmsub_h (__m128i, __m128i, __m128i); ++__m128i __lsx_vmsub_w (__m128i, __m128i, __m128i); ++__m128i __lsx_vmuh_b (__m128i, __m128i); ++__m128i __lsx_vmuh_bu (__m128i, __m128i); ++__m128i __lsx_vmuh_d (__m128i, __m128i); ++__m128i __lsx_vmuh_du (__m128i, __m128i); ++__m128i __lsx_vmuh_h (__m128i, __m128i); ++__m128i __lsx_vmuh_hu (__m128i, __m128i); ++__m128i __lsx_vmuh_w (__m128i, __m128i); ++__m128i __lsx_vmuh_wu (__m128i, __m128i); ++__m128i __lsx_vmul_b (__m128i, __m128i); ++__m128i __lsx_vmul_d (__m128i, __m128i); ++__m128i __lsx_vmul_h (__m128i, __m128i); ++__m128i __lsx_vmul_w (__m128i, __m128i); ++__m128i __lsx_vmulwev_d_w (__m128i, __m128i); ++__m128i __lsx_vmulwev_d_wu (__m128i, __m128i); ++__m128i __lsx_vmulwev_d_wu_w (__m128i, __m128i); ++__m128i __lsx_vmulwev_h_b (__m128i, __m128i); ++__m128i __lsx_vmulwev_h_bu (__m128i, __m128i); ++__m128i __lsx_vmulwev_h_bu_b (__m128i, __m128i); ++__m128i __lsx_vmulwev_q_d (__m128i, __m128i); ++__m128i __lsx_vmulwev_q_du (__m128i, __m128i); ++__m128i __lsx_vmulwev_q_du_d (__m128i, __m128i); ++__m128i __lsx_vmulwev_w_h (__m128i, __m128i); ++__m128i __lsx_vmulwev_w_hu (__m128i, __m128i); ++__m128i __lsx_vmulwev_w_hu_h (__m128i, __m128i); ++__m128i __lsx_vmulwod_d_w (__m128i, __m128i); ++__m128i __lsx_vmulwod_d_wu (__m128i, __m128i); ++__m128i __lsx_vmulwod_d_wu_w (__m128i, __m128i); ++__m128i __lsx_vmulwod_h_b (__m128i, __m128i); ++__m128i __lsx_vmulwod_h_bu (__m128i, __m128i); ++__m128i __lsx_vmulwod_h_bu_b (__m128i, __m128i); ++__m128i __lsx_vmulwod_q_d (__m128i, __m128i); ++__m128i __lsx_vmulwod_q_du (__m128i, __m128i); ++__m128i __lsx_vmulwod_q_du_d (__m128i, __m128i); ++__m128i __lsx_vmulwod_w_h (__m128i, __m128i); ++__m128i __lsx_vmulwod_w_hu (__m128i, __m128i); ++__m128i __lsx_vmulwod_w_hu_h (__m128i, __m128i); ++__m128i __lsx_vneg_b (__m128i); 
++__m128i __lsx_vneg_d (__m128i); ++__m128i __lsx_vneg_h (__m128i); ++__m128i __lsx_vneg_w (__m128i); ++__m128i __lsx_vnori_b (__m128i, imm0_255); ++__m128i __lsx_vnor_v (__m128i, __m128i); ++__m128i __lsx_vori_b (__m128i, imm0_255); ++__m128i __lsx_vorn_v (__m128i, __m128i); ++__m128i __lsx_vor_v (__m128i, __m128i); ++__m128i __lsx_vpackev_b (__m128i, __m128i); ++__m128i __lsx_vpackev_d (__m128i, __m128i); ++__m128i __lsx_vpackev_h (__m128i, __m128i); ++__m128i __lsx_vpackev_w (__m128i, __m128i); ++__m128i __lsx_vpackod_b (__m128i, __m128i); ++__m128i __lsx_vpackod_d (__m128i, __m128i); ++__m128i __lsx_vpackod_h (__m128i, __m128i); ++__m128i __lsx_vpackod_w (__m128i, __m128i); ++__m128i __lsx_vpcnt_b (__m128i); ++__m128i __lsx_vpcnt_d (__m128i); ++__m128i __lsx_vpcnt_h (__m128i); ++__m128i __lsx_vpcnt_w (__m128i); ++__m128i __lsx_vpermi_w (__m128i, __m128i, imm0_255); ++__m128i __lsx_vpickev_b (__m128i, __m128i); ++__m128i __lsx_vpickev_d (__m128i, __m128i); ++__m128i __lsx_vpickev_h (__m128i, __m128i); ++__m128i __lsx_vpickev_w (__m128i, __m128i); ++__m128i __lsx_vpickod_b (__m128i, __m128i); ++__m128i __lsx_vpickod_d (__m128i, __m128i); ++__m128i __lsx_vpickod_h (__m128i, __m128i); ++__m128i __lsx_vpickod_w (__m128i, __m128i); ++int __lsx_vpickve2gr_b (__m128i, imm0_15); ++unsigned int __lsx_vpickve2gr_bu (__m128i, imm0_15); ++long int __lsx_vpickve2gr_d (__m128i, imm0_1); ++unsigned long int __lsx_vpickve2gr_du (__m128i, imm0_1); ++int __lsx_vpickve2gr_h (__m128i, imm0_7); ++unsigned int __lsx_vpickve2gr_hu (__m128i, imm0_7); ++int __lsx_vpickve2gr_w (__m128i, imm0_3); ++unsigned int __lsx_vpickve2gr_wu (__m128i, imm0_3); ++__m128i __lsx_vreplgr2vr_b (int); ++__m128i __lsx_vreplgr2vr_d (long int); ++__m128i __lsx_vreplgr2vr_h (int); ++__m128i __lsx_vreplgr2vr_w (int); ++__m128i __lsx_vrepli_b (imm_n512_511); ++__m128i __lsx_vrepli_d (imm_n512_511); ++__m128i __lsx_vrepli_h (imm_n512_511); ++__m128i __lsx_vrepli_w (imm_n512_511); ++__m128i __lsx_vreplve_b
(__m128i, int); ++__m128i __lsx_vreplve_d (__m128i, int); ++__m128i __lsx_vreplve_h (__m128i, int); ++__m128i __lsx_vreplvei_b (__m128i, imm0_15); ++__m128i __lsx_vreplvei_d (__m128i, imm0_1); ++__m128i __lsx_vreplvei_h (__m128i, imm0_7); ++__m128i __lsx_vreplvei_w (__m128i, imm0_3); ++__m128i __lsx_vreplve_w (__m128i, int); ++__m128i __lsx_vrotr_b (__m128i, __m128i); ++__m128i __lsx_vrotr_d (__m128i, __m128i); ++__m128i __lsx_vrotr_h (__m128i, __m128i); ++__m128i __lsx_vrotri_b (__m128i, imm0_7); ++__m128i __lsx_vrotri_d (__m128i, imm0_63); ++__m128i __lsx_vrotri_h (__m128i, imm0_15); ++__m128i __lsx_vrotri_w (__m128i, imm0_31); ++__m128i __lsx_vrotr_w (__m128i, __m128i); ++__m128i __lsx_vsadd_b (__m128i, __m128i); ++__m128i __lsx_vsadd_bu (__m128i, __m128i); ++__m128i __lsx_vsadd_d (__m128i, __m128i); ++__m128i __lsx_vsadd_du (__m128i, __m128i); ++__m128i __lsx_vsadd_h (__m128i, __m128i); ++__m128i __lsx_vsadd_hu (__m128i, __m128i); ++__m128i __lsx_vsadd_w (__m128i, __m128i); ++__m128i __lsx_vsadd_wu (__m128i, __m128i); ++__m128i __lsx_vsat_b (__m128i, imm0_7); ++__m128i __lsx_vsat_bu (__m128i, imm0_7); ++__m128i __lsx_vsat_d (__m128i, imm0_63); ++__m128i __lsx_vsat_du (__m128i, imm0_63); ++__m128i __lsx_vsat_h (__m128i, imm0_15); ++__m128i __lsx_vsat_hu (__m128i, imm0_15); ++__m128i __lsx_vsat_w (__m128i, imm0_31); ++__m128i __lsx_vsat_wu (__m128i, imm0_31); ++__m128i __lsx_vseq_b (__m128i, __m128i); ++__m128i __lsx_vseq_d (__m128i, __m128i); ++__m128i __lsx_vseq_h (__m128i, __m128i); ++__m128i __lsx_vseqi_b (__m128i, imm_n16_15); ++__m128i __lsx_vseqi_d (__m128i, imm_n16_15); ++__m128i __lsx_vseqi_h (__m128i, imm_n16_15); ++__m128i __lsx_vseqi_w (__m128i, imm_n16_15); ++__m128i __lsx_vseq_w (__m128i, __m128i); ++__m128i __lsx_vshuf4i_b (__m128i, imm0_255); ++__m128i __lsx_vshuf4i_d (__m128i, __m128i, imm0_255); ++__m128i __lsx_vshuf4i_h (__m128i, imm0_255); ++__m128i __lsx_vshuf4i_w (__m128i, imm0_255); ++__m128i __lsx_vshuf_b (__m128i, __m128i, __m128i); 
++__m128i __lsx_vshuf_d (__m128i, __m128i, __m128i); ++__m128i __lsx_vshuf_h (__m128i, __m128i, __m128i); ++__m128i __lsx_vshuf_w (__m128i, __m128i, __m128i); ++__m128i __lsx_vsigncov_b (__m128i, __m128i); ++__m128i __lsx_vsigncov_d (__m128i, __m128i); ++__m128i __lsx_vsigncov_h (__m128i, __m128i); ++__m128i __lsx_vsigncov_w (__m128i, __m128i); ++__m128i __lsx_vsigncov_b (__m128i, __m128i); ++__m128i __lsx_vsigncov_d (__m128i, __m128i); ++__m128i __lsx_vsigncov_h (__m128i, __m128i); ++__m128i __lsx_vsigncov_w (__m128i, __m128i); ++__m128i __lsx_vsle_b (__m128i, __m128i); ++__m128i __lsx_vsle_bu (__m128i, __m128i); ++__m128i __lsx_vsle_d (__m128i, __m128i); ++__m128i __lsx_vsle_du (__m128i, __m128i); ++__m128i __lsx_vsle_h (__m128i, __m128i); ++__m128i __lsx_vsle_hu (__m128i, __m128i); ++__m128i __lsx_vslei_b (__m128i, imm_n16_15); ++__m128i __lsx_vslei_bu (__m128i, imm0_31); ++__m128i __lsx_vslei_d (__m128i, imm_n16_15); ++__m128i __lsx_vslei_du (__m128i, imm0_31); ++__m128i __lsx_vslei_h (__m128i, imm_n16_15); ++__m128i __lsx_vslei_hu (__m128i, imm0_31); ++__m128i __lsx_vslei_w (__m128i, imm_n16_15); ++__m128i __lsx_vslei_wu (__m128i, imm0_31); ++__m128i __lsx_vsle_w (__m128i, __m128i); ++__m128i __lsx_vsle_wu (__m128i, __m128i); ++__m128i __lsx_vsll_b (__m128i, __m128i); ++__m128i __lsx_vsll_d (__m128i, __m128i); ++__m128i __lsx_vsll_h (__m128i, __m128i); ++__m128i __lsx_vslli_b (__m128i, imm0_7); ++__m128i __lsx_vslli_d (__m128i, imm0_63); ++__m128i __lsx_vslli_h (__m128i, imm0_15); ++__m128i __lsx_vslli_w (__m128i, imm0_31); ++__m128i __lsx_vsll_w (__m128i, __m128i); ++__m128i __lsx_vsllwil_du_wu (__m128i, imm0_31); ++__m128i __lsx_vsllwil_d_w (__m128i, imm0_31); ++__m128i __lsx_vsllwil_h_b (__m128i, imm0_7); ++__m128i __lsx_vsllwil_hu_bu (__m128i, imm0_7); ++__m128i __lsx_vsllwil_w_h (__m128i, imm0_15); ++__m128i __lsx_vsllwil_wu_hu (__m128i, imm0_15); ++__m128i __lsx_vslt_b (__m128i, __m128i); ++__m128i __lsx_vslt_bu (__m128i, __m128i); ++__m128i __lsx_vslt_d 
(__m128i, __m128i); ++__m128i __lsx_vslt_du (__m128i, __m128i); ++__m128i __lsx_vslt_h (__m128i, __m128i); ++__m128i __lsx_vslt_hu (__m128i, __m128i); ++__m128i __lsx_vslti_b (__m128i, imm_n16_15); ++__m128i __lsx_vslti_bu (__m128i, imm0_31); ++__m128i __lsx_vslti_d (__m128i, imm_n16_15); ++__m128i __lsx_vslti_du (__m128i, imm0_31); ++__m128i __lsx_vslti_h (__m128i, imm_n16_15); ++__m128i __lsx_vslti_hu (__m128i, imm0_31); ++__m128i __lsx_vslti_w (__m128i, imm_n16_15); ++__m128i __lsx_vslti_wu (__m128i, imm0_31); ++__m128i __lsx_vslt_w (__m128i, __m128i); ++__m128i __lsx_vslt_wu (__m128i, __m128i); ++__m128i __lsx_vsra_b (__m128i, __m128i); ++__m128i __lsx_vsra_d (__m128i, __m128i); ++__m128i __lsx_vsra_h (__m128i, __m128i); ++__m128i __lsx_vsrai_b (__m128i, imm0_7); ++__m128i __lsx_vsrai_d (__m128i, imm0_63); ++__m128i __lsx_vsrai_h (__m128i, imm0_15); ++__m128i __lsx_vsrai_w (__m128i, imm0_31); ++__m128i __lsx_vsran_b_h (__m128i, __m128i); ++__m128i __lsx_vsran_h_w (__m128i, __m128i); ++__m128i __lsx_vsrani_b_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vsrani_d_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vsrani_h_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vsrani_w_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vsran_w_d (__m128i, __m128i); ++__m128i __lsx_vsrar_b (__m128i, __m128i); ++__m128i __lsx_vsrar_d (__m128i, __m128i); ++__m128i __lsx_vsrar_h (__m128i, __m128i); ++__m128i __lsx_vsrari_b (__m128i, imm0_7); ++__m128i __lsx_vsrari_d (__m128i, imm0_63); ++__m128i __lsx_vsrari_h (__m128i, imm0_15); ++__m128i __lsx_vsrari_w (__m128i, imm0_31); ++__m128i __lsx_vsrarn_b_h (__m128i, __m128i); ++__m128i __lsx_vsrarn_h_w (__m128i, __m128i); ++__m128i __lsx_vsrarni_b_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vsrarni_d_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vsrarni_h_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vsrarni_w_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vsrarn_w_d (__m128i, __m128i); ++__m128i __lsx_vsrar_w (__m128i, __m128i);
++__m128i __lsx_vsra_w (__m128i, __m128i); ++__m128i __lsx_vsrl_b (__m128i, __m128i); ++__m128i __lsx_vsrl_d (__m128i, __m128i); ++__m128i __lsx_vsrl_h (__m128i, __m128i); ++__m128i __lsx_vsrli_b (__m128i, imm0_7); ++__m128i __lsx_vsrli_d (__m128i, imm0_63); ++__m128i __lsx_vsrli_h (__m128i, imm0_15); ++__m128i __lsx_vsrli_w (__m128i, imm0_31); ++__m128i __lsx_vsrln_b_h (__m128i, __m128i); ++__m128i __lsx_vsrln_h_w (__m128i, __m128i); ++__m128i __lsx_vsrlni_b_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vsrlni_d_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vsrlni_h_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vsrlni_w_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vsrln_w_d (__m128i, __m128i); ++__m128i __lsx_vsrlr_b (__m128i, __m128i); ++__m128i __lsx_vsrlr_d (__m128i, __m128i); ++__m128i __lsx_vsrlr_h (__m128i, __m128i); ++__m128i __lsx_vsrlri_b (__m128i, imm0_7); ++__m128i __lsx_vsrlri_d (__m128i, imm0_63); ++__m128i __lsx_vsrlri_h (__m128i, imm0_15); ++__m128i __lsx_vsrlri_w (__m128i, imm0_31); ++__m128i __lsx_vsrlrn_b_h (__m128i, __m128i); ++__m128i __lsx_vsrlrn_h_w (__m128i, __m128i); ++__m128i __lsx_vsrlrni_b_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vsrlrni_d_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vsrlrni_h_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vsrlrni_w_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vsrlrn_w_d (__m128i, __m128i); ++__m128i __lsx_vsrlr_w (__m128i, __m128i); ++__m128i __lsx_vsrl_w (__m128i, __m128i); ++__m128i __lsx_vssran_b_h (__m128i, __m128i); ++__m128i __lsx_vssran_bu_h (__m128i, __m128i); ++__m128i __lsx_vssran_hu_w (__m128i, __m128i); ++__m128i __lsx_vssran_h_w (__m128i, __m128i); ++__m128i __lsx_vssrani_b_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vssrani_bu_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vssrani_d_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vssrani_du_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vssrani_hu_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vssrani_h_w (__m128i,
__m128i, imm0_31); ++__m128i __lsx_vssrani_w_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vssrani_wu_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vssran_w_d (__m128i, __m128i); ++__m128i __lsx_vssran_wu_d (__m128i, __m128i); ++__m128i __lsx_vssrarn_b_h (__m128i, __m128i); ++__m128i __lsx_vssrarn_bu_h (__m128i, __m128i); ++__m128i __lsx_vssrarn_hu_w (__m128i, __m128i); ++__m128i __lsx_vssrarn_h_w (__m128i, __m128i); ++__m128i __lsx_vssrarni_b_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vssrarni_bu_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vssrarni_d_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vssrarni_du_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vssrarni_hu_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vssrarni_h_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vssrarni_w_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vssrarni_wu_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vssrarn_w_d (__m128i, __m128i); ++__m128i __lsx_vssrarn_wu_d (__m128i, __m128i); ++__m128i __lsx_vssrln_b_h (__m128i, __m128i); ++__m128i __lsx_vssrln_bu_h (__m128i, __m128i); ++__m128i __lsx_vssrln_hu_w (__m128i, __m128i); ++__m128i __lsx_vssrln_h_w (__m128i, __m128i); ++__m128i __lsx_vssrlni_b_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vssrlni_bu_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vssrlni_d_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vssrlni_du_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vssrlni_hu_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vssrlni_h_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vssrlni_w_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vssrlni_wu_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vssrln_w_d (__m128i, __m128i); ++__m128i __lsx_vssrln_wu_d (__m128i, __m128i); ++__m128i __lsx_vssrlrn_b_h (__m128i, __m128i); ++__m128i __lsx_vssrlrn_bu_h (__m128i, __m128i); ++__m128i __lsx_vssrlrn_hu_w (__m128i, __m128i); ++__m128i __lsx_vssrlrn_h_w (__m128i, __m128i); ++__m128i __lsx_vssrlrni_b_h (__m128i, __m128i, imm0_15); ++__m128i
__lsx_vssrlrni_bu_h (__m128i, __m128i, imm0_15); ++__m128i __lsx_vssrlrni_d_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vssrlrni_du_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vssrlrni_hu_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vssrlrni_h_w (__m128i, __m128i, imm0_31); ++__m128i __lsx_vssrlrni_w_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vssrlrni_wu_d (__m128i, __m128i, imm0_63); ++__m128i __lsx_vssrlrn_w_d (__m128i, __m128i); ++__m128i __lsx_vssrlrn_wu_d (__m128i, __m128i); ++__m128i __lsx_vssub_b (__m128i, __m128i); ++__m128i __lsx_vssub_bu (__m128i, __m128i); ++__m128i __lsx_vssub_d (__m128i, __m128i); ++__m128i __lsx_vssub_du (__m128i, __m128i); ++__m128i __lsx_vssub_h (__m128i, __m128i); ++__m128i __lsx_vssub_hu (__m128i, __m128i); ++__m128i __lsx_vssub_w (__m128i, __m128i); ++__m128i __lsx_vssub_wu (__m128i, __m128i); ++void __lsx_vst (__m128i, void *, imm_n2048_2047); ++void __lsx_vstelm_b (__m128i, void *, imm_n128_127, idx); ++void __lsx_vstelm_d (__m128i, void *, imm_n128_127, idx); ++void __lsx_vstelm_h (__m128i, void *, imm_n128_127, idx); ++void __lsx_vstelm_w (__m128i, void *, imm_n128_127, idx); ++void __lsx_vstx (__m128i, void *, long int); ++__m128i __lsx_vsub_b (__m128i, __m128i); ++__m128i __lsx_vsub_d (__m128i, __m128i); ++__m128i __lsx_vsub_h (__m128i, __m128i); ++__m128i __lsx_vsubi_bu (__m128i, imm0_31); ++__m128i __lsx_vsubi_du (__m128i, imm0_31); ++__m128i __lsx_vsubi_hu (__m128i, imm0_31); ++__m128i __lsx_vsubi_wu (__m128i, imm0_31); ++__m128i __lsx_vsub_q (__m128i, __m128i); ++__m128i __lsx_vsub_w (__m128i, __m128i); ++__m128i __lsx_vsubwev_d_w (__m128i, __m128i); ++__m128i __lsx_vsubwev_d_wu (__m128i, __m128i); ++__m128i __lsx_vsubwev_h_b (__m128i, __m128i); ++__m128i __lsx_vsubwev_h_bu (__m128i, __m128i); ++__m128i __lsx_vsubwev_q_d (__m128i, __m128i); ++__m128i __lsx_vsubwev_q_du (__m128i, __m128i); ++__m128i __lsx_vsubwev_w_h (__m128i, __m128i); ++__m128i __lsx_vsubwev_w_hu (__m128i, __m128i); ++__m128i
__lsx_vsubwod_d_w (__m128i, __m128i); ++__m128i __lsx_vsubwod_d_wu (__m128i, __m128i); ++__m128i __lsx_vsubwod_h_b (__m128i, __m128i); ++__m128i __lsx_vsubwod_h_bu (__m128i, __m128i); ++__m128i __lsx_vsubwod_q_d (__m128i, __m128i); ++__m128i __lsx_vsubwod_q_du (__m128i, __m128i); ++__m128i __lsx_vsubwod_w_h (__m128i, __m128i); ++__m128i __lsx_vsubwod_w_hu (__m128i, __m128i); ++__m128i __lsx_vxori_b (__m128i, imm0_255); ++__m128i __lsx_vxor_v (__m128i, __m128i); ++@end smallexample ++ ++@node LoongArch ASX Vector Intrinsics ++@subsection LoongArch ASX Vector Intrinsics ++ ++GCC provides intrinsics to access the LASX (Loongson Advanced SIMD Extension) ++instructions. The interface is made available by including @code{<lasxintrin.h>} ++and using @option{-mlasx}. ++ ++The following vector typedefs are included in @code{lasxintrin.h}: ++ ++@itemize ++@item @code{__m256i}, a 256-bit vector of fixed point; ++@item @code{__m256}, a 256-bit vector of single precision floating point; ++@item @code{__m256d}, a 256-bit vector of double precision floating point. ++@end itemize ++ ++Instructions and corresponding built-ins may have additional restrictions and/or ++input/output values manipulated: ++ ++@itemize ++@item @code{imm0_1}, an integer literal in range 0 to 1. ++@item @code{imm0_3}, an integer literal in range 0 to 3. ++@item @code{imm0_7}, an integer literal in range 0 to 7. ++@item @code{imm0_15}, an integer literal in range 0 to 15. ++@item @code{imm0_31}, an integer literal in range 0 to 31. ++@item @code{imm0_63}, an integer literal in range 0 to 63. ++@item @code{imm0_127}, an integer literal in range 0 to 127. ++@item @code{imm0_255}, an integer literal in range 0 to 255. ++@item @code{imm_n16_15}, an integer literal in range -16 to 15. ++@item @code{imm_n128_127}, an integer literal in range -128 to 127. ++@item @code{imm_n256_255}, an integer literal in range -256 to 255. ++@item @code{imm_n512_511}, an integer literal in range -512 to 511.
++@item @code{imm_n1024_1023}, an integer literal in range -1024 to 1023. ++@item @code{imm_n2048_2047}, an integer literal in range -2048 to 2047. ++@end itemize ++ ++For convenience, GCC defines functions @code{__lasx_xvrepli_@{b/h/w/d@}} and ++@code{__lasx_xb[n]z_@{v/b/h/w/d@}}, which are implemented as follows: ++ ++@smallexample ++a. @code{__lasx_xvrepli_@{b/h/w/d@}}: Implements the case where the highest ++ bit of the @code{xvldi} instruction's @code{i13} operand is 0. ++ ++ i13[12] == 1'b0 ++ case i13[11:10] of : ++ 2'b00: __lasx_xvrepli_b (imm_n512_511) ++ 2'b01: __lasx_xvrepli_h (imm_n512_511) ++ 2'b10: __lasx_xvrepli_w (imm_n512_511) ++ 2'b11: __lasx_xvrepli_d (imm_n512_511) ++ ++b. @code{__lasx_xb[n]z_@{v/b/h/w/d@}}: Since instructions of the @code{xvseteqz} class ++ cannot be used on their own, these functions are defined. ++ ++ __lasx_xbz_v => xvseteqz.v + bcnez ++ __lasx_xbnz_v => xvsetnez.v + bcnez ++ __lasx_xbz_b => xvsetanyeqz.b + bcnez ++ __lasx_xbz_h => xvsetanyeqz.h + bcnez ++ __lasx_xbz_w => xvsetanyeqz.w + bcnez ++ __lasx_xbz_d => xvsetanyeqz.d + bcnez ++ __lasx_xbnz_b => xvsetallnez.b + bcnez ++ __lasx_xbnz_h => xvsetallnez.h + bcnez ++ __lasx_xbnz_w => xvsetallnez.w + bcnez ++ __lasx_xbnz_d => xvsetallnez.d + bcnez ++@end smallexample ++ ++@smallexample ++eg: ++ #include <lasxintrin.h> ++ ++ extern __m256i @var{a}; ++ ++ void ++ test (void) ++ @{ ++ if (__lasx_xbz_v (@var{a})) ++ printf ("1\n"); ++ else ++ printf ("2\n"); ++ @} ++@end smallexample ++ ++@emph{Note:} For instructions where the destination operand is also a source operand ++(modifying only part of the bitfield of the destination register), the first parameter ++in the builtin call function is used as the destination operand. 
++ ++@smallexample ++eg: ++ #include <lasxintrin.h> ++ extern __m256i @var{dst}; ++ int @var{src}; ++ ++ void ++ test (void) ++ @{ ++ @var{dst} = __lasx_xvinsgr2vr_w (@var{dst}, @var{src}, 3); ++ @} ++@end smallexample ++ ++ ++The intrinsics provided are listed below: ++ ++@smallexample ++__m256i __lasx_vext2xv_d_b (__m256i); ++__m256i __lasx_vext2xv_d_h (__m256i); ++__m256i __lasx_vext2xv_du_bu (__m256i); ++__m256i __lasx_vext2xv_du_hu (__m256i); ++__m256i __lasx_vext2xv_du_wu (__m256i); ++__m256i __lasx_vext2xv_d_w (__m256i); ++__m256i __lasx_vext2xv_h_b (__m256i); ++__m256i __lasx_vext2xv_hu_bu (__m256i); ++__m256i __lasx_vext2xv_w_b (__m256i); ++__m256i __lasx_vext2xv_w_h (__m256i); ++__m256i __lasx_vext2xv_wu_bu (__m256i); ++__m256i __lasx_vext2xv_wu_hu (__m256i); ++int __lasx_xbnz_b (__m256i); ++int __lasx_xbnz_d (__m256i); ++int __lasx_xbnz_h (__m256i); ++int __lasx_xbnz_v (__m256i); ++int __lasx_xbnz_w (__m256i); ++int __lasx_xbz_b (__m256i); ++int __lasx_xbz_d (__m256i); ++int __lasx_xbz_h (__m256i); ++int __lasx_xbz_v (__m256i); ++int __lasx_xbz_w (__m256i); ++__m256i __lasx_xvabsd_b (__m256i, __m256i); ++__m256i __lasx_xvabsd_bu (__m256i, __m256i); ++__m256i __lasx_xvabsd_d (__m256i, __m256i); ++__m256i __lasx_xvabsd_du (__m256i, __m256i); ++__m256i __lasx_xvabsd_h (__m256i, __m256i); ++__m256i __lasx_xvabsd_hu (__m256i, __m256i); ++__m256i __lasx_xvabsd_w (__m256i, __m256i); ++__m256i __lasx_xvabsd_wu (__m256i, __m256i); ++__m256i __lasx_xvadda_b (__m256i, __m256i); ++__m256i __lasx_xvadda_d (__m256i, __m256i); ++__m256i __lasx_xvadda_h (__m256i, __m256i); ++__m256i __lasx_xvadda_w (__m256i, __m256i); ++__m256i __lasx_xvadd_b (__m256i, __m256i); ++__m256i __lasx_xvadd_d (__m256i, __m256i); ++__m256i __lasx_xvadd_h (__m256i, __m256i); ++__m256i __lasx_xvaddi_bu (__m256i, imm0_31); ++__m256i __lasx_xvaddi_du (__m256i, imm0_31); ++__m256i __lasx_xvaddi_hu (__m256i, imm0_31); ++__m256i __lasx_xvaddi_wu (__m256i, imm0_31); ++__m256i __lasx_xvadd_q (__m256i, __m256i); 
++__m256i __lasx_xvadd_w (__m256i, __m256i); ++__m256i __lasx_xvaddwev_d_w (__m256i, __m256i); ++__m256i __lasx_xvaddwev_d_wu (__m256i, __m256i); ++__m256i __lasx_xvaddwev_d_wu_w (__m256i, __m256i); ++__m256i __lasx_xvaddwev_h_b (__m256i, __m256i); ++__m256i __lasx_xvaddwev_h_bu (__m256i, __m256i); ++__m256i __lasx_xvaddwev_h_bu_b (__m256i, __m256i); ++__m256i __lasx_xvaddwev_q_d (__m256i, __m256i); ++__m256i __lasx_xvaddwev_q_du (__m256i, __m256i); ++__m256i __lasx_xvaddwev_q_du_d (__m256i, __m256i); ++__m256i __lasx_xvaddwev_w_h (__m256i, __m256i); ++__m256i __lasx_xvaddwev_w_hu (__m256i, __m256i); ++__m256i __lasx_xvaddwev_w_hu_h (__m256i, __m256i); ++__m256i __lasx_xvaddwod_d_w (__m256i, __m256i); ++__m256i __lasx_xvaddwod_d_wu (__m256i, __m256i); ++__m256i __lasx_xvaddwod_d_wu_w (__m256i, __m256i); ++__m256i __lasx_xvaddwod_h_b (__m256i, __m256i); ++__m256i __lasx_xvaddwod_h_bu (__m256i, __m256i); ++__m256i __lasx_xvaddwod_h_bu_b (__m256i, __m256i); ++__m256i __lasx_xvaddwod_q_d (__m256i, __m256i); ++__m256i __lasx_xvaddwod_q_du (__m256i, __m256i); ++__m256i __lasx_xvaddwod_q_du_d (__m256i, __m256i); ++__m256i __lasx_xvaddwod_w_h (__m256i, __m256i); ++__m256i __lasx_xvaddwod_w_hu (__m256i, __m256i); ++__m256i __lasx_xvaddwod_w_hu_h (__m256i, __m256i); ++__m256i __lasx_xvandi_b (__m256i, imm0_255); ++__m256i __lasx_xvandn_v (__m256i, __m256i); ++__m256i __lasx_xvand_v (__m256i, __m256i); ++__m256i __lasx_xvavg_b (__m256i, __m256i); ++__m256i __lasx_xvavg_bu (__m256i, __m256i); ++__m256i __lasx_xvavg_d (__m256i, __m256i); ++__m256i __lasx_xvavg_du (__m256i, __m256i); ++__m256i __lasx_xvavg_h (__m256i, __m256i); ++__m256i __lasx_xvavg_hu (__m256i, __m256i); ++__m256i __lasx_xvavgr_b (__m256i, __m256i); ++__m256i __lasx_xvavgr_bu (__m256i, __m256i); ++__m256i __lasx_xvavgr_d (__m256i, __m256i); ++__m256i __lasx_xvavgr_du (__m256i, __m256i); ++__m256i __lasx_xvavgr_h (__m256i, __m256i); ++__m256i __lasx_xvavgr_hu (__m256i, __m256i); ++__m256i __lasx_xvavgr_w 
(__m256i, __m256i); ++__m256i __lasx_xvavgr_wu (__m256i, __m256i); ++__m256i __lasx_xvavg_w (__m256i, __m256i); ++__m256i __lasx_xvavg_wu (__m256i, __m256i); ++__m256i __lasx_xvbitclr_b (__m256i, __m256i); ++__m256i __lasx_xvbitclr_d (__m256i, __m256i); ++__m256i __lasx_xvbitclr_h (__m256i, __m256i); ++__m256i __lasx_xvbitclri_b (__m256i, imm0_7); ++__m256i __lasx_xvbitclri_d (__m256i, imm0_63); ++__m256i __lasx_xvbitclri_h (__m256i, imm0_15); ++__m256i __lasx_xvbitclri_w (__m256i, imm0_31); ++__m256i __lasx_xvbitclr_w (__m256i, __m256i); ++__m256i __lasx_xvbitrev_b (__m256i, __m256i); ++__m256i __lasx_xvbitrev_d (__m256i, __m256i); ++__m256i __lasx_xvbitrev_h (__m256i, __m256i); ++__m256i __lasx_xvbitrevi_b (__m256i, imm0_7); ++__m256i __lasx_xvbitrevi_d (__m256i, imm0_63); ++__m256i __lasx_xvbitrevi_h (__m256i, imm0_15); ++__m256i __lasx_xvbitrevi_w (__m256i, imm0_31); ++__m256i __lasx_xvbitrev_w (__m256i, __m256i); ++__m256i __lasx_xvbitseli_b (__m256i, __m256i, imm0_255); ++__m256i __lasx_xvbitsel_v (__m256i, __m256i, __m256i); ++__m256i __lasx_xvbitset_b (__m256i, __m256i); ++__m256i __lasx_xvbitset_d (__m256i, __m256i); ++__m256i __lasx_xvbitset_h (__m256i, __m256i); ++__m256i __lasx_xvbitseti_b (__m256i, imm0_7); ++__m256i __lasx_xvbitseti_d (__m256i, imm0_63); ++__m256i __lasx_xvbitseti_h (__m256i, imm0_15); ++__m256i __lasx_xvbitseti_w (__m256i, imm0_31); ++__m256i __lasx_xvbitset_w (__m256i, __m256i); ++__m256i __lasx_xvbsll_v (__m256i, imm0_31); ++__m256i __lasx_xvbsrl_v (__m256i, imm0_31); ++__m256i __lasx_xvclo_b (__m256i); ++__m256i __lasx_xvclo_d (__m256i); ++__m256i __lasx_xvclo_h (__m256i); ++__m256i __lasx_xvclo_w (__m256i); ++__m256i __lasx_xvclz_b (__m256i); ++__m256i __lasx_xvclz_d (__m256i); ++__m256i __lasx_xvclz_h (__m256i); ++__m256i __lasx_xvclz_w (__m256i); ++__m256i __lasx_xvdiv_b (__m256i, __m256i); ++__m256i __lasx_xvdiv_bu (__m256i, __m256i); ++__m256i __lasx_xvdiv_d (__m256i, __m256i); ++__m256i __lasx_xvdiv_du (__m256i, __m256i); 
++__m256i __lasx_xvdiv_h (__m256i, __m256i); ++__m256i __lasx_xvdiv_hu (__m256i, __m256i); ++__m256i __lasx_xvdiv_w (__m256i, __m256i); ++__m256i __lasx_xvdiv_wu (__m256i, __m256i); ++__m256i __lasx_xvexth_du_wu (__m256i); ++__m256i __lasx_xvexth_d_w (__m256i); ++__m256i __lasx_xvexth_h_b (__m256i); ++__m256i __lasx_xvexth_hu_bu (__m256i); ++__m256i __lasx_xvexth_q_d (__m256i); ++__m256i __lasx_xvexth_qu_du (__m256i); ++__m256i __lasx_xvexth_w_h (__m256i); ++__m256i __lasx_xvexth_wu_hu (__m256i); ++__m256i __lasx_xvextl_q_d (__m256i); ++__m256i __lasx_xvextl_qu_du (__m256i); ++__m256i __lasx_xvextrins_b (__m256i, __m256i, imm0_255); ++__m256i __lasx_xvextrins_d (__m256i, __m256i, imm0_255); ++__m256i __lasx_xvextrins_h (__m256i, __m256i, imm0_255); ++__m256i __lasx_xvextrins_w (__m256i, __m256i, imm0_255); ++__m256d __lasx_xvfadd_d (__m256d, __m256d); ++__m256 __lasx_xvfadd_s (__m256, __m256); ++__m256i __lasx_xvfclass_d (__m256d); ++__m256i __lasx_xvfclass_s (__m256); ++__m256i __lasx_xvfcmp_caf_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_caf_s (__m256, __m256); ++__m256i __lasx_xvfcmp_ceq_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_ceq_s (__m256, __m256); ++__m256i __lasx_xvfcmp_cle_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_cle_s (__m256, __m256); ++__m256i __lasx_xvfcmp_clt_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_clt_s (__m256, __m256); ++__m256i __lasx_xvfcmp_cne_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_cne_s (__m256, __m256); ++__m256i __lasx_xvfcmp_cor_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_cor_s (__m256, __m256); ++__m256i __lasx_xvfcmp_cueq_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_cueq_s (__m256, __m256); ++__m256i __lasx_xvfcmp_cule_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_cule_s (__m256, __m256); ++__m256i __lasx_xvfcmp_cult_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_cult_s (__m256, __m256); ++__m256i __lasx_xvfcmp_cun_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_cune_d (__m256d, __m256d); ++__m256i 
__lasx_xvfcmp_cune_s (__m256, __m256); ++__m256i __lasx_xvfcmp_cun_s (__m256, __m256); ++__m256i __lasx_xvfcmp_saf_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_saf_s (__m256, __m256); ++__m256i __lasx_xvfcmp_seq_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_seq_s (__m256, __m256); ++__m256i __lasx_xvfcmp_sle_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_sle_s (__m256, __m256); ++__m256i __lasx_xvfcmp_slt_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_slt_s (__m256, __m256); ++__m256i __lasx_xvfcmp_sne_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_sne_s (__m256, __m256); ++__m256i __lasx_xvfcmp_sor_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_sor_s (__m256, __m256); ++__m256i __lasx_xvfcmp_sueq_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_sueq_s (__m256, __m256); ++__m256i __lasx_xvfcmp_sule_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_sule_s (__m256, __m256); ++__m256i __lasx_xvfcmp_sult_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_sult_s (__m256, __m256); ++__m256i __lasx_xvfcmp_sun_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_sune_d (__m256d, __m256d); ++__m256i __lasx_xvfcmp_sune_s (__m256, __m256); ++__m256i __lasx_xvfcmp_sun_s (__m256, __m256); ++__m256d __lasx_xvfcvth_d_s (__m256); ++__m256i __lasx_xvfcvt_h_s (__m256, __m256); ++__m256 __lasx_xvfcvth_s_h (__m256i); ++__m256d __lasx_xvfcvtl_d_s (__m256); ++__m256 __lasx_xvfcvtl_s_h (__m256i); ++__m256 __lasx_xvfcvt_s_d (__m256d, __m256d); ++__m256d __lasx_xvfdiv_d (__m256d, __m256d); ++__m256 __lasx_xvfdiv_s (__m256, __m256); ++__m256d __lasx_xvffint_d_l (__m256i); ++__m256d __lasx_xvffint_d_lu (__m256i); ++__m256d __lasx_xvffinth_d_w (__m256i); ++__m256d __lasx_xvffintl_d_w (__m256i); ++__m256 __lasx_xvffint_s_l (__m256i, __m256i); ++__m256 __lasx_xvffint_s_w (__m256i); ++__m256 __lasx_xvffint_s_wu (__m256i); ++__m256d __lasx_xvflogb_d (__m256d); ++__m256 __lasx_xvflogb_s (__m256); ++__m256d __lasx_xvfmadd_d (__m256d, __m256d, __m256d); ++__m256 __lasx_xvfmadd_s (__m256, __m256, __m256); ++__m256d 
__lasx_xvfmaxa_d (__m256d, __m256d); ++__m256 __lasx_xvfmaxa_s (__m256, __m256); ++__m256d __lasx_xvfmax_d (__m256d, __m256d); ++__m256 __lasx_xvfmax_s (__m256, __m256); ++__m256d __lasx_xvfmina_d (__m256d, __m256d); ++__m256 __lasx_xvfmina_s (__m256, __m256); ++__m256d __lasx_xvfmin_d (__m256d, __m256d); ++__m256 __lasx_xvfmin_s (__m256, __m256); ++__m256d __lasx_xvfmsub_d (__m256d, __m256d, __m256d); ++__m256 __lasx_xvfmsub_s (__m256, __m256, __m256); ++__m256d __lasx_xvfmul_d (__m256d, __m256d); ++__m256 __lasx_xvfmul_s (__m256, __m256); ++__m256d __lasx_xvfnmadd_d (__m256d, __m256d, __m256d); ++__m256 __lasx_xvfnmadd_s (__m256, __m256, __m256); ++__m256d __lasx_xvfnmsub_d (__m256d, __m256d, __m256d); ++__m256 __lasx_xvfnmsub_s (__m256, __m256, __m256); ++__m256d __lasx_xvfrecip_d (__m256d); ++__m256 __lasx_xvfrecip_s (__m256); ++__m256d __lasx_xvfrint_d (__m256d); ++__m256i __lasx_xvfrintrm_d (__m256d); ++__m256i __lasx_xvfrintrm_s (__m256); ++__m256i __lasx_xvfrintrne_d (__m256d); ++__m256i __lasx_xvfrintrne_s (__m256); ++__m256i __lasx_xvfrintrp_d (__m256d); ++__m256i __lasx_xvfrintrp_s (__m256); ++__m256i __lasx_xvfrintrz_d (__m256d); ++__m256i __lasx_xvfrintrz_s (__m256); ++__m256 __lasx_xvfrint_s (__m256); ++__m256d __lasx_xvfrsqrt_d (__m256d); ++__m256 __lasx_xvfrsqrt_s (__m256); ++__m256i __lasx_xvfrstp_b (__m256i, __m256i, __m256i); ++__m256i __lasx_xvfrstp_h (__m256i, __m256i, __m256i); ++__m256i __lasx_xvfrstpi_b (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvfrstpi_h (__m256i, __m256i, imm0_31); ++__m256d __lasx_xvfsqrt_d (__m256d); ++__m256 __lasx_xvfsqrt_s (__m256); ++__m256d __lasx_xvfsub_d (__m256d, __m256d); ++__m256 __lasx_xvfsub_s (__m256, __m256); ++__m256i __lasx_xvftinth_l_s (__m256); ++__m256i __lasx_xvftint_l_d (__m256d); ++__m256i __lasx_xvftintl_l_s (__m256); ++__m256i __lasx_xvftint_lu_d (__m256d); ++__m256i __lasx_xvftintrmh_l_s (__m256); ++__m256i __lasx_xvftintrm_l_d (__m256d); ++__m256i __lasx_xvftintrml_l_s (__m256); ++__m256i 
__lasx_xvftintrm_w_d (__m256d, __m256d); ++__m256i __lasx_xvftintrm_w_s (__m256); ++__m256i __lasx_xvftintrneh_l_s (__m256); ++__m256i __lasx_xvftintrne_l_d (__m256d); ++__m256i __lasx_xvftintrnel_l_s (__m256); ++__m256i __lasx_xvftintrne_w_d (__m256d, __m256d); ++__m256i __lasx_xvftintrne_w_s (__m256); ++__m256i __lasx_xvftintrph_l_s (__m256); ++__m256i __lasx_xvftintrp_l_d (__m256d); ++__m256i __lasx_xvftintrpl_l_s (__m256); ++__m256i __lasx_xvftintrp_w_d (__m256d, __m256d); ++__m256i __lasx_xvftintrp_w_s (__m256); ++__m256i __lasx_xvftintrzh_l_s (__m256); ++__m256i __lasx_xvftintrz_l_d (__m256d); ++__m256i __lasx_xvftintrzl_l_s (__m256); ++__m256i __lasx_xvftintrz_lu_d (__m256d); ++__m256i __lasx_xvftintrz_w_d (__m256d, __m256d); ++__m256i __lasx_xvftintrz_w_s (__m256); ++__m256i __lasx_xvftintrz_wu_s (__m256); ++__m256i __lasx_xvftint_w_d (__m256d, __m256d); ++__m256i __lasx_xvftint_w_s (__m256); ++__m256i __lasx_xvftint_wu_s (__m256); ++__m256i __lasx_xvhaddw_du_wu (__m256i, __m256i); ++__m256i __lasx_xvhaddw_d_w (__m256i, __m256i); ++__m256i __lasx_xvhaddw_h_b (__m256i, __m256i); ++__m256i __lasx_xvhaddw_hu_bu (__m256i, __m256i); ++__m256i __lasx_xvhaddw_q_d (__m256i, __m256i); ++__m256i __lasx_xvhaddw_qu_du (__m256i, __m256i); ++__m256i __lasx_xvhaddw_w_h (__m256i, __m256i); ++__m256i __lasx_xvhaddw_wu_hu (__m256i, __m256i); ++__m256i __lasx_xvhsubw_du_wu (__m256i, __m256i); ++__m256i __lasx_xvhsubw_d_w (__m256i, __m256i); ++__m256i __lasx_xvhsubw_h_b (__m256i, __m256i); ++__m256i __lasx_xvhsubw_hu_bu (__m256i, __m256i); ++__m256i __lasx_xvhsubw_q_d (__m256i, __m256i); ++__m256i __lasx_xvhsubw_qu_du (__m256i, __m256i); ++__m256i __lasx_xvhsubw_w_h (__m256i, __m256i); ++__m256i __lasx_xvhsubw_wu_hu (__m256i, __m256i); ++__m256i __lasx_xvilvh_b (__m256i, __m256i); ++__m256i __lasx_xvilvh_d (__m256i, __m256i); ++__m256i __lasx_xvilvh_h (__m256i, __m256i); ++__m256i __lasx_xvilvh_w (__m256i, __m256i); ++__m256i __lasx_xvilvl_b (__m256i, __m256i); ++__m256i 
__lasx_xvilvl_d (__m256i, __m256i); ++__m256i __lasx_xvilvl_h (__m256i, __m256i); ++__m256i __lasx_xvilvl_w (__m256i, __m256i); ++__m256i __lasx_xvinsgr2vr_d (__m256i, long int, imm0_3); ++__m256i __lasx_xvinsgr2vr_w (__m256i, int, imm0_7); ++__m256i __lasx_xvinsve0_d (__m256i, __m256i, imm0_3); ++__m256i __lasx_xvinsve0_w (__m256i, __m256i, imm0_7); ++__m256i __lasx_xvld (void *, imm_n2048_2047); ++__m256i __lasx_xvldi (imm_n1024_1023); ++__m256i __lasx_xvldrepl_b (void *, imm_n2048_2047); ++__m256i __lasx_xvldrepl_d (void *, imm_n256_255); ++__m256i __lasx_xvldrepl_h (void *, imm_n1024_1023); ++__m256i __lasx_xvldrepl_w (void *, imm_n512_511); ++__m256i __lasx_xvldx (void *, long int); ++__m256i __lasx_xvmadd_b (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmadd_d (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmadd_h (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmadd_w (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_d_w (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_d_wu (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_d_wu_w (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_h_b (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_h_bu (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_h_bu_b (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_q_d (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_q_du (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_q_du_d (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_w_h (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_w_hu (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwev_w_hu_h (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_d_w (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_d_wu (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_d_wu_w (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_h_b (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_h_bu (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_h_bu_b 
(__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_q_d (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_q_du (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_q_du_d (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_w_h (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_w_hu (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmaddwod_w_hu_h (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmax_b (__m256i, __m256i); ++__m256i __lasx_xvmax_bu (__m256i, __m256i); ++__m256i __lasx_xvmax_d (__m256i, __m256i); ++__m256i __lasx_xvmax_du (__m256i, __m256i); ++__m256i __lasx_xvmax_h (__m256i, __m256i); ++__m256i __lasx_xvmax_hu (__m256i, __m256i); ++__m256i __lasx_xvmaxi_b (__m256i, imm_n16_15); ++__m256i __lasx_xvmaxi_bu (__m256i, imm0_31); ++__m256i __lasx_xvmaxi_d (__m256i, imm_n16_15); ++__m256i __lasx_xvmaxi_du (__m256i, imm0_31); ++__m256i __lasx_xvmaxi_h (__m256i, imm_n16_15); ++__m256i __lasx_xvmaxi_hu (__m256i, imm0_31); ++__m256i __lasx_xvmaxi_w (__m256i, imm_n16_15); ++__m256i __lasx_xvmaxi_wu (__m256i, imm0_31); ++__m256i __lasx_xvmax_w (__m256i, __m256i); ++__m256i __lasx_xvmax_wu (__m256i, __m256i); ++__m256i __lasx_xvmin_b (__m256i, __m256i); ++__m256i __lasx_xvmin_bu (__m256i, __m256i); ++__m256i __lasx_xvmin_d (__m256i, __m256i); ++__m256i __lasx_xvmin_du (__m256i, __m256i); ++__m256i __lasx_xvmin_h (__m256i, __m256i); ++__m256i __lasx_xvmin_hu (__m256i, __m256i); ++__m256i __lasx_xvmini_b (__m256i, imm_n16_15); ++__m256i __lasx_xvmini_bu (__m256i, imm0_31); ++__m256i __lasx_xvmini_d (__m256i, imm_n16_15); ++__m256i __lasx_xvmini_du (__m256i, imm0_31); ++__m256i __lasx_xvmini_h (__m256i, imm_n16_15); ++__m256i __lasx_xvmini_hu (__m256i, imm0_31); ++__m256i __lasx_xvmini_w (__m256i, imm_n16_15); ++__m256i __lasx_xvmini_wu (__m256i, imm0_31); ++__m256i __lasx_xvmin_w (__m256i, __m256i); ++__m256i __lasx_xvmin_wu (__m256i, __m256i); ++__m256i __lasx_xvmod_b (__m256i, __m256i); ++__m256i __lasx_xvmod_bu (__m256i, __m256i); ++__m256i 
__lasx_xvmod_d (__m256i, __m256i); ++__m256i __lasx_xvmod_du (__m256i, __m256i); ++__m256i __lasx_xvmod_h (__m256i, __m256i); ++__m256i __lasx_xvmod_hu (__m256i, __m256i); ++__m256i __lasx_xvmod_w (__m256i, __m256i); ++__m256i __lasx_xvmod_wu (__m256i, __m256i); ++__m256i __lasx_xvmskgez_b (__m256i); ++__m256i __lasx_xvmskltz_b (__m256i); ++__m256i __lasx_xvmskltz_d (__m256i); ++__m256i __lasx_xvmskltz_h (__m256i); ++__m256i __lasx_xvmskltz_w (__m256i); ++__m256i __lasx_xvmsknz_b (__m256i); ++__m256i __lasx_xvmsub_b (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmsub_d (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmsub_h (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmsub_w (__m256i, __m256i, __m256i); ++__m256i __lasx_xvmuh_b (__m256i, __m256i); ++__m256i __lasx_xvmuh_bu (__m256i, __m256i); ++__m256i __lasx_xvmuh_d (__m256i, __m256i); ++__m256i __lasx_xvmuh_du (__m256i, __m256i); ++__m256i __lasx_xvmuh_h (__m256i, __m256i); ++__m256i __lasx_xvmuh_hu (__m256i, __m256i); ++__m256i __lasx_xvmuh_w (__m256i, __m256i); ++__m256i __lasx_xvmuh_wu (__m256i, __m256i); ++__m256i __lasx_xvmul_b (__m256i, __m256i); ++__m256i __lasx_xvmul_d (__m256i, __m256i); ++__m256i __lasx_xvmul_h (__m256i, __m256i); ++__m256i __lasx_xvmul_w (__m256i, __m256i); ++__m256i __lasx_xvmulwev_d_w (__m256i, __m256i); ++__m256i __lasx_xvmulwev_d_wu (__m256i, __m256i); ++__m256i __lasx_xvmulwev_d_wu_w (__m256i, __m256i); ++__m256i __lasx_xvmulwev_h_b (__m256i, __m256i); ++__m256i __lasx_xvmulwev_h_bu (__m256i, __m256i); ++__m256i __lasx_xvmulwev_h_bu_b (__m256i, __m256i); ++__m256i __lasx_xvmulwev_q_d (__m256i, __m256i); ++__m256i __lasx_xvmulwev_q_du (__m256i, __m256i); ++__m256i __lasx_xvmulwev_q_du_d (__m256i, __m256i); ++__m256i __lasx_xvmulwev_w_h (__m256i, __m256i); ++__m256i __lasx_xvmulwev_w_hu (__m256i, __m256i); ++__m256i __lasx_xvmulwev_w_hu_h (__m256i, __m256i); ++__m256i __lasx_xvmulwod_d_w (__m256i, __m256i); ++__m256i __lasx_xvmulwod_d_wu (__m256i, __m256i); ++__m256i 
__lasx_xvmulwod_d_wu_w (__m256i, __m256i); ++__m256i __lasx_xvmulwod_h_b (__m256i, __m256i); ++__m256i __lasx_xvmulwod_h_bu (__m256i, __m256i); ++__m256i __lasx_xvmulwod_h_bu_b (__m256i, __m256i); ++__m256i __lasx_xvmulwod_q_d (__m256i, __m256i); ++__m256i __lasx_xvmulwod_q_du (__m256i, __m256i); ++__m256i __lasx_xvmulwod_q_du_d (__m256i, __m256i); ++__m256i __lasx_xvmulwod_w_h (__m256i, __m256i); ++__m256i __lasx_xvmulwod_w_hu (__m256i, __m256i); ++__m256i __lasx_xvmulwod_w_hu_h (__m256i, __m256i); ++__m256i __lasx_xvneg_b (__m256i); ++__m256i __lasx_xvneg_d (__m256i); ++__m256i __lasx_xvneg_h (__m256i); ++__m256i __lasx_xvneg_w (__m256i); ++__m256i __lasx_xvnori_b (__m256i, imm0_255); ++__m256i __lasx_xvnor_v (__m256i, __m256i); ++__m256i __lasx_xvori_b (__m256i, imm0_255); ++__m256i __lasx_xvorn_v (__m256i, __m256i); ++__m256i __lasx_xvor_v (__m256i, __m256i); ++__m256i __lasx_xvpackev_b (__m256i, __m256i); ++__m256i __lasx_xvpackev_d (__m256i, __m256i); ++__m256i __lasx_xvpackev_h (__m256i, __m256i); ++__m256i __lasx_xvpackev_w (__m256i, __m256i); ++__m256i __lasx_xvpackod_b (__m256i, __m256i); ++__m256i __lasx_xvpackod_d (__m256i, __m256i); ++__m256i __lasx_xvpackod_h (__m256i, __m256i); ++__m256i __lasx_xvpackod_w (__m256i, __m256i); ++__m256i __lasx_xvpcnt_b (__m256i); ++__m256i __lasx_xvpcnt_d (__m256i); ++__m256i __lasx_xvpcnt_h (__m256i); ++__m256i __lasx_xvpcnt_w (__m256i); ++__m256i __lasx_xvpermi_d (__m256i, imm0_255); ++__m256i __lasx_xvpermi_q (__m256i, __m256i, imm0_255); ++__m256i __lasx_xvpermi_w (__m256i, __m256i, imm0_255); ++__m256i __lasx_xvperm_w (__m256i, __m256i); ++__m256i __lasx_xvpickev_b (__m256i, __m256i); ++__m256i __lasx_xvpickev_d (__m256i, __m256i); ++__m256i __lasx_xvpickev_h (__m256i, __m256i); ++__m256i __lasx_xvpickev_w (__m256i, __m256i); ++__m256i __lasx_xvpickod_b (__m256i, __m256i); ++__m256i __lasx_xvpickod_d (__m256i, __m256i); ++__m256i __lasx_xvpickod_h (__m256i, __m256i); ++__m256i __lasx_xvpickod_w (__m256i, __m256i); 
++long int __lasx_xvpickve2gr_d (__m256i, imm0_3); ++unsigned long int __lasx_xvpickve2gr_du (__m256i, imm0_3); ++int __lasx_xvpickve2gr_w (__m256i, imm0_7); ++unsigned int __lasx_xvpickve2gr_wu (__m256i, imm0_7); ++__m256i __lasx_xvpickve_d (__m256i, imm0_3); ++__m256d __lasx_xvpickve_d_f (__m256d, imm0_3); ++__m256i __lasx_xvpickve_w (__m256i, imm0_7); ++__m256 __lasx_xvpickve_w_f (__m256, imm0_7); ++__m256i __lasx_xvrepl128vei_b (__m256i, imm0_15); ++__m256i __lasx_xvrepl128vei_d (__m256i, imm0_1); ++__m256i __lasx_xvrepl128vei_h (__m256i, imm0_7); ++__m256i __lasx_xvrepl128vei_w (__m256i, imm0_3); ++__m256i __lasx_xvreplgr2vr_b (int); ++__m256i __lasx_xvreplgr2vr_d (long int); ++__m256i __lasx_xvreplgr2vr_h (int); ++__m256i __lasx_xvreplgr2vr_w (int); ++__m256i __lasx_xvrepli_b (imm_n512_511); ++__m256i __lasx_xvrepli_d (imm_n512_511); ++__m256i __lasx_xvrepli_h (imm_n512_511); ++__m256i __lasx_xvrepli_w (imm_n512_511); ++__m256i __lasx_xvreplve0_b (__m256i); ++__m256i __lasx_xvreplve0_d (__m256i); ++__m256i __lasx_xvreplve0_h (__m256i); ++__m256i __lasx_xvreplve0_q (__m256i); ++__m256i __lasx_xvreplve0_w (__m256i); ++__m256i __lasx_xvreplve_b (__m256i, int); ++__m256i __lasx_xvreplve_d (__m256i, int); ++__m256i __lasx_xvreplve_h (__m256i, int); ++__m256i __lasx_xvreplve_w (__m256i, int); ++__m256i __lasx_xvrotr_b (__m256i, __m256i); ++__m256i __lasx_xvrotr_d (__m256i, __m256i); ++__m256i __lasx_xvrotr_h (__m256i, __m256i); ++__m256i __lasx_xvrotri_b (__m256i, imm0_7); ++__m256i __lasx_xvrotri_d (__m256i, imm0_63); ++__m256i __lasx_xvrotri_h (__m256i, imm0_15); ++__m256i __lasx_xvrotri_w (__m256i, imm0_31); ++__m256i __lasx_xvrotr_w (__m256i, __m256i); ++__m256i __lasx_xvsadd_b (__m256i, __m256i); ++__m256i __lasx_xvsadd_bu (__m256i, __m256i); ++__m256i __lasx_xvsadd_d (__m256i, __m256i); ++__m256i __lasx_xvsadd_du (__m256i, __m256i); ++__m256i __lasx_xvsadd_h (__m256i, __m256i); ++__m256i __lasx_xvsadd_hu (__m256i, __m256i); ++__m256i __lasx_xvsadd_w (__m256i, 
__m256i); ++__m256i __lasx_xvsadd_wu (__m256i, __m256i); ++__m256i __lasx_xvsat_b (__m256i, imm0_7); ++__m256i __lasx_xvsat_bu (__m256i, imm0_7); ++__m256i __lasx_xvsat_d (__m256i, imm0_63); ++__m256i __lasx_xvsat_du (__m256i, imm0_63); ++__m256i __lasx_xvsat_h (__m256i, imm0_15); ++__m256i __lasx_xvsat_hu (__m256i, imm0_15); ++__m256i __lasx_xvsat_w (__m256i, imm0_31); ++__m256i __lasx_xvsat_wu (__m256i, imm0_31); ++__m256i __lasx_xvseq_b (__m256i, __m256i); ++__m256i __lasx_xvseq_d (__m256i, __m256i); ++__m256i __lasx_xvseq_h (__m256i, __m256i); ++__m256i __lasx_xvseqi_b (__m256i, imm_n16_15); ++__m256i __lasx_xvseqi_d (__m256i, imm_n16_15); ++__m256i __lasx_xvseqi_h (__m256i, imm_n16_15); ++__m256i __lasx_xvseqi_w (__m256i, imm_n16_15); ++__m256i __lasx_xvseq_w (__m256i, __m256i); ++__m256i __lasx_xvshuf4i_b (__m256i, imm0_255); ++__m256i __lasx_xvshuf4i_d (__m256i, __m256i, imm0_255); ++__m256i __lasx_xvshuf4i_h (__m256i, imm0_255); ++__m256i __lasx_xvshuf4i_w (__m256i, imm0_255); ++__m256i __lasx_xvshuf_b (__m256i, __m256i, __m256i); ++__m256i __lasx_xvshuf_d (__m256i, __m256i, __m256i); ++__m256i __lasx_xvshuf_h (__m256i, __m256i, __m256i); ++__m256i __lasx_xvshuf_w (__m256i, __m256i, __m256i); ++__m256i __lasx_xvsigncov_b (__m256i, __m256i); ++__m256i __lasx_xvsigncov_d (__m256i, __m256i); ++__m256i __lasx_xvsigncov_h (__m256i, __m256i); ++__m256i __lasx_xvsigncov_w (__m256i, __m256i); ++__m256i __lasx_xvsle_b (__m256i, __m256i); ++__m256i __lasx_xvsle_bu (__m256i, __m256i); ++__m256i __lasx_xvsle_d (__m256i, __m256i); ++__m256i __lasx_xvsle_du (__m256i, __m256i); ++__m256i __lasx_xvsle_h (__m256i, __m256i); ++__m256i __lasx_xvsle_hu (__m256i, __m256i); ++__m256i __lasx_xvslei_b (__m256i, imm_n16_15); ++__m256i __lasx_xvslei_bu (__m256i, imm0_31); ++__m256i __lasx_xvslei_d (__m256i, imm_n16_15); ++__m256i __lasx_xvslei_du (__m256i, imm0_31); ++__m256i __lasx_xvslei_h (__m256i, imm_n16_15); ++__m256i __lasx_xvslei_hu (__m256i, imm0_31); ++__m256i 
__lasx_xvslei_w (__m256i, imm_n16_15); ++__m256i __lasx_xvslei_wu (__m256i, imm0_31); ++__m256i __lasx_xvsle_w (__m256i, __m256i); ++__m256i __lasx_xvsle_wu (__m256i, __m256i); ++__m256i __lasx_xvsll_b (__m256i, __m256i); ++__m256i __lasx_xvsll_d (__m256i, __m256i); ++__m256i __lasx_xvsll_h (__m256i, __m256i); ++__m256i __lasx_xvslli_b (__m256i, imm0_7); ++__m256i __lasx_xvslli_d (__m256i, imm0_63); ++__m256i __lasx_xvslli_h (__m256i, imm0_15); ++__m256i __lasx_xvslli_w (__m256i, imm0_31); ++__m256i __lasx_xvsll_w (__m256i, __m256i); ++__m256i __lasx_xvsllwil_du_wu (__m256i, imm0_31); ++__m256i __lasx_xvsllwil_d_w (__m256i, imm0_31); ++__m256i __lasx_xvsllwil_h_b (__m256i, imm0_7); ++__m256i __lasx_xvsllwil_hu_bu (__m256i, imm0_7); ++__m256i __lasx_xvsllwil_w_h (__m256i, imm0_15); ++__m256i __lasx_xvsllwil_wu_hu (__m256i, imm0_15); ++__m256i __lasx_xvslt_b (__m256i, __m256i); ++__m256i __lasx_xvslt_bu (__m256i, __m256i); ++__m256i __lasx_xvslt_d (__m256i, __m256i); ++__m256i __lasx_xvslt_du (__m256i, __m256i); ++__m256i __lasx_xvslt_h (__m256i, __m256i); ++__m256i __lasx_xvslt_hu (__m256i, __m256i); ++__m256i __lasx_xvslti_b (__m256i, imm_n16_15); ++__m256i __lasx_xvslti_bu (__m256i, imm0_31); ++__m256i __lasx_xvslti_d (__m256i, imm_n16_15); ++__m256i __lasx_xvslti_du (__m256i, imm0_31); ++__m256i __lasx_xvslti_h (__m256i, imm_n16_15); ++__m256i __lasx_xvslti_hu (__m256i, imm0_31); ++__m256i __lasx_xvslti_w (__m256i, imm_n16_15); ++__m256i __lasx_xvslti_wu (__m256i, imm0_31); ++__m256i __lasx_xvslt_w (__m256i, __m256i); ++__m256i __lasx_xvslt_wu (__m256i, __m256i); ++__m256i __lasx_xvsra_b (__m256i, __m256i); ++__m256i __lasx_xvsra_d (__m256i, __m256i); ++__m256i __lasx_xvsra_h (__m256i, __m256i); ++__m256i __lasx_xvsrai_b (__m256i, imm0_7); ++__m256i __lasx_xvsrai_d (__m256i, imm0_63); ++__m256i __lasx_xvsrai_h (__m256i, imm0_15); ++__m256i __lasx_xvsrai_w (__m256i, imm0_31); ++__m256i __lasx_xvsran_b_h (__m256i, __m256i); ++__m256i __lasx_xvsran_h_w (__m256i, 
__m256i); ++__m256i __lasx_xvsrani_b_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvsrani_d_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvsrani_h_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvsrani_w_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvsran_w_d (__m256i, __m256i); ++__m256i __lasx_xvsrar_b (__m256i, __m256i); ++__m256i __lasx_xvsrar_d (__m256i, __m256i); ++__m256i __lasx_xvsrar_h (__m256i, __m256i); ++__m256i __lasx_xvsrari_b (__m256i, imm0_7); ++__m256i __lasx_xvsrari_d (__m256i, imm0_63); ++__m256i __lasx_xvsrari_h (__m256i, imm0_15); ++__m256i __lasx_xvsrari_w (__m256i, imm0_31); ++__m256i __lasx_xvsrarn_b_h (__m256i, __m256i); ++__m256i __lasx_xvsrarn_h_w (__m256i, __m256i); ++__m256i __lasx_xvsrarni_b_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvsrarni_d_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvsrarni_h_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvsrarni_w_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvsrarn_w_d (__m256i, __m256i); ++__m256i __lasx_xvsrar_w (__m256i, __m256i); ++__m256i __lasx_xvsra_w (__m256i, __m256i); ++__m256i __lasx_xvsrl_b (__m256i, __m256i); ++__m256i __lasx_xvsrl_d (__m256i, __m256i); ++__m256i __lasx_xvsrl_h (__m256i, __m256i); ++__m256i __lasx_xvsrli_b (__m256i, imm0_7); ++__m256i __lasx_xvsrli_d (__m256i, imm0_63); ++__m256i __lasx_xvsrli_h (__m256i, imm0_15); ++__m256i __lasx_xvsrli_w (__m256i, imm0_31); ++__m256i __lasx_xvsrln_b_h (__m256i, __m256i); ++__m256i __lasx_xvsrln_h_w (__m256i, __m256i); ++__m256i __lasx_xvsrlni_b_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvsrlni_d_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvsrlni_h_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvsrlni_w_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvsrln_w_d (__m256i, __m256i); ++__m256i __lasx_xvsrlr_b (__m256i, __m256i); ++__m256i __lasx_xvsrlr_d (__m256i, __m256i); ++__m256i __lasx_xvsrlr_h (__m256i, __m256i); ++__m256i __lasx_xvsrlri_b (__m256i, imm0_7); ++__m256i __lasx_xvsrlri_d 
(__m256i, imm0_63); ++__m256i __lasx_xvsrlri_h (__m256i, imm0_15); ++__m256i __lasx_xvsrlri_w (__m256i, imm0_31); ++__m256i __lasx_xvsrlrn_b_h (__m256i, __m256i); ++__m256i __lasx_xvsrlrn_h_w (__m256i, __m256i); ++__m256i __lasx_xvsrlrni_b_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvsrlrni_d_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvsrlrni_h_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvsrlrni_w_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvsrlrn_w_d (__m256i, __m256i); ++__m256i __lasx_xvsrlr_w (__m256i, __m256i); ++__m256i __lasx_xvsrl_w (__m256i, __m256i); ++__m256i __lasx_xvssran_b_h (__m256i, __m256i); ++__m256i __lasx_xvssran_bu_h (__m256i, __m256i); ++__m256i __lasx_xvssran_hu_w (__m256i, __m256i); ++__m256i __lasx_xvssran_h_w (__m256i, __m256i); ++__m256i __lasx_xvssrani_b_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvssrani_bu_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvssrani_d_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvssrani_du_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvssrani_hu_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvssrani_h_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvssrani_w_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvssrani_wu_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvssran_w_d (__m256i, __m256i); ++__m256i __lasx_xvssran_wu_d (__m256i, __m256i); ++__m256i __lasx_xvssrarn_b_h (__m256i, __m256i); ++__m256i __lasx_xvssrarn_bu_h (__m256i, __m256i); ++__m256i __lasx_xvssrarn_hu_w (__m256i, __m256i); ++__m256i __lasx_xvssrarn_h_w (__m256i, __m256i); ++__m256i __lasx_xvssrarni_b_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvssrarni_bu_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvssrarni_d_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvssrarni_du_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvssrarni_hu_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvssrarni_h_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvssrarni_w_d (__m256i, __m256i, imm0_63); 
++__m256i __lasx_xvssrarni_wu_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvssrarn_w_d (__m256i, __m256i); ++__m256i __lasx_xvssrarn_wu_d (__m256i, __m256i); ++__m256i __lasx_xvssrln_b_h (__m256i, __m256i); ++__m256i __lasx_xvssrln_bu_h (__m256i, __m256i); ++__m256i __lasx_xvssrln_hu_w (__m256i, __m256i); ++__m256i __lasx_xvssrln_h_w (__m256i, __m256i); ++__m256i __lasx_xvssrlni_b_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvssrlni_bu_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvssrlni_d_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvssrlni_du_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvssrlni_hu_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvssrlni_h_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvssrlni_w_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvssrlni_wu_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvssrln_w_d (__m256i, __m256i); ++__m256i __lasx_xvssrln_wu_d (__m256i, __m256i); ++__m256i __lasx_xvssrlrn_b_h (__m256i, __m256i); ++__m256i __lasx_xvssrlrn_bu_h (__m256i, __m256i); ++__m256i __lasx_xvssrlrn_hu_w (__m256i, __m256i); ++__m256i __lasx_xvssrlrn_h_w (__m256i, __m256i); ++__m256i __lasx_xvssrlrni_b_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvssrlrni_bu_h (__m256i, __m256i, imm0_15); ++__m256i __lasx_xvssrlrni_d_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvssrlrni_du_q (__m256i, __m256i, imm0_127); ++__m256i __lasx_xvssrlrni_hu_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvssrlrni_h_w (__m256i, __m256i, imm0_31); ++__m256i __lasx_xvssrlrni_w_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvssrlrni_wu_d (__m256i, __m256i, imm0_63); ++__m256i __lasx_xvssrlrn_w_d (__m256i, __m256i); ++__m256i __lasx_xvssrlrn_wu_d (__m256i, __m256i); ++__m256i __lasx_xvssub_b (__m256i, __m256i); ++__m256i __lasx_xvssub_bu (__m256i, __m256i); ++__m256i __lasx_xvssub_d (__m256i, __m256i); ++__m256i __lasx_xvssub_du (__m256i, __m256i); ++__m256i __lasx_xvssub_h (__m256i, __m256i); ++__m256i __lasx_xvssub_hu (__m256i, 
__m256i); ++__m256i __lasx_xvssub_w (__m256i, __m256i); ++__m256i __lasx_xvssub_wu (__m256i, __m256i); ++void __lasx_xvst (__m256i, void *, imm_n2048_2047); ++void __lasx_xvstelm_b (__m256i, void *, imm_n128_127, idx); ++void __lasx_xvstelm_d (__m256i, void *, imm_n128_127, idx); ++void __lasx_xvstelm_h (__m256i, void *, imm_n128_127, idx); ++void __lasx_xvstelm_w (__m256i, void *, imm_n128_127, idx); ++void __lasx_xvstx (__m256i, void *, long int); ++__m256i __lasx_xvsub_b (__m256i, __m256i); ++__m256i __lasx_xvsub_d (__m256i, __m256i); ++__m256i __lasx_xvsub_h (__m256i, __m256i); ++__m256i __lasx_xvsubi_bu (__m256i, imm0_31); ++__m256i __lasx_xvsubi_du (__m256i, imm0_31); ++__m256i __lasx_xvsubi_hu (__m256i, imm0_31); ++__m256i __lasx_xvsubi_wu (__m256i, imm0_31); ++__m256i __lasx_xvsub_q (__m256i, __m256i); ++__m256i __lasx_xvsub_w (__m256i, __m256i); ++__m256i __lasx_xvsubwev_d_w (__m256i, __m256i); ++__m256i __lasx_xvsubwev_d_wu (__m256i, __m256i); ++__m256i __lasx_xvsubwev_h_b (__m256i, __m256i); ++__m256i __lasx_xvsubwev_h_bu (__m256i, __m256i); ++__m256i __lasx_xvsubwev_q_d (__m256i, __m256i); ++__m256i __lasx_xvsubwev_q_du (__m256i, __m256i); ++__m256i __lasx_xvsubwev_w_h (__m256i, __m256i); ++__m256i __lasx_xvsubwev_w_hu (__m256i, __m256i); ++__m256i __lasx_xvsubwod_d_w (__m256i, __m256i); ++__m256i __lasx_xvsubwod_d_wu (__m256i, __m256i); ++__m256i __lasx_xvsubwod_h_b (__m256i, __m256i); ++__m256i __lasx_xvsubwod_h_bu (__m256i, __m256i); ++__m256i __lasx_xvsubwod_q_d (__m256i, __m256i); ++__m256i __lasx_xvsubwod_q_du (__m256i, __m256i); ++__m256i __lasx_xvsubwod_w_h (__m256i, __m256i); ++__m256i __lasx_xvsubwod_w_hu (__m256i, __m256i); ++__m256i __lasx_xvxori_b (__m256i, imm0_255); ++__m256i __lasx_xvxor_v (__m256i, __m256i); ++@end smallexample ++ + @node MIPS DSP Built-in Functions + @subsection MIPS DSP Built-in Functions + +-- +2.43.0 + diff --git a/0056-LoongArch-Switch-loongarch-def-from-C-to-C-to-make-i.patch 
b/0056-LoongArch-Switch-loongarch-def-from-C-to-C-to-make-i.patch new file mode 100644 index 0000000..59bfd8e --- /dev/null +++ b/0056-LoongArch-Switch-loongarch-def-from-C-to-C-to-make-i.patch @@ -0,0 +1,925 @@ +From 6c85d03940f87770a7e8b7195ffe45f99afef411 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 1 Dec 2023 10:09:33 +0800 +Subject: [PATCH 056/188] LoongArch: Switch loongarch-def from C to C++ to make + it possible. + +We'll use HOST_WIDE_INT in LoongArch static properties in following patches. + +To keep the same readability as C99 designated initializers, create a +std::array like data structure with position setter function, and add +field setter functions for structs used in loongarch-def.cc. + +Remove unneeded guards #if +!defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) +in loongarch-def.h and loongarch-opts.h. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.h: Remove extern "C". + (loongarch_isa_base_strings): Declare as loongarch_def_array + instead of plain array. + (loongarch_isa_ext_strings): Likewise. + (loongarch_abi_base_strings): Likewise. + (loongarch_abi_ext_strings): Likewise. + (loongarch_cmodel_strings): Likewise. + (loongarch_cpu_strings): Likewise. + (loongarch_cpu_default_isa): Likewise. + (loongarch_cpu_issue_rate): Likewise. + (loongarch_cpu_multipass_dfa_lookahead): Likewise. + (loongarch_cpu_cache): Likewise. + (loongarch_cpu_align): Likewise. + (loongarch_cpu_rtx_cost_data): Likewise. + (loongarch_isa): Add a constructor and field setter functions. + * config/loongarch/loongarch-opts.h (loongarch-defs.h): Do not + include for target libraries. + * config/loongarch/loongarch-opts.cc: Comment code that doesn't + run and causes compilation errors. + * config/loongarch/loongarch-tune.h (LOONGARCH_TUNE_H): Likewise. + (struct loongarch_rtx_cost_data): Likewise. + (struct loongarch_cache): Likewise. + (struct loongarch_align): Likewise. 
+ * config/loongarch/t-loongarch: Compile loongarch-def.cc with the + C++ compiler. + * config/loongarch/loongarch-def-array.h: New file for a + std::array like data structure with position setter function. + * config/loongarch/loongarch-def.c: Rename to ... + * config/loongarch/loongarch-def.cc: ... here. + (loongarch_cpu_strings): Define as loongarch_def_array instead + of plain array. + (loongarch_cpu_default_isa): Likewise. + (loongarch_cpu_cache): Likewise. + (loongarch_cpu_align): Likewise. + (loongarch_cpu_rtx_cost_data): Likewise. + (loongarch_cpu_issue_rate): Likewise. + (loongarch_cpu_multipass_dfa_lookahead): Likewise. + (loongarch_isa_base_strings): Likewise. + (loongarch_isa_ext_strings): Likewise. + (loongarch_abi_base_strings): Likewise. + (loongarch_abi_ext_strings): Likewise. + (loongarch_cmodel_strings): Likewise. + (abi_minimal_isa): Likewise. + (loongarch_rtx_cost_optimize_size): Use field setter functions + instead of designated initializers. + (loongarch_rtx_cost_data): Implement default constructor. 
+--- + gcc/config/loongarch/loongarch-def-array.h | 40 ++++ + gcc/config/loongarch/loongarch-def.c | 227 --------------------- + gcc/config/loongarch/loongarch-def.cc | 187 +++++++++++++++++ + gcc/config/loongarch/loongarch-def.h | 55 ++--- + gcc/config/loongarch/loongarch-opts.cc | 7 + + gcc/config/loongarch/loongarch-opts.h | 5 +- + gcc/config/loongarch/loongarch-tune.h | 123 ++++++++++- + gcc/config/loongarch/t-loongarch | 4 +- + 8 files changed, 390 insertions(+), 258 deletions(-) + create mode 100644 gcc/config/loongarch/loongarch-def-array.h + delete mode 100644 gcc/config/loongarch/loongarch-def.c + create mode 100644 gcc/config/loongarch/loongarch-def.cc + +diff --git a/gcc/config/loongarch/loongarch-def-array.h b/gcc/config/loongarch/loongarch-def-array.h +new file mode 100644 +index 000000000..bdb3e9c6a +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-def-array.h +@@ -0,0 +1,40 @@ ++/* A std::array like data structure for LoongArch static properties. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. 
*/ ++ ++#ifndef _LOONGARCH_DEF_ARRAY_H ++#define _LOONGARCH_DEF_ARRAY_H 1 ++ ++template <class T, int N> ++class loongarch_def_array { ++private: ++ T arr[N]; ++public: ++ loongarch_def_array () : arr{} {} ++ ++ T &operator[] (int n) { return arr[n]; } ++ const T &operator[] (int n) const { return arr[n]; } ++ ++ loongarch_def_array set (int idx, T &&value) ++ { ++ (*this)[idx] = value; ++ return *this; ++ } ++}; ++ ++#endif +diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c +deleted file mode 100644 +index fe4474e77..000000000 +--- a/gcc/config/loongarch/loongarch-def.c ++++ /dev/null +@@ -1,227 +0,0 @@ +-/* LoongArch static properties. +- Copyright (C) 2021-2022 Free Software Foundation, Inc. +- Contributed by Loongson Ltd. +- +-This file is part of GCC. +- +-GCC is free software; you can redistribute it and/or modify +-it under the terms of the GNU General Public License as published by +-the Free Software Foundation; either version 3, or (at your option) +-any later version. +- +-GCC is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-GNU General Public License for more details. +- +-You should have received a copy of the GNU General Public License +-along with GCC; see the file COPYING3. If not see +-<http://www.gnu.org/licenses/>. 
*/ +-const char* +-loongarch_cpu_strings[N_TUNE_TYPES] = { +- [CPU_NATIVE] = STR_CPU_NATIVE, +- [CPU_ABI_DEFAULT] = STR_CPU_ABI_DEFAULT, +- [CPU_LOONGARCH64] = STR_CPU_LOONGARCH64, +- [CPU_LA464] = STR_CPU_LA464, +- [CPU_LA664] = STR_CPU_LA664, +-}; +- +-struct loongarch_isa +-loongarch_cpu_default_isa[N_ARCH_TYPES] = { +- [CPU_LOONGARCH64] = { +- .base = ISA_BASE_LA64V100, +- .fpu = ISA_EXT_FPU64, +- .simd = 0, +- }, +- [CPU_LA464] = { +- .base = ISA_BASE_LA64V100, +- .fpu = ISA_EXT_FPU64, +- .simd = ISA_EXT_SIMD_LASX, +- }, +- [CPU_LA664] = { +- .base = ISA_BASE_LA64V110, +- .fpu = ISA_EXT_FPU64, +- .simd = ISA_EXT_SIMD_LASX, +- }, +-}; +- +-struct loongarch_cache +-loongarch_cpu_cache[N_TUNE_TYPES] = { +- [CPU_LOONGARCH64] = { +- .l1d_line_size = 64, +- .l1d_size = 64, +- .l2d_size = 256, +- .simultaneous_prefetches = 4, +- }, +- [CPU_LA464] = { +- .l1d_line_size = 64, +- .l1d_size = 64, +- .l2d_size = 256, +- .simultaneous_prefetches = 4, +- }, +- [CPU_LA664] = { +- .l1d_line_size = 64, +- .l1d_size = 64, +- .l2d_size = 256, +- .simultaneous_prefetches = 4, +- }, +-}; +- +-struct loongarch_align +-loongarch_cpu_align[N_TUNE_TYPES] = { +- [CPU_LOONGARCH64] = { +- .function = "32", +- .label = "16", +- }, +- [CPU_LA464] = { +- .function = "32", +- .label = "16", +- }, +- [CPU_LA664] = { +- .function = "32", +- .label = "16", +- }, +-}; +- +- +-/* Default RTX cost initializer. */ +-#define COSTS_N_INSNS(N) ((N) * 4) +-#define DEFAULT_COSTS \ +- .fp_add = COSTS_N_INSNS (1), \ +- .fp_mult_sf = COSTS_N_INSNS (2), \ +- .fp_mult_df = COSTS_N_INSNS (4), \ +- .fp_div_sf = COSTS_N_INSNS (6), \ +- .fp_div_df = COSTS_N_INSNS (8), \ +- .int_mult_si = COSTS_N_INSNS (1), \ +- .int_mult_di = COSTS_N_INSNS (1), \ +- .int_div_si = COSTS_N_INSNS (4), \ +- .int_div_di = COSTS_N_INSNS (6), \ +- .branch_cost = 6, \ +- .memory_latency = 4 +- +-/* The following properties cannot be looked up directly using "cpucfg". 
+- So it is necessary to provide a default value for "unknown native" +- tune targets (i.e. -mtune=native while PRID does not correspond to +- any known "-mtune" type). */ +- +-struct loongarch_rtx_cost_data +-loongarch_cpu_rtx_cost_data[N_TUNE_TYPES] = { +- [CPU_NATIVE] = { +- DEFAULT_COSTS +- }, +- [CPU_LOONGARCH64] = { +- DEFAULT_COSTS +- }, +- [CPU_LA464] = { +- DEFAULT_COSTS +- }, +- [CPU_LA664] = { +- DEFAULT_COSTS +- }, +-}; +- +-/* RTX costs to use when optimizing for size. */ +-const struct loongarch_rtx_cost_data +-loongarch_rtx_cost_optimize_size = { +- .fp_add = 4, +- .fp_mult_sf = 4, +- .fp_mult_df = 4, +- .fp_div_sf = 4, +- .fp_div_df = 4, +- .int_mult_si = 4, +- .int_mult_di = 4, +- .int_div_si = 4, +- .int_div_di = 4, +- .branch_cost = 6, +- .memory_latency = 4, +-}; +- +-int +-loongarch_cpu_issue_rate[N_TUNE_TYPES] = { +- [CPU_NATIVE] = 4, +- [CPU_LOONGARCH64] = 4, +- [CPU_LA464] = 4, +- [CPU_LA664] = 6, +-}; +- +-int +-loongarch_cpu_multipass_dfa_lookahead[N_TUNE_TYPES] = { +- [CPU_NATIVE] = 4, +- [CPU_LOONGARCH64] = 4, +- [CPU_LA464] = 4, +- [CPU_LA664] = 6, +-}; +- +-/* Wiring string definitions from loongarch-str.h to global arrays +- with standard index values from loongarch-opts.h, so we can +- print config-related messages and do ABI self-spec filtering +- from the driver in a self-consistent manner. 
*/ +- +-const char* +-loongarch_isa_base_strings[N_ISA_BASE_TYPES] = { +- [ISA_BASE_LA64V100] = STR_ISA_BASE_LA64V100, +- [ISA_BASE_LA64V110] = STR_ISA_BASE_LA64V110, +-}; +- +-const char* +-loongarch_isa_ext_strings[N_ISA_EXT_TYPES] = { +- [ISA_EXT_NONE] = STR_NONE, +- [ISA_EXT_FPU32] = STR_ISA_EXT_FPU32, +- [ISA_EXT_FPU64] = STR_ISA_EXT_FPU64, +- [ISA_EXT_SIMD_LSX] = STR_ISA_EXT_LSX, +- [ISA_EXT_SIMD_LASX] = STR_ISA_EXT_LASX, +-}; +- +-const char* +-loongarch_abi_base_strings[N_ABI_BASE_TYPES] = { +- [ABI_BASE_LP64D] = STR_ABI_BASE_LP64D, +- [ABI_BASE_LP64F] = STR_ABI_BASE_LP64F, +- [ABI_BASE_LP64S] = STR_ABI_BASE_LP64S, +-}; +- +-const char* +-loongarch_abi_ext_strings[N_ABI_EXT_TYPES] = { +- [ABI_EXT_BASE] = STR_ABI_EXT_BASE, +-}; +- +-const char* +-loongarch_cmodel_strings[] = { +- [CMODEL_NORMAL] = STR_CMODEL_NORMAL, +- [CMODEL_TINY] = STR_CMODEL_TINY, +- [CMODEL_TINY_STATIC] = STR_CMODEL_TS, +- [CMODEL_MEDIUM] = STR_CMODEL_MEDIUM, +- [CMODEL_LARGE] = STR_CMODEL_LARGE, +- [CMODEL_EXTREME] = STR_CMODEL_EXTREME, +-}; +- +- +-/* ABI-related definitions. */ +-const struct loongarch_isa +-abi_minimal_isa[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES] = { +- [ABI_BASE_LP64D] = { +- [ABI_EXT_BASE] = { +- .base = ISA_BASE_LA64V100, +- .fpu = ISA_EXT_FPU64, +- .simd = 0 +- }, +- }, +- [ABI_BASE_LP64F] = { +- [ABI_EXT_BASE] = { +- .base = ISA_BASE_LA64V100, +- .fpu = ISA_EXT_FPU32, +- .simd = 0 +- }, +- }, +- [ABI_BASE_LP64S] = { +- [ABI_EXT_BASE] = { +- .base = ISA_BASE_LA64V100, +- .fpu = ISA_EXT_NONE, +- .simd = 0 +- }, +- }, +-}; +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +new file mode 100644 +index 000000000..6990c86c2 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -0,0 +1,187 @@ ++/* LoongArch static properties. ++ Copyright (C) 2021-2023 Free Software Foundation, Inc. ++ Contributed by Loongson Ltd. ++ ++This file is part of GCC. 
++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#include "loongarch-def.h" ++#include "loongarch-str.h" ++ ++template ++using array = loongarch_def_array; ++ ++template ++using array_tune = array; ++ ++template ++using array_arch = array; ++ ++/* CPU property tables. */ ++array_tune loongarch_cpu_strings = array_tune () ++ .set (CPU_NATIVE, STR_CPU_NATIVE) ++ .set (CPU_ABI_DEFAULT, STR_CPU_ABI_DEFAULT) ++ .set (CPU_LOONGARCH64, STR_CPU_LOONGARCH64) ++ .set (CPU_LA464, STR_CPU_LA464) ++ .set (CPU_LA664, STR_CPU_LA664); ++ ++array_arch loongarch_cpu_default_isa = ++ array_arch () ++ .set (CPU_LOONGARCH64, ++ loongarch_isa () ++ .base_ (ISA_BASE_LA64V100) ++ .fpu_ (ISA_EXT_FPU64)) ++ .set (CPU_LA464, ++ loongarch_isa () ++ .base_ (ISA_BASE_LA64V100) ++ .fpu_ (ISA_EXT_FPU64) ++ .simd_ (ISA_EXT_SIMD_LASX)) ++ .set (CPU_LA664, ++ loongarch_isa () ++ .base_ (ISA_BASE_LA64V110) ++ .fpu_ (ISA_EXT_FPU64) ++ .simd_ (ISA_EXT_SIMD_LASX)); ++ ++static inline loongarch_cache la464_cache () ++{ ++ return loongarch_cache () ++ .l1d_line_size_ (64) ++ .l1d_size_ (64) ++ .l2d_size_ (256) ++ .simultaneous_prefetches_ (4); ++} ++ ++array_tune loongarch_cpu_cache = ++ array_tune () ++ .set (CPU_LOONGARCH64, la464_cache ()) ++ .set (CPU_LA464, la464_cache ()) ++ .set (CPU_LA664, la464_cache ()); ++ ++static inline loongarch_align la464_align () ++{ ++ return loongarch_align ().function_ ("32").label_ ("16"); ++} ++ ++array_tune loongarch_cpu_align = 
++ array_tune () ++ .set (CPU_LOONGARCH64, la464_align ()) ++ .set (CPU_LA464, la464_align ()) ++ .set (CPU_LA664, la464_align ()); ++ ++#define COSTS_N_INSNS(N) ((N) * 4) ++ ++/* Default RTX cost initializer. */ ++loongarch_rtx_cost_data::loongarch_rtx_cost_data () ++ : fp_add (COSTS_N_INSNS (1)), ++ fp_mult_sf (COSTS_N_INSNS (2)), ++ fp_mult_df (COSTS_N_INSNS (4)), ++ fp_div_sf (COSTS_N_INSNS (6)), ++ fp_div_df (COSTS_N_INSNS (8)), ++ int_mult_si (COSTS_N_INSNS (1)), ++ int_mult_di (COSTS_N_INSNS (1)), ++ int_div_si (COSTS_N_INSNS (4)), ++ int_div_di (COSTS_N_INSNS (6)), ++ branch_cost (6), ++ memory_latency (4) {} ++ ++/* The following properties cannot be looked up directly using "cpucfg". ++ So it is necessary to provide a default value for "unknown native" ++ tune targets (i.e. -mtune=native while PRID does not correspond to ++ any known "-mtune" type). Currently all numbers are default. */ ++array_tune loongarch_cpu_rtx_cost_data = ++ array_tune (); ++ ++/* RTX costs to use when optimizing for size. */ ++const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = ++ loongarch_rtx_cost_data () ++ .fp_add_ (4) ++ .fp_mult_sf_ (4) ++ .fp_mult_df_ (4) ++ .fp_div_sf_ (4) ++ .fp_div_df_ (4) ++ .int_mult_si_ (4) ++ .int_mult_di_ (4) ++ .int_div_si_ (4) ++ .int_div_di_ (4); ++ ++array_tune loongarch_cpu_issue_rate = array_tune () ++ .set (CPU_NATIVE, 4) ++ .set (CPU_LOONGARCH64, 4) ++ .set (CPU_LA464, 4) ++ .set (CPU_LA664, 6); ++ ++array_tune loongarch_cpu_multipass_dfa_lookahead = array_tune () ++ .set (CPU_NATIVE, 4) ++ .set (CPU_LOONGARCH64, 4) ++ .set (CPU_LA464, 4) ++ .set (CPU_LA664, 6); ++ ++/* Wiring string definitions from loongarch-str.h to global arrays ++ with standard index values from loongarch-opts.h, so we can ++ print config-related messages and do ABI self-spec filtering ++ from the driver in a self-consistent manner. 
*/ ++ ++array loongarch_isa_base_strings = ++ array () ++ .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100) ++ .set (ISA_BASE_LA64V110, STR_ISA_BASE_LA64V110); ++ ++array loongarch_isa_ext_strings = ++ array () ++ .set (ISA_EXT_NONE, STR_NONE) ++ .set (ISA_EXT_FPU32, STR_ISA_EXT_FPU32) ++ .set (ISA_EXT_FPU64, STR_ISA_EXT_FPU64) ++ .set (ISA_EXT_SIMD_LSX, STR_ISA_EXT_LSX) ++ .set (ISA_EXT_SIMD_LASX, STR_ISA_EXT_LASX); ++ ++array loongarch_abi_base_strings = ++ array () ++ .set (ABI_BASE_LP64D, STR_ABI_BASE_LP64D) ++ .set (ABI_BASE_LP64F, STR_ABI_BASE_LP64F) ++ .set (ABI_BASE_LP64S, STR_ABI_BASE_LP64S); ++ ++array loongarch_abi_ext_strings = ++ array () ++ .set (ABI_EXT_BASE, STR_ABI_EXT_BASE); ++ ++array loongarch_cmodel_strings = ++ array () ++ .set (CMODEL_NORMAL, STR_CMODEL_NORMAL) ++ .set (CMODEL_TINY, STR_CMODEL_TINY) ++ .set (CMODEL_TINY_STATIC, STR_CMODEL_TS) ++ .set (CMODEL_MEDIUM, STR_CMODEL_MEDIUM) ++ .set (CMODEL_LARGE, STR_CMODEL_LARGE) ++ .set (CMODEL_EXTREME, STR_CMODEL_EXTREME); ++ ++array, N_ABI_BASE_TYPES> ++ abi_minimal_isa = array, ++ N_ABI_BASE_TYPES> () ++ .set (ABI_BASE_LP64D, ++ array () ++ .set (ABI_EXT_BASE, ++ loongarch_isa () ++ .base_ (ISA_BASE_LA64V100) ++ .fpu_ (ISA_EXT_FPU64))) ++ .set (ABI_BASE_LP64F, ++ array () ++ .set (ABI_EXT_BASE, ++ loongarch_isa () ++ .base_ (ISA_BASE_LA64V100) ++ .fpu_ (ISA_EXT_FPU32))) ++ .set (ABI_BASE_LP64S, ++ array () ++ .set (ABI_EXT_BASE, ++ loongarch_isa ().base_ (ISA_BASE_LA64V100))); +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index ef848f606..5ac70dfdd 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -50,20 +50,18 @@ along with GCC; see the file COPYING3. If not see + #include + #endif + ++#include "loongarch-def-array.h" + #include "loongarch-tune.h" + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + /* enum isa_base */ +-extern const char* loongarch_isa_base_strings[]; + + /* LoongArch V1.00. 
*/ + #define ISA_BASE_LA64V100 0 + /* LoongArch V1.10. */ + #define ISA_BASE_LA64V110 1 + #define N_ISA_BASE_TYPES 2 ++extern loongarch_def_array ++ loongarch_isa_base_strings; + + #if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + /* Unlike other arrays, this is defined in loongarch-cpu.cc. The problem is +@@ -72,7 +70,6 @@ extern int64_t loongarch_isa_base_features[]; + #endif + + /* enum isa_ext_* */ +-extern const char* loongarch_isa_ext_strings[]; + #define ISA_EXT_NONE 0 + #define ISA_EXT_FPU32 1 + #define ISA_EXT_FPU64 2 +@@ -80,13 +77,16 @@ extern const char* loongarch_isa_ext_strings[]; + #define ISA_EXT_SIMD_LSX 3 + #define ISA_EXT_SIMD_LASX 4 + #define N_ISA_EXT_TYPES 5 ++extern loongarch_def_array ++ loongarch_isa_ext_strings; + + /* enum abi_base */ +-extern const char* loongarch_abi_base_strings[]; + #define ABI_BASE_LP64D 0 + #define ABI_BASE_LP64F 1 + #define ABI_BASE_LP64S 2 + #define N_ABI_BASE_TYPES 3 ++extern loongarch_def_array ++ loongarch_abi_base_strings; + + #define TO_LP64_ABI_BASE(C) (C) + +@@ -99,12 +99,12 @@ extern const char* loongarch_abi_base_strings[]; + + + /* enum abi_ext */ +-extern const char* loongarch_abi_ext_strings[]; + #define ABI_EXT_BASE 0 + #define N_ABI_EXT_TYPES 1 ++extern loongarch_def_array ++ loongarch_abi_ext_strings; + + /* enum cmodel */ +-extern const char* loongarch_cmodel_strings[]; + #define CMODEL_NORMAL 0 + #define CMODEL_TINY 1 + #define CMODEL_TINY_STATIC 2 +@@ -112,6 +112,8 @@ extern const char* loongarch_cmodel_strings[]; + #define CMODEL_LARGE 4 + #define CMODEL_EXTREME 5 + #define N_CMODEL_TYPES 6 ++extern loongarch_def_array ++ loongarch_cmodel_strings; + + /* enum explicit_relocs */ + #define EXPLICIT_RELOCS_AUTO 0 +@@ -126,7 +128,6 @@ extern const char* loongarch_cmodel_strings[]; + #define M_OPT_ABSENT(opt_enum) ((opt_enum) == M_OPT_UNSET) + + +-#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + /* Internal representation of the target. 
*/ + struct loongarch_isa + { +@@ -139,6 +140,13 @@ struct loongarch_isa + + Using int64_t instead of HOST_WIDE_INT for C compatibility. */ + int64_t evolution; ++ ++ loongarch_isa () : base (0), fpu (0), simd (0), evolution (0) {} ++ loongarch_isa base_ (int _base) { base = _base; return *this; } ++ loongarch_isa fpu_ (int _fpu) { fpu = _fpu; return *this; } ++ loongarch_isa simd_ (int _simd) { simd = _simd; return *this; } ++ loongarch_isa evolution_ (int64_t _evolution) ++ { evolution = _evolution; return *this; } + }; + + struct loongarch_abi +@@ -156,9 +164,6 @@ struct loongarch_target + int cmodel; /* CMODEL_ */ + }; + +-extern struct loongarch_isa loongarch_cpu_default_isa[]; +-#endif +- + /* CPU properties. */ + /* index */ + #define CPU_NATIVE 0 +@@ -170,15 +175,19 @@ extern struct loongarch_isa loongarch_cpu_default_isa[]; + #define N_TUNE_TYPES 5 + + /* parallel tables. */ +-extern const char* loongarch_cpu_strings[]; +-extern int loongarch_cpu_issue_rate[]; +-extern int loongarch_cpu_multipass_dfa_lookahead[]; ++extern loongarch_def_array ++ loongarch_cpu_strings; ++extern loongarch_def_array ++ loongarch_cpu_default_isa; ++extern loongarch_def_array ++ loongarch_cpu_issue_rate; ++extern loongarch_def_array ++ loongarch_cpu_multipass_dfa_lookahead; ++extern loongarch_def_array ++ loongarch_cpu_cache; ++extern loongarch_def_array ++ loongarch_cpu_align; ++extern loongarch_def_array ++ loongarch_cpu_rtx_cost_data; + +-extern struct loongarch_cache loongarch_cpu_cache[]; +-extern struct loongarch_align loongarch_cpu_align[]; +-extern struct loongarch_rtx_cost_data loongarch_cpu_rtx_cost_data[]; +- +-#ifdef __cplusplus +-} +-#endif + #endif /* LOONGARCH_DEF_H */ +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 390720479..45fc521e4 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -163,6 +163,7 @@ loongarch_config_target (struct loongarch_target *target, 
+ int follow_multilib_list_p) + { + struct loongarch_target t; ++ + if (!target) + return; + +@@ -657,12 +658,18 @@ abi_str (struct loongarch_abi abi) + strlen (loongarch_abi_base_strings[abi.base])); + else + { ++ /* This situation has not yet occurred, so in order to avoid the ++ -Warray-bounds warning during C++ syntax checking, this part ++ of the code is commented first. */ ++ /* + APPEND_STRING (loongarch_abi_base_strings[abi.base]) + APPEND1 ('/') + APPEND_STRING (loongarch_abi_ext_strings[abi.ext]) + APPEND1 ('\0') + + return XOBFINISH (&msg_obstack, const char *); ++ */ ++ gcc_unreachable (); + } + } + +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 9b3d023ac..0dabf1551 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -21,7 +21,10 @@ along with GCC; see the file COPYING3. If not see + #ifndef LOONGARCH_OPTS_H + #define LOONGARCH_OPTS_H + ++/* This is a C++ header and it shouldn't be used by target libraries. */ ++#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + #include "loongarch-def.h" ++#endif + + /* Target configuration */ + extern struct loongarch_target la_target; +@@ -33,7 +36,6 @@ struct loongarch_flags { + int sx[2]; + }; + +-#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + + /* Initialize loongarch_target from separate option variables. */ + void +@@ -54,7 +56,6 @@ void + loongarch_update_gcc_opt_status (struct loongarch_target *target, + struct gcc_options *opts, + struct gcc_options *opts_set); +-#endif + + + /* Macros for common conditional expressions used in loongarch.{c,h,md} */ +diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h +index d961963f0..616b94e87 100644 +--- a/gcc/config/loongarch/loongarch-tune.h ++++ b/gcc/config/loongarch/loongarch-tune.h +@@ -21,6 +21,8 @@ along with GCC; see the file COPYING3. 
If not see + #ifndef LOONGARCH_TUNE_H + #define LOONGARCH_TUNE_H + ++#include "loongarch-def-array.h" ++ + /* RTX costs of various operations on the different architectures. */ + struct loongarch_rtx_cost_data + { +@@ -35,6 +37,76 @@ struct loongarch_rtx_cost_data + unsigned short int_div_di; + unsigned short branch_cost; + unsigned short memory_latency; ++ ++ /* Default RTX cost initializer, implemented in loongarch-def.cc. */ ++ loongarch_rtx_cost_data (); ++ ++ loongarch_rtx_cost_data fp_add_ (unsigned short _fp_add) ++ { ++ fp_add = _fp_add; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data fp_mult_sf_ (unsigned short _fp_mult_sf) ++ { ++ fp_mult_sf = _fp_mult_sf; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data fp_mult_df_ (unsigned short _fp_mult_df) ++ { ++ fp_mult_df = _fp_mult_df; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data fp_div_sf_ (unsigned short _fp_div_sf) ++ { ++ fp_div_sf = _fp_div_sf; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data fp_div_df_ (unsigned short _fp_div_df) ++ { ++ fp_div_df = _fp_div_df; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data int_mult_si_ (unsigned short _int_mult_si) ++ { ++ int_mult_si = _int_mult_si; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data int_mult_di_ (unsigned short _int_mult_di) ++ { ++ int_mult_di = _int_mult_di; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data int_div_si_ (unsigned short _int_div_si) ++ { ++ int_div_si = _int_div_si; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data int_div_di_ (unsigned short _int_div_di) ++ { ++ int_div_di = _int_div_di; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data branch_cost_ (unsigned short _branch_cost) ++ { ++ branch_cost = _branch_cost; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data memory_latency_ (unsigned short _memory_latency) ++ { ++ memory_latency = _memory_latency; ++ return *this; ++ } ++ + }; + + /* Costs to use when optimizing for size. 
*/ +@@ -42,10 +114,39 @@ extern const struct loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size; + + /* Cache size record of known processor models. */ + struct loongarch_cache { +- int l1d_line_size; /* bytes */ +- int l1d_size; /* KiB */ +- int l2d_size; /* kiB */ +- int simultaneous_prefetches; /* number of parallel prefetch */ ++ int l1d_line_size; /* bytes */ ++ int l1d_size; /* KiB */ ++ int l2d_size; /* kiB */ ++ int simultaneous_prefetches; /* number of parallel prefetch */ ++ ++ loongarch_cache () : l1d_line_size (0), ++ l1d_size (0), ++ l2d_size (0), ++ simultaneous_prefetches (0) {} ++ ++ loongarch_cache l1d_line_size_ (int _l1d_line_size) ++ { ++ l1d_line_size = _l1d_line_size; ++ return *this; ++ } ++ ++ loongarch_cache l1d_size_ (int _l1d_size) ++ { ++ l1d_size = _l1d_size; ++ return *this; ++ } ++ ++ loongarch_cache l2d_size_ (int _l2d_size) ++ { ++ l2d_size = _l2d_size; ++ return *this; ++ } ++ ++ loongarch_cache simultaneous_prefetches_ (int _simultaneous_prefetches) ++ { ++ simultaneous_prefetches = _simultaneous_prefetches; ++ return *this; ++ } + }; + + /* Alignment for functions and labels for best performance. 
For new uarchs +@@ -54,6 +155,20 @@ struct loongarch_cache { + struct loongarch_align { + const char *function; /* default value for -falign-functions */ + const char *label; /* default value for -falign-labels */ ++ ++ loongarch_align () : function (nullptr), label (nullptr) {} ++ ++ loongarch_align function_ (const char *_function) ++ { ++ function = _function; ++ return *this; ++ } ++ ++ loongarch_align label_ (const char *_label) ++ { ++ label = _label; ++ return *this; ++ } + }; + + #endif /* LOONGARCH_TUNE_H */ +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index 57b1176bc..a1a40431f 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -64,8 +64,8 @@ loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) \ + $(srcdir)/config/loongarch/loongarch-cpucfg-map.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +-loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.c $(LA_STR_H) +- $(CC) -c $(ALL_CFLAGS) $(INCLUDES) $< ++loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.cc $(LA_STR_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + + $(srcdir)/config/loongarch/loongarch.opt: s-loongarch-opt ; @true + s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \ +-- +2.43.0 + diff --git a/0057-LoongArch-Remove-the-definition-of-ISA_BASE_LA64V110.patch b/0057-LoongArch-Remove-the-definition-of-ISA_BASE_LA64V110.patch new file mode 100644 index 0000000..0ad4086 --- /dev/null +++ b/0057-LoongArch-Remove-the-definition-of-ISA_BASE_LA64V110.patch @@ -0,0 +1,261 @@ +From 1ec35f153636077760b65dc3e0385d0a4d383486 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Fri, 1 Dec 2023 11:51:51 +0800 +Subject: [PATCH 057/188] LoongArch: Remove the definition of ISA_BASE_LA64V110 + from the code. + +The instructions defined in LoongArch Reference Manual v1.1 are not the instruction +set v1.1 version. 
The CPU defined later may only support some instructions in +LoongArch Reference Manual v1.1. Therefore, the macro ISA_BASE_LA64V110 and +related definitions are removed here. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch-strings: Delete STR_ISA_BASE_LA64V110. + * config/loongarch/genopts/loongarch.opt.in: Likewise. + * config/loongarch/loongarch-cpu.cc (ISA_BASE_LA64V110_FEATURES): Delete macro. + (fill_native_cpu_config): Define a new variable hw_isa_evolution record the + extended instruction set support read from cpucfg. + * config/loongarch/loongarch-def.cc: Set evolution at initialization. + * config/loongarch/loongarch-def.h (ISA_BASE_LA64V100): Delete. + (ISA_BASE_LA64V110): Likewise. + (N_ISA_BASE_TYPES): Likewise. + (defined): Likewise. + * config/loongarch/loongarch-opts.cc: Likewise. + * config/loongarch/loongarch-opts.h (TARGET_64BIT): Likewise. + (ISA_BASE_IS_LA64V110): Likewise. + * config/loongarch/loongarch-str.h (STR_ISA_BASE_LA64V110): Likewise. + * config/loongarch/loongarch.opt: Regenerate. 
+--- + .../loongarch/genopts/loongarch-strings | 1 - + gcc/config/loongarch/genopts/loongarch.opt.in | 3 --- + gcc/config/loongarch/loongarch-cpu.cc | 23 +++++-------------- + gcc/config/loongarch/loongarch-def.cc | 14 +++++++---- + gcc/config/loongarch/loongarch-def.h | 12 ++-------- + gcc/config/loongarch/loongarch-opts.cc | 3 --- + gcc/config/loongarch/loongarch-opts.h | 4 +--- + gcc/config/loongarch/loongarch-str.h | 1 - + gcc/config/loongarch/loongarch.opt | 3 --- + 9 files changed, 19 insertions(+), 45 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index 6c8a42af2..411ad5696 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -30,7 +30,6 @@ STR_CPU_LA664 la664 + + # Base architecture + STR_ISA_BASE_LA64V100 la64 +-STR_ISA_BASE_LA64V110 la64v1.1 + + # -mfpu + OPTSTR_ISA_EXT_FPU fpu +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index a49de07c9..cd5e75e4f 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -32,9 +32,6 @@ Basic ISAs of LoongArch: + EnumValue + Enum(isa_base) String(@@STR_ISA_BASE_LA64V100@@) Value(ISA_BASE_LA64V100) + +-EnumValue +-Enum(isa_base) String(@@STR_ISA_BASE_LA64V110@@) Value(ISA_BASE_LA64V110) +- + ;; ISA extensions / adjustments + Enum + Name(isa_ext_fpu) Type(int) +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index bbce82c9c..7e0625835 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -23,7 +23,6 @@ along with GCC; see the file COPYING3. If not see + #include "config.h" + #include "system.h" + #include "coretypes.h" +-#include "tm.h" + #include "diagnostic-core.h" + + #include "loongarch-def.h" +@@ -32,19 +31,6 @@ along with GCC; see the file COPYING3. 
If not see + #include "loongarch-cpucfg-map.h" + #include "loongarch-str.h" + +-/* loongarch_isa_base_features defined here instead of loongarch-def.c +- because we need to use options.h. Pay attention on the order of elements +- in the initializer becaue ISO C++ does not allow C99 designated +- initializers! */ +- +-#define ISA_BASE_LA64V110_FEATURES \ +- (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA \ +- | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS) +- +-int64_t loongarch_isa_base_features[N_ISA_BASE_TYPES] = { +- /* [ISA_BASE_LA64V100] = */ 0, +- /* [ISA_BASE_LA64V110] = */ ISA_BASE_LA64V110_FEATURES, +-}; + + /* Native CPU detection with "cpucfg" */ + static uint32_t cpucfg_cache[N_CPUCFG_WORDS] = { 0 }; +@@ -235,18 +221,20 @@ fill_native_cpu_config (struct loongarch_target *tgt) + /* Use the native value anyways. */ + preset.simd = tmp; + ++ ++ int64_t hw_isa_evolution = 0; ++ + /* Features added during ISA evolution. */ + for (const auto &entry: cpucfg_map) + if (cpucfg_cache[entry.cpucfg_word] & entry.cpucfg_bit) +- preset.evolution |= entry.isa_evolution_bit; ++ hw_isa_evolution |= entry.isa_evolution_bit; + + if (native_cpu_type != CPU_NATIVE) + { + /* Check if the local CPU really supports the features of the base + ISA of probed native_cpu_type. If any feature is not detected, + either GCC or the hardware is buggy. 
*/ +- auto base_isa_feature = loongarch_isa_base_features[preset.base]; +- if ((preset.evolution & base_isa_feature) != base_isa_feature) ++ if ((preset.evolution & hw_isa_evolution) != hw_isa_evolution) + warning (0, + "detected base architecture %qs, but some of its " + "features are not detected; the detected base " +@@ -254,6 +242,7 @@ fill_native_cpu_config (struct loongarch_target *tgt) + "features will be enabled", + loongarch_isa_base_strings[preset.base]); + } ++ preset.evolution = hw_isa_evolution; + } + + if (tune_native_p) +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 6990c86c2..bc6997e45 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -18,6 +18,11 @@ You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++ + #include "loongarch-def.h" + #include "loongarch-str.h" + +@@ -51,9 +56,11 @@ array_arch loongarch_cpu_default_isa = + .simd_ (ISA_EXT_SIMD_LASX)) + .set (CPU_LA664, + loongarch_isa () +- .base_ (ISA_BASE_LA64V110) ++ .base_ (ISA_BASE_LA64V100) + .fpu_ (ISA_EXT_FPU64) +- .simd_ (ISA_EXT_SIMD_LASX)); ++ .simd_ (ISA_EXT_SIMD_LASX) ++ .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA ++ | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS)); + + static inline loongarch_cache la464_cache () + { +@@ -136,8 +143,7 @@ array_tune loongarch_cpu_multipass_dfa_lookahead = array_tune () + + array loongarch_isa_base_strings = + array () +- .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100) +- .set (ISA_BASE_LA64V110, STR_ISA_BASE_LA64V110); ++ .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100); + + array loongarch_isa_ext_strings = + array () +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 5ac70dfdd..f8f36f0e2 100644 +--- 
a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -56,19 +56,11 @@ along with GCC; see the file COPYING3. If not see + /* enum isa_base */ + + /* LoongArch V1.00. */ +-#define ISA_BASE_LA64V100 0 +-/* LoongArch V1.10. */ +-#define ISA_BASE_LA64V110 1 +-#define N_ISA_BASE_TYPES 2 ++#define ISA_BASE_LA64V100 0 ++#define N_ISA_BASE_TYPES 1 + extern loongarch_def_array + loongarch_isa_base_strings; + +-#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) +-/* Unlike other arrays, this is defined in loongarch-cpu.cc. The problem is +- we cannot use the C++ header options.h in loongarch-def.c. */ +-extern int64_t loongarch_isa_base_features[]; +-#endif +- + /* enum isa_ext_* */ + #define ISA_EXT_NONE 0 + #define ISA_EXT_FPU32 1 +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 45fc521e4..d31becc67 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -285,9 +285,6 @@ config_target_isa: + /* Get default ISA from "-march" or its default value. */ + t.isa = loongarch_cpu_default_isa[t.cpu_arch]; + +- if (t.cpu_arch != CPU_NATIVE) +- t.isa.evolution |= loongarch_isa_base_features[t.isa.base]; +- + /* Apply incremental changes. */ + /* "-march=native" overrides the default FPU type. 
*/ + +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 0dabf1551..7010ddfec 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -77,8 +77,7 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + #define TARGET_DOUBLE_FLOAT (la_target.isa.fpu == ISA_EXT_FPU64) + #define TARGET_DOUBLE_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64D) + +-#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100 \ +- || la_target.isa.base == ISA_BASE_LA64V110) ++#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100) + #define TARGET_ABI_LP64 (la_target.abi.base == ABI_BASE_LP64D \ + || la_target.abi.base == ABI_BASE_LP64F \ + || la_target.abi.base == ABI_BASE_LP64S) +@@ -90,7 +89,6 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + /* TARGET_ macros for use in *.md template conditionals */ + #define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464) + #define TARGET_uARCH_LA664 (la_target.cpu_tune == CPU_LA664) +-#define ISA_BASE_IS_LA64V110 (la_target.isa.base == ISA_BASE_LA64V110) + + /* Note: optimize_size may vary across functions, + while -m[no]-memcpy imposes a global constraint. */ +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index 0fee9abe5..7144bbe28 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -33,7 +33,6 @@ along with GCC; see the file COPYING3. 
If not see + #define STR_CPU_LA664 "la664" + + #define STR_ISA_BASE_LA64V100 "la64" +-#define STR_ISA_BASE_LA64V110 "la64v1.1" + + #define OPTSTR_ISA_EXT_FPU "fpu" + #define STR_NONE "none" +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index ea0d5bb4e..7fe36feb9 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -40,9 +40,6 @@ Basic ISAs of LoongArch: + EnumValue + Enum(isa_base) String(la64) Value(ISA_BASE_LA64V100) + +-EnumValue +-Enum(isa_base) String(la64v1.1) Value(ISA_BASE_LA64V110) +- + ;; ISA extensions / adjustments + Enum + Name(isa_ext_fpu) Type(int) +-- +2.43.0 + diff --git a/0058-LoongArch-Add-support-for-xorsign.patch b/0058-LoongArch-Add-support-for-xorsign.patch new file mode 100644 index 0000000..b4fd958 --- /dev/null +++ b/0058-LoongArch-Add-support-for-xorsign.patch @@ -0,0 +1,412 @@ +From dac02bbb72cae374ddc905fffcc6c94c901f9b26 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Fri, 17 Nov 2023 17:00:21 +0800 +Subject: [PATCH 058/188] LoongArch: Add support for xorsign. + +This patch adds support for xorsign pattern to scalar fp and vector. With the +new expands, uniformly using vector bitwise logical operations to handle xorsign. + +On LoongArch64, floating-point registers and vector registers share the same register, +so this patch also allows conversion between LSX vector mode and scalar fp mode to +avoid unnecessary instruction generation. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (xorsign3): New expander. + * config/loongarch/loongarch.cc (loongarch_can_change_mode_class): Allow + conversion between LSX vector mode and scalar fp mode. + * config/loongarch/loongarch.md (@xorsign3): New expander. + * config/loongarch/lsx.md (@xorsign3): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-xorsign.c: New test. 
+ * gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-xorsign.c: New test. + * gcc.target/loongarch/xorsign-run.c: New test. + * gcc.target/loongarch/xorsign.c: New test. +--- + gcc/config/loongarch/lasx.md | 22 +++++-- + gcc/config/loongarch/loongarch.cc | 5 ++ + gcc/config/loongarch/loongarch.md | 17 ++++++ + gcc/config/loongarch/lsx.md | 23 +++++-- + .../loongarch/vector/lasx/lasx-xorsign-run.c | 60 +++++++++++++++++++ + .../loongarch/vector/lasx/lasx-xorsign.c | 19 ++++++ + .../loongarch/vector/lsx/lsx-xorsign-run.c | 60 +++++++++++++++++++ + .../loongarch/vector/lsx/lsx-xorsign.c | 19 ++++++ + .../gcc.target/loongarch/xorsign-run.c | 25 ++++++++ + gcc/testsuite/gcc.target/loongarch/xorsign.c | 18 ++++++ + 10 files changed, 260 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/xorsign-run.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/xorsign.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 116b30c07..de7c88f14 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1065,10 +1065,10 @@ + (set_attr "mode" "")]) + + (define_insn "xor3" +- [(set (match_operand:ILASX 0 "register_operand" "=f,f,f") +- (xor:ILASX +- (match_operand:ILASX 1 "register_operand" "f,f,f") +- (match_operand:ILASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] ++ [(set (match_operand:LASX 0 "register_operand" "=f,f,f") ++ (xor:LASX ++ (match_operand:LASX 1 "register_operand" "f,f,f") ++ (match_operand:LASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] + "ISA_HAS_LASX" + "@ + 
xvxor.v\t%u0,%u1,%u2 +@@ -3061,6 +3061,20 @@ + operands[5] = gen_reg_rtx (mode); + }) + ++(define_expand "xorsign3" ++ [(set (match_dup 4) ++ (and:FLASX (match_dup 3) ++ (match_operand:FLASX 2 "register_operand"))) ++ (set (match_operand:FLASX 0 "register_operand") ++ (xor:FLASX (match_dup 4) ++ (match_operand:FLASX 1 "register_operand")))] ++ "ISA_HAS_LASX" ++{ ++ operands[3] = loongarch_build_signbit_mask (mode, 1, 0); ++ ++ operands[4] = gen_reg_rtx (mode); ++}) ++ + + (define_insn "absv4df2" + [(set (match_operand:V4DF 0 "register_operand" "=f") +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 3ef7e3605..3c8ae9a42 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6703,6 +6703,11 @@ loongarch_can_change_mode_class (machine_mode from, machine_mode to, + if (LSX_SUPPORTED_MODE_P (from) && LSX_SUPPORTED_MODE_P (to)) + return true; + ++ /* Allow conversion between LSX vector mode and scalar fp mode. */ ++ if ((LSX_SUPPORTED_MODE_P (from) && SCALAR_FLOAT_MODE_P (to)) ++ || ((SCALAR_FLOAT_MODE_P (from) && LSX_SUPPORTED_MODE_P (to)))) ++ return true; ++ + return !reg_classes_intersect_p (FP_REGS, rclass); + } + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index cfd7a8ec6..afc3c591f 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1164,6 +1164,23 @@ + "fcopysign.\t%0,%1,%2" + [(set_attr "type" "fcopysign") + (set_attr "mode" "")]) ++ ++(define_expand "@xorsign3" ++ [(match_operand:ANYF 0 "register_operand") ++ (match_operand:ANYF 1 "register_operand") ++ (match_operand:ANYF 2 "register_operand")] ++ "ISA_HAS_LSX" ++{ ++ machine_mode lsx_mode ++ = mode == SFmode ? 
V4SFmode : V2DFmode; ++ rtx tmp = gen_reg_rtx (lsx_mode); ++ rtx op1 = lowpart_subreg (lsx_mode, operands[1], mode); ++ rtx op2 = lowpart_subreg (lsx_mode, operands[2], mode); ++ emit_insn (gen_xorsign3 (lsx_mode, tmp, op1, op2)); ++ emit_move_insn (operands[0], ++ lowpart_subreg (mode, tmp, lsx_mode)); ++ DONE; ++}) + + ;; + ;; .................... +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 232399934..ce6ec6d69 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -957,10 +957,10 @@ + (set_attr "mode" "")]) + + (define_insn "xor3" +- [(set (match_operand:ILSX 0 "register_operand" "=f,f,f") +- (xor:ILSX +- (match_operand:ILSX 1 "register_operand" "f,f,f") +- (match_operand:ILSX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] ++ [(set (match_operand:LSX 0 "register_operand" "=f,f,f") ++ (xor:LSX ++ (match_operand:LSX 1 "register_operand" "f,f,f") ++ (match_operand:LSX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] + "ISA_HAS_LSX" + "@ + vxor.v\t%w0,%w1,%w2 +@@ -2786,6 +2786,21 @@ + operands[5] = gen_reg_rtx (mode); + }) + ++(define_expand "@xorsign3" ++ [(set (match_dup 4) ++ (and:FLSX (match_dup 3) ++ (match_operand:FLSX 2 "register_operand"))) ++ (set (match_operand:FLSX 0 "register_operand") ++ (xor:FLSX (match_dup 4) ++ (match_operand:FLSX 1 "register_operand")))] ++ "ISA_HAS_LSX" ++{ ++ operands[3] = loongarch_build_signbit_mask (mode, 1, 0); ++ ++ operands[4] = gen_reg_rtx (mode); ++}) ++ ++ + (define_insn "absv2df2" + [(set (match_operand:V2DF 0 "register_operand" "=f") + (abs:V2DF (match_operand:V2DF 1 "register_operand" "f")))] +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c +new file mode 100644 +index 000000000..2295503d4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c +@@ -0,0 +1,60 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -ftree-vectorize 
-mlasx" } */ ++/* { dg-require-effective-target loongarch_asx_hw } */ ++ ++#include "lasx-xorsign.c" ++ ++extern void abort (); ++ ++#define N 16 ++float a[N] = {-0.1f, -3.2f, -6.3f, -9.4f, ++ -12.5f, -15.6f, -18.7f, -21.8f, ++ 24.9f, 27.1f, 30.2f, 33.3f, ++ 36.4f, 39.5f, 42.6f, 45.7f}; ++float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f, ++ -9.0f, 1.0f, -2.0f, 3.0f, ++ -4.0f, -5.0f, 6.0f, 7.0f, ++ -8.0f, -9.0f, 10.0f, 11.0f}; ++float r[N]; ++ ++double ad[N] = {-0.1d, -3.2d, -6.3d, -9.4d, ++ -12.5d, -15.6d, -18.7d, -21.8d, ++ 24.9d, 27.1d, 30.2d, 33.3d, ++ 36.4d, 39.5d, 42.6d, 45.7d}; ++double bd[N] = {-1.2d, 3.4d, -5.6d, 7.8d, ++ -9.0d, 1.0d, -2.0d, 3.0d, ++ -4.0d, -5.0d, 6.0d, 7.0d, ++ -8.0d, -9.0d, 10.0d, 11.0d}; ++double rd[N]; ++ ++void ++__attribute__ ((optimize ("-O0"))) ++check_xorsignf (void) ++{ ++ for (int i = 0; i < N; i++) ++ if (r[i] != a[i] * __builtin_copysignf (1.0f, b[i])) ++ abort (); ++} ++ ++void ++__attribute__ ((optimize ("-O0"))) ++check_xorsign (void) ++{ ++ for (int i = 0; i < N; i++) ++ if (rd[i] != ad[i] * __builtin_copysign (1.0d, bd[i])) ++ abort (); ++} ++ ++int ++main (void) ++{ ++ my_xorsignf (r, a, b, N); ++ /* check results: */ ++ check_xorsignf (); ++ ++ my_xorsign (rd, ad, bd, N); ++ /* check results: */ ++ check_xorsign (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c +new file mode 100644 +index 000000000..190a9239b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -mlasx" } */ ++/* { dg-final { scan-assembler "xvand\\.v" } } */ ++/* { dg-final { scan-assembler "xvxor\\.v" } } */ ++/* { dg-final { scan-assembler-not "xvfmul" } } */ ++ ++double ++my_xorsign (double *restrict a, double *restrict b, double *restrict c, int n) ++{ ++ for (int i = 0; i < n; i++) ++ a[i] = b[i] * __builtin_copysign (1.0d, c[i]); 
++} ++ ++float ++my_xorsignf (float *restrict a, float *restrict b, float *restrict c, int n) ++{ ++ for (int i = 0; i < n; i++) ++ a[i] = b[i] * __builtin_copysignf (1.0f, c[i]); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c +new file mode 100644 +index 000000000..22c5c03cc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c +@@ -0,0 +1,60 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -ftree-vectorize -mlsx" } */ ++/* { dg-require-effective-target loongarch_sx_hw } */ ++ ++#include "lsx-xorsign.c" ++ ++extern void abort (); ++ ++#define N 16 ++float a[N] = {-0.1f, -3.2f, -6.3f, -9.4f, ++ -12.5f, -15.6f, -18.7f, -21.8f, ++ 24.9f, 27.1f, 30.2f, 33.3f, ++ 36.4f, 39.5f, 42.6f, 45.7f}; ++float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f, ++ -9.0f, 1.0f, -2.0f, 3.0f, ++ -4.0f, -5.0f, 6.0f, 7.0f, ++ -8.0f, -9.0f, 10.0f, 11.0f}; ++float r[N]; ++ ++double ad[N] = {-0.1d, -3.2d, -6.3d, -9.4d, ++ -12.5d, -15.6d, -18.7d, -21.8d, ++ 24.9d, 27.1d, 30.2d, 33.3d, ++ 36.4d, 39.5d, 42.6d, 45.7d}; ++double bd[N] = {-1.2d, 3.4d, -5.6d, 7.8d, ++ -9.0d, 1.0d, -2.0d, 3.0d, ++ -4.0d, -5.0d, 6.0d, 7.0d, ++ -8.0d, -9.0d, 10.0d, 11.0d}; ++double rd[N]; ++ ++void ++__attribute__ ((optimize ("-O0"))) ++check_xorsignf (void) ++{ ++ for (int i = 0; i < N; i++) ++ if (r[i] != a[i] * __builtin_copysignf (1.0f, b[i])) ++ abort (); ++} ++ ++void ++__attribute__ ((optimize ("-O0"))) ++check_xorsign (void) ++{ ++ for (int i = 0; i < N; i++) ++ if (rd[i] != ad[i] * __builtin_copysign (1.0d, bd[i])) ++ abort (); ++} ++ ++int ++main (void) ++{ ++ my_xorsignf (r, a, b, N); ++ /* check results: */ ++ check_xorsignf (); ++ ++ my_xorsign (rd, ad, bd, N); ++ /* check results: */ ++ check_xorsign (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c +new file mode 100644 +index 
000000000..c2694c11e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -mlsx" } */ ++/* { dg-final { scan-assembler "vand\\.v" } } */ ++/* { dg-final { scan-assembler "vxor\\.v" } } */ ++/* { dg-final { scan-assembler-not "vfmul" } } */ ++ ++double ++my_xorsign (double *restrict a, double *restrict b, double *restrict c, int n) ++{ ++ for (int i = 0; i < n; i++) ++ a[i] = b[i] * __builtin_copysign (1.0d, c[i]); ++} ++ ++float ++my_xorsignf (float *restrict a, float *restrict b, float *restrict c, int n) ++{ ++ for (int i = 0; i < n; i++) ++ a[i] = b[i] * __builtin_copysignf (1.0f, c[i]); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/xorsign-run.c b/gcc/testsuite/gcc.target/loongarch/xorsign-run.c +new file mode 100644 +index 000000000..b4f28adf8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/xorsign-run.c +@@ -0,0 +1,25 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -mlsx" } */ ++/* { dg-require-effective-target loongarch_sx_hw } */ ++ ++extern void abort(void); ++ ++static double x = 2.0; ++static float y = 2.0; ++ ++int main() ++{ ++ if ((2.5 * __builtin_copysign(1.0d, x)) != 2.5) ++ abort(); ++ ++ if ((2.5 * __builtin_copysign(1.0f, y)) != 2.5) ++ abort(); ++ ++ if ((2.5 * __builtin_copysignf(1.0d, -x)) != -2.5) ++ abort(); ++ ++ if ((2.5 * __builtin_copysignf(1.0f, -y)) != -2.5) ++ abort(); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/xorsign.c b/gcc/testsuite/gcc.target/loongarch/xorsign.c +new file mode 100644 +index 000000000..ca80603d4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/xorsign.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx" } */ ++/* { dg-final { scan-assembler "vand\\.v" } } */ ++/* { dg-final { scan-assembler "vxor\\.v" } } */ ++/* { dg-final { scan-assembler-not "fcopysign" } } */ ++/* { dg-final { scan-assembler-not "fmul" } } */ ++ ++double 
++my_xorsign (double a, double b) ++{ ++ return a * __builtin_copysign (1.0d, b); ++} ++ ++float ++my_xorsignf (float a, float b) ++{ ++ return a * __builtin_copysignf (1.0f, b); ++} +-- +2.43.0 + diff --git a/0059-LoongArch-Add-support-for-LoongArch-V1.1-approximate.patch b/0059-LoongArch-Add-support-for-LoongArch-V1.1-approximate.patch new file mode 100644 index 0000000..8edd4f9 --- /dev/null +++ b/0059-LoongArch-Add-support-for-LoongArch-V1.1-approximate.patch @@ -0,0 +1,730 @@ +From 88117f2703d06e44983e54a985ec0ad6f2397a46 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Wed, 6 Dec 2023 15:04:49 +0800 +Subject: [PATCH 059/188] LoongArch: Add support for LoongArch V1.1 approximate + instructions. + +This patch adds define_insn/builtins/intrinsics for these instructions, and add option +-mfrecipe to control instruction generation. + +gcc/ChangeLog: + + * config/loongarch/genopts/isa-evolution.in (fecipe): Add. + * config/loongarch/larchintrin.h (__frecipe_s): New intrinsic. + (__frecipe_d): Ditto. + (__frsqrte_s): Ditto. + (__frsqrte_d): Ditto. + * config/loongarch/lasx.md (lasx_xvfrecipe_): New insn pattern. + (lasx_xvfrsqrte_): Ditto. + * config/loongarch/lasxintrin.h (__lasx_xvfrecipe_s): New intrinsic. + (__lasx_xvfrecipe_d): Ditto. + (__lasx_xvfrsqrte_s): Ditto. + (__lasx_xvfrsqrte_d): Ditto. + * config/loongarch/loongarch-builtins.cc (AVAIL_ALL): Add predicates. + (LSX_EXT_BUILTIN): New macro. + (LASX_EXT_BUILTIN): Ditto. + * config/loongarch/loongarch-cpucfg-map.h: Regenerate. + * config/loongarch/loongarch-c.cc: Add builtin macro "__loongarch_frecipe". + * config/loongarch/loongarch-def.cc: Regenerate. + * config/loongarch/loongarch-str.h (OPTSTR_FRECIPE): Regenerate. + * config/loongarch/loongarch.cc (loongarch_asm_code_end): Dump status for TARGET_FRECIPE. + * config/loongarch/loongarch.md (loongarch_frecipe_): New insn pattern. + (loongarch_frsqrte_): Ditto. + * config/loongarch/loongarch.opt: Regenerate. 
+ * config/loongarch/lsx.md (lsx_vfrecipe_): New insn pattern. + (lsx_vfrsqrte_): Ditto. + * config/loongarch/lsxintrin.h (__lsx_vfrecipe_s): New intrinsic. + (__lsx_vfrecipe_d): Ditto. + (__lsx_vfrsqrte_s): Ditto. + (__lsx_vfrsqrte_d): Ditto. + * doc/extend.texi: Add documentation for LoongArch new builtins and intrinsics. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/larch-frecipe-builtin.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c: New test. +--- + gcc/config/loongarch/genopts/isa-evolution.in | 1 + + gcc/config/loongarch/larchintrin.h | 38 +++++++++++++++++ + gcc/config/loongarch/lasx.md | 24 +++++++++++ + gcc/config/loongarch/lasxintrin.h | 34 +++++++++++++++ + gcc/config/loongarch/loongarch-builtins.cc | 42 +++++++++++++++++++ + gcc/config/loongarch/loongarch-c.cc | 3 ++ + gcc/config/loongarch/loongarch-cpucfg-map.h | 1 + + gcc/config/loongarch/loongarch-def.cc | 3 +- + gcc/config/loongarch/loongarch-str.h | 1 + + gcc/config/loongarch/loongarch.cc | 1 + + gcc/config/loongarch/loongarch.md | 35 +++++++++++++++- + gcc/config/loongarch/loongarch.opt | 4 ++ + gcc/config/loongarch/lsx.md | 24 +++++++++++ + gcc/config/loongarch/lsxintrin.h | 34 +++++++++++++++ + gcc/doc/extend.texi | 35 ++++++++++++++++ + .../loongarch/larch-frecipe-builtin.c | 28 +++++++++++++ + .../vector/lasx/lasx-frecipe-builtin.c | 30 +++++++++++++ + .../vector/lsx/lsx-frecipe-builtin.c | 30 +++++++++++++ + 18 files changed, 365 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c + +diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in +index a6bc3f87f..11a198b64 100644 +--- 
a/gcc/config/loongarch/genopts/isa-evolution.in ++++ b/gcc/config/loongarch/genopts/isa-evolution.in +@@ -1,3 +1,4 @@ ++2 25 frecipe Support frecipe.{s/d} and frsqrte.{s/d} instructions. + 2 26 div32 Support div.w[u] and mod.w[u] instructions with inputs not sign-extended. + 2 27 lam-bh Support am{swap/add}[_db].{b/h} instructions. + 2 28 lamcas Support amcas[_db].{b/h/w/d} instructions. +diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h +index 2833f1487..22035e767 100644 +--- a/gcc/config/loongarch/larchintrin.h ++++ b/gcc/config/loongarch/larchintrin.h +@@ -333,6 +333,44 @@ __iocsrwr_d (unsigned long int _1, unsigned int _2) + } + #endif + ++#ifdef __loongarch_frecipe ++/* Assembly instruction format: fd, fj. */ ++/* Data types in instruction templates: SF, SF. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__frecipe_s (float _1) ++{ ++ __builtin_loongarch_frecipe_s ((float) _1); ++} ++ ++/* Assembly instruction format: fd, fj. */ ++/* Data types in instruction templates: DF, DF. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__frecipe_d (double _1) ++{ ++ __builtin_loongarch_frecipe_d ((double) _1); ++} ++ ++/* Assembly instruction format: fd, fj. */ ++/* Data types in instruction templates: SF, SF. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__frsqrte_s (float _1) ++{ ++ __builtin_loongarch_frsqrte_s ((float) _1); ++} ++ ++/* Assembly instruction format: fd, fj. */ ++/* Data types in instruction templates: DF, DF. */ ++extern __inline void ++__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) ++__frsqrte_d (double _1) ++{ ++ __builtin_loongarch_frsqrte_d ((double) _1); ++} ++#endif ++ + /* Assembly instruction format: ui15. */ + /* Data types in instruction templates: USI. 
*/ + #define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar ((_1)) +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index de7c88f14..b1416f6c3 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -40,8 +40,10 @@ + UNSPEC_LASX_XVFCVTL + UNSPEC_LASX_XVFLOGB + UNSPEC_LASX_XVFRECIP ++ UNSPEC_LASX_XVFRECIPE + UNSPEC_LASX_XVFRINT + UNSPEC_LASX_XVFRSQRT ++ UNSPEC_LASX_XVFRSQRTE + UNSPEC_LASX_XVFCMP_SAF + UNSPEC_LASX_XVFCMP_SEQ + UNSPEC_LASX_XVFCMP_SLE +@@ -1633,6 +1635,17 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + ++;; Approximate Reciprocal Instructions. ++ ++(define_insn "lasx_xvfrecipe_" ++ [(set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] ++ UNSPEC_LASX_XVFRECIPE))] ++ "ISA_HAS_LASX && TARGET_FRECIPE" ++ "xvfrecipe.\t%u0,%u1" ++ [(set_attr "type" "simd_fdiv") ++ (set_attr "mode" "")]) ++ + (define_insn "lasx_xvfrsqrt_" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] +@@ -1642,6 +1655,17 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + ++;; Approximate Reciprocal Square Root Instructions. 
++ ++(define_insn "lasx_xvfrsqrte_" ++ [(set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] ++ UNSPEC_LASX_XVFRSQRTE))] ++ "ISA_HAS_LASX && TARGET_FRECIPE" ++ "xvfrsqrte.\t%u0,%u1" ++ [(set_attr "type" "simd_fdiv") ++ (set_attr "mode" "")]) ++ + (define_insn "lasx_xvftint_u__" + [(set (match_operand: 0 "register_operand" "=f") + (unspec: [(match_operand:FLASX 1 "register_operand" "f")] +diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h +index 7bce2c757..5e65e76e7 100644 +--- a/gcc/config/loongarch/lasxintrin.h ++++ b/gcc/config/loongarch/lasxintrin.h +@@ -2399,6 +2399,40 @@ __m256d __lasx_xvfrecip_d (__m256d _1) + return (__m256d)__builtin_lasx_xvfrecip_d ((v4f64)_1); + } + ++#if defined(__loongarch_frecipe) ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__m256 __lasx_xvfrecipe_s (__m256 _1) ++{ ++ return (__m256)__builtin_lasx_xvfrecipe_s ((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__m256d __lasx_xvfrecipe_d (__m256d _1) ++{ ++ return (__m256d)__builtin_lasx_xvfrecipe_d ((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V8SF, V8SF. */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__m256 __lasx_xvfrsqrte_s (__m256 _1) ++{ ++ return (__m256)__builtin_lasx_xvfrsqrte_s ((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DF, V4DF. 
*/ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__m256d __lasx_xvfrsqrte_d (__m256d _1) ++{ ++ return (__m256d)__builtin_lasx_xvfrsqrte_d ((v4f64)_1); ++} ++#endif ++ + /* Assembly instruction format: xd, xj. */ + /* Data types in instruction templates: V8SF, V8SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index f4523c8bf..bc156bd36 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -120,6 +120,9 @@ struct loongarch_builtin_description + AVAIL_ALL (hard_float, TARGET_HARD_FLOAT_ABI) + AVAIL_ALL (lsx, ISA_HAS_LSX) + AVAIL_ALL (lasx, ISA_HAS_LASX) ++AVAIL_ALL (frecipe, TARGET_FRECIPE && TARGET_HARD_FLOAT_ABI) ++AVAIL_ALL (lsx_frecipe, ISA_HAS_LSX && TARGET_FRECIPE) ++AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) + + /* Construct a loongarch_builtin_description from the given arguments. + +@@ -164,6 +167,15 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + "__builtin_lsx_" #INSN, LARCH_BUILTIN_DIRECT, \ + FUNCTION_TYPE, loongarch_builtin_avail_lsx } + ++ /* Define an LSX LARCH_BUILTIN_DIRECT function __builtin_lsx_ ++ for instruction CODE_FOR_lsx_. FUNCTION_TYPE is a builtin_description ++ field. AVAIL is the name of the availability predicate, without the leading ++ loongarch_builtin_avail_. */ ++#define LSX_EXT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ ++ { CODE_FOR_lsx_ ## INSN, \ ++ "__builtin_lsx_" #INSN, LARCH_BUILTIN_DIRECT, \ ++ FUNCTION_TYPE, loongarch_builtin_avail_##AVAIL } ++ + + /* Define an LSX LARCH_BUILTIN_LSX_TEST_BRANCH function __builtin_lsx_ + for instruction CODE_FOR_lsx_. 
FUNCTION_TYPE is a builtin_description +@@ -189,6 +201,15 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) + "__builtin_lasx_" #INSN, LARCH_BUILTIN_LASX, \ + FUNCTION_TYPE, loongarch_builtin_avail_lasx } + ++/* Define an LASX LARCH_BUILTIN_DIRECT function __builtin_lasx_ ++ for instruction CODE_FOR_lasx_. FUNCTION_TYPE is a builtin_description ++ field. AVAIL is the name of the availability predicate, without the leading ++ loongarch_builtin_avail_. */ ++#define LASX_EXT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ ++ { CODE_FOR_lasx_ ## INSN, \ ++ "__builtin_lasx_" #INSN, LARCH_BUILTIN_LASX, \ ++ FUNCTION_TYPE, loongarch_builtin_avail_##AVAIL } ++ + /* Define an LASX LARCH_BUILTIN_DIRECT_NO_TARGET function __builtin_lasx_ + for instruction CODE_FOR_lasx_. FUNCTION_TYPE is a builtin_description + field. */ +@@ -804,6 +825,27 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { + DIRECT_NO_TARGET_BUILTIN (syscall, LARCH_VOID_FTYPE_USI, default), + DIRECT_NO_TARGET_BUILTIN (break, LARCH_VOID_FTYPE_USI, default), + ++ /* Built-in functions for frecipe.{s/d} and frsqrte.{s/d}. */ ++ ++ DIRECT_BUILTIN (frecipe_s, LARCH_SF_FTYPE_SF, frecipe), ++ DIRECT_BUILTIN (frecipe_d, LARCH_DF_FTYPE_DF, frecipe), ++ DIRECT_BUILTIN (frsqrte_s, LARCH_SF_FTYPE_SF, frecipe), ++ DIRECT_BUILTIN (frsqrte_d, LARCH_DF_FTYPE_DF, frecipe), ++ ++ /* Built-in functions for new LSX instructions. */ ++ ++ LSX_EXT_BUILTIN (vfrecipe_s, LARCH_V4SF_FTYPE_V4SF, lsx_frecipe), ++ LSX_EXT_BUILTIN (vfrecipe_d, LARCH_V2DF_FTYPE_V2DF, lsx_frecipe), ++ LSX_EXT_BUILTIN (vfrsqrte_s, LARCH_V4SF_FTYPE_V4SF, lsx_frecipe), ++ LSX_EXT_BUILTIN (vfrsqrte_d, LARCH_V2DF_FTYPE_V2DF, lsx_frecipe), ++ ++ /* Built-in functions for new LASX instructions. 
*/ ++ ++ LASX_EXT_BUILTIN (xvfrecipe_s, LARCH_V8SF_FTYPE_V8SF, lasx_frecipe), ++ LASX_EXT_BUILTIN (xvfrecipe_d, LARCH_V4DF_FTYPE_V4DF, lasx_frecipe), ++ LASX_EXT_BUILTIN (xvfrsqrte_s, LARCH_V8SF_FTYPE_V8SF, lasx_frecipe), ++ LASX_EXT_BUILTIN (xvfrsqrte_d, LARCH_V4DF_FTYPE_V4DF, lasx_frecipe), ++ + /* Built-in functions for LSX. */ + LSX_BUILTIN (vsll_b, LARCH_V16QI_FTYPE_V16QI_V16QI), + LSX_BUILTIN (vsll_h, LARCH_V8HI_FTYPE_V8HI_V8HI), +diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc +index 76c8ea8db..a89477a74 100644 +--- a/gcc/config/loongarch/loongarch-c.cc ++++ b/gcc/config/loongarch/loongarch-c.cc +@@ -102,6 +102,9 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + else + builtin_define ("__loongarch_frlen=0"); + ++ if (TARGET_HARD_FLOAT && TARGET_FRECIPE) ++ builtin_define ("__loongarch_frecipe"); ++ + if (ISA_HAS_LSX) + { + builtin_define ("__loongarch_simd"); +diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-cpucfg-map.h +index 02ff16712..148333c24 100644 +--- a/gcc/config/loongarch/loongarch-cpucfg-map.h ++++ b/gcc/config/loongarch/loongarch-cpucfg-map.h +@@ -29,6 +29,7 @@ static constexpr struct { + unsigned int cpucfg_bit; + HOST_WIDE_INT isa_evolution_bit; + } cpucfg_map[] = { ++ { 2, 1u << 25, OPTION_MASK_ISA_FRECIPE }, + { 2, 1u << 26, OPTION_MASK_ISA_DIV32 }, + { 2, 1u << 27, OPTION_MASK_ISA_LAM_BH }, + { 2, 1u << 28, OPTION_MASK_ISA_LAMCAS }, +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index bc6997e45..c41804a18 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -60,7 +60,8 @@ array_arch loongarch_cpu_default_isa = + .fpu_ (ISA_EXT_FPU64) + .simd_ (ISA_EXT_SIMD_LASX) + .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA +- | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS)); ++ | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS ++ | OPTION_MASK_ISA_FRECIPE)); + + 
static inline loongarch_cache la464_cache () + { +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index 7144bbe28..a8821acb0 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -68,6 +68,7 @@ along with GCC; see the file COPYING3. If not see + #define STR_EXPLICIT_RELOCS_NONE "none" + #define STR_EXPLICIT_RELOCS_ALWAYS "always" + ++#define OPTSTR_FRECIPE "frecipe" + #define OPTSTR_DIV32 "div32" + #define OPTSTR_LAM_BH "lam-bh" + #define OPTSTR_LAMCAS "lamcas" +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 3c8ae9a42..ce1c0a8bd 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -11503,6 +11503,7 @@ loongarch_asm_code_end (void) + loongarch_cpu_strings [la_target.cpu_tune]); + fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START, + loongarch_isa_base_strings [la_target.isa.base]); ++ DUMP_FEATURE (TARGET_FRECIPE); + DUMP_FEATURE (TARGET_DIV32); + DUMP_FEATURE (TARGET_LAM_BH); + DUMP_FEATURE (TARGET_LAMCAS); +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index afc3c591f..9080cec1c 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -59,6 +59,12 @@ + ;; Stack tie + UNSPEC_TIE + ++ ;; RSQRT ++ UNSPEC_RSQRTE ++ ++ ;; RECIP ++ UNSPEC_RECIPE ++ + ;; CRC + UNSPEC_CRC + UNSPEC_CRCC +@@ -220,6 +226,7 @@ + ;; fmadd floating point multiply-add + ;; fdiv floating point divide + ;; frdiv floating point reciprocal divide ++;; frecipe floating point approximate reciprocal + ;; fabs floating point absolute value + ;; flogb floating point exponent extract + ;; fneg floating point negation +@@ -229,6 +236,7 @@ + ;; fscaleb floating point scale + ;; fsqrt floating point square root + ;; frsqrt floating point reciprocal square root ++;; frsqrte floating point approximate reciprocal square root + ;; multi multiword sequence (or 
user asm statements) + ;; atomic atomic memory update instruction + ;; syncloop memory atomic operation implemented as a sync loop +@@ -238,8 +246,8 @@ + "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore, + prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical, + shift,slt,signext,clz,trap,imul,idiv,move, +- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,flogb,fneg,fcmp,fcopysign,fcvt, +- fscaleb,fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost, ++ fmove,fadd,fmul,fmadd,fdiv,frdiv,frecipe,fabs,flogb,fneg,fcmp,fcopysign,fcvt, ++ fscaleb,fsqrt,frsqrt,frsqrte,accext,accmod,multi,atomic,syncloop,nop,ghost, + simd_div,simd_fclass,simd_flog2,simd_fadd,simd_fcvt,simd_fmul,simd_fmadd, + simd_fdiv,simd_bitins,simd_bitmov,simd_insert,simd_sld,simd_mul,simd_fcmp, + simd_fexp2,simd_int_arith,simd_bit,simd_shift,simd_splat,simd_fill, +@@ -908,6 +916,18 @@ + [(set_attr "type" "frdiv") + (set_attr "mode" "")]) + ++;; Approximate Reciprocal Instructions. ++ ++(define_insn "loongarch_frecipe_" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] ++ UNSPEC_RECIPE))] ++ "TARGET_FRECIPE" ++ "frecipe.\t%0,%1" ++ [(set_attr "type" "frecipe") ++ (set_attr "mode" "") ++ (set_attr "insn_count" "1")]) ++ + ;; Integer division and modulus. + (define_expand "3" + [(set (match_operand:GPR 0 "register_operand") +@@ -1133,6 +1153,17 @@ + [(set_attr "type" "frsqrt") + (set_attr "mode" "") + (set_attr "insn_count" "1")]) ++ ++;; Approximate Reciprocal Square Root Instructions. ++ ++(define_insn "loongarch_frsqrte_" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] ++ UNSPEC_RSQRTE))] ++ "TARGET_FRECIPE" ++ "frsqrte.\t%0,%1" ++ [(set_attr "type" "frsqrte") ++ (set_attr "mode" "")]) + + ;; + ;; .................... 
+diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 7fe36feb9..e7bc8bed4 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -260,6 +260,10 @@ default value is 4. + Variable + HOST_WIDE_INT isa_evolution = 0 + ++mfrecipe ++Target Mask(ISA_FRECIPE) Var(isa_evolution) ++Support frecipe.{s/d} and frsqrte.{s/d} instructions. ++ + mdiv32 + Target Mask(ISA_DIV32) Var(isa_evolution) + Support div.w[u] and mod.w[u] instructions with inputs not sign-extended. +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index ce6ec6d69..37bdc6910 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -42,8 +42,10 @@ + UNSPEC_LSX_VFCVTL + UNSPEC_LSX_VFLOGB + UNSPEC_LSX_VFRECIP ++ UNSPEC_LSX_VFRECIPE + UNSPEC_LSX_VFRINT + UNSPEC_LSX_VFRSQRT ++ UNSPEC_LSX_VFRSQRTE + UNSPEC_LSX_VFCMP_SAF + UNSPEC_LSX_VFCMP_SEQ + UNSPEC_LSX_VFCMP_SLE +@@ -1546,6 +1548,17 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + ++;; Approximate Reciprocal Instructions. ++ ++(define_insn "lsx_vfrecipe_" ++ [(set (match_operand:FLSX 0 "register_operand" "=f") ++ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] ++ UNSPEC_LSX_VFRECIPE))] ++ "ISA_HAS_LSX && TARGET_FRECIPE" ++ "vfrecipe.\t%w0,%w1" ++ [(set_attr "type" "simd_fdiv") ++ (set_attr "mode" "")]) ++ + (define_insn "lsx_vfrsqrt_" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] +@@ -1555,6 +1568,17 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + ++;; Approximate Reciprocal Square Root Instructions. 
++ ++(define_insn "lsx_vfrsqrte_" ++ [(set (match_operand:FLSX 0 "register_operand" "=f") ++ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] ++ UNSPEC_LSX_VFRSQRTE))] ++ "ISA_HAS_LSX && TARGET_FRECIPE" ++ "vfrsqrte.\t%w0,%w1" ++ [(set_attr "type" "simd_fdiv") ++ (set_attr "mode" "")]) ++ + (define_insn "lsx_vftint_u__" + [(set (match_operand: 0 "register_operand" "=f") + (unspec: [(match_operand:FLSX 1 "register_operand" "f")] +diff --git a/gcc/config/loongarch/lsxintrin.h b/gcc/config/loongarch/lsxintrin.h +index 29553c093..57a6fc40a 100644 +--- a/gcc/config/loongarch/lsxintrin.h ++++ b/gcc/config/loongarch/lsxintrin.h +@@ -2480,6 +2480,40 @@ __m128d __lsx_vfrecip_d (__m128d _1) + return (__m128d)__builtin_lsx_vfrecip_d ((v2f64)_1); + } + ++#if defined(__loongarch_frecipe) ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__m128 __lsx_vfrecipe_s (__m128 _1) ++{ ++ return (__m128)__builtin_lsx_vfrecipe_s ((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__m128d __lsx_vfrecipe_d (__m128d _1) ++{ ++ return (__m128d)__builtin_lsx_vfrecipe_d ((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V4SF, V4SF. */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__m128 __lsx_vfrsqrte_s (__m128 _1) ++{ ++ return (__m128)__builtin_lsx_vfrsqrte_s ((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: V2DF, V2DF. 
*/ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__m128d __lsx_vfrsqrte_d (__m128d _1) ++{ ++ return (__m128d)__builtin_lsx_vfrsqrte_d ((v2f64)_1); ++} ++#endif ++ + /* Assembly instruction format: vd, vj. */ + /* Data types in instruction templates: V4SF, V4SF. */ + extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index 7edd3974d..bb042ae78 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -16187,6 +16187,14 @@ The intrinsics provided are listed below: + void __builtin_loongarch_break (imm0_32767) + @end smallexample + ++These instrisic functions are available by using @option{-mfrecipe}. ++@smallexample ++ float __builtin_loongarch_frecipe_s (float); ++ double __builtin_loongarch_frecipe_d (double); ++ float __builtin_loongarch_frsqrte_s (float); ++ double __builtin_loongarch_frsqrte_d (double); ++@end smallexample ++ + @emph{Note:}Since the control register is divided into 32-bit and 64-bit, + but the access instruction is not distinguished. So GCC renames the control + instructions when implementing intrinsics. +@@ -16259,6 +16267,15 @@ function you need to include @code{larchintrin.h}. + void __break (imm0_32767) + @end smallexample + ++These instrisic functions are available by including @code{larchintrin.h} and ++using @option{-mfrecipe}. ++@smallexample ++ float __frecipe_s (float); ++ double __frecipe_d (double); ++ float __frsqrte_s (float); ++ double __frsqrte_d (double); ++@end smallexample ++ + Returns the value that is currently set in the @samp{tp} register. + @smallexample + void * __builtin_thread_pointer (void) +@@ -17085,6 +17102,15 @@ __m128i __lsx_vxori_b (__m128i, imm0_255); + __m128i __lsx_vxor_v (__m128i, __m128i); + @end smallexample + ++These instrisic functions are available by including @code{lsxintrin.h} and ++using @option{-mfrecipe} and @option{-mlsx}. 
++@smallexample ++__m128d __lsx_vfrecipe_d (__m128d); ++__m128 __lsx_vfrecipe_s (__m128); ++__m128d __lsx_vfrsqrte_d (__m128d); ++__m128 __lsx_vfrsqrte_s (__m128); ++@end smallexample ++ + @node LoongArch ASX Vector Intrinsics + @subsection LoongArch ASX Vector Intrinsics + +@@ -17924,6 +17950,15 @@ __m256i __lasx_xvxori_b (__m256i, imm0_255); + __m256i __lasx_xvxor_v (__m256i, __m256i); + @end smallexample + ++These instrisic functions are available by including @code{lasxintrin.h} and ++using @option{-mfrecipe} and @option{-mlasx}. ++@smallexample ++__m256d __lasx_xvfrecipe_d (__m256d); ++__m256 __lasx_xvfrecipe_s (__m256); ++__m256d __lasx_xvfrsqrte_d (__m256d); ++__m256 __lasx_xvfrsqrte_s (__m256); ++@end smallexample ++ + @node MIPS DSP Built-in Functions + @subsection MIPS DSP Built-in Functions + +diff --git a/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c +new file mode 100644 +index 000000000..b9329f346 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c +@@ -0,0 +1,28 @@ ++/* Test builtins for frecipe.{s/d} and frsqrte.{s/d} instructions */ ++/* { dg-do compile } */ ++/* { dg-options "-mfrecipe" } */ ++/* { dg-final { scan-assembler-times "test_frecipe_s:.*frecipe\\.s.*test_frecipe_s" 1 } } */ ++/* { dg-final { scan-assembler-times "test_frecipe_d:.*frecipe\\.d.*test_frecipe_d" 1 } } */ ++/* { dg-final { scan-assembler-times "test_frsqrte_s:.*frsqrte\\.s.*test_frsqrte_s" 1 } } */ ++/* { dg-final { scan-assembler-times "test_frsqrte_d:.*frsqrte\\.d.*test_frsqrte_d" 1 } } */ ++ ++float ++test_frecipe_s (float _1) ++{ ++ return __builtin_loongarch_frecipe_s (_1); ++} ++double ++test_frecipe_d (double _1) ++{ ++ return __builtin_loongarch_frecipe_d (_1); ++} ++float ++test_frsqrte_s (float _1) ++{ ++ return __builtin_loongarch_frsqrte_s (_1); ++} ++double ++test_frsqrte_d (double _1) ++{ ++ return __builtin_loongarch_frsqrte_d (_1); ++} +diff --git 
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c +new file mode 100644 +index 000000000..522535b45 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c +@@ -0,0 +1,30 @@ ++/* Test builtins for xvfrecipe.{s/d} and xvfrsqrte.{s/d} instructions */ ++/* { dg-do compile } */ ++/* { dg-options "-mlasx -mfrecipe" } */ ++/* { dg-final { scan-assembler-times "lasx_xvfrecipe_s:.*xvfrecipe\\.s.*lasx_xvfrecipe_s" 1 } } */ ++/* { dg-final { scan-assembler-times "lasx_xvfrecipe_d:.*xvfrecipe\\.d.*lasx_xvfrecipe_d" 1 } } */ ++/* { dg-final { scan-assembler-times "lasx_xvfrsqrte_s:.*xvfrsqrte\\.s.*lasx_xvfrsqrte_s" 1 } } */ ++/* { dg-final { scan-assembler-times "lasx_xvfrsqrte_d:.*xvfrsqrte\\.d.*lasx_xvfrsqrte_d" 1 } } */ ++ ++#include <lasxintrin.h> ++ ++v8f32 ++__lasx_xvfrecipe_s (v8f32 _1) ++{ ++ return __builtin_lasx_xvfrecipe_s (_1); ++} ++v4f64 ++__lasx_xvfrecipe_d (v4f64 _1) ++{ ++ return __builtin_lasx_xvfrecipe_d (_1); ++} ++v8f32 ++__lasx_xvfrsqrte_s (v8f32 _1) ++{ ++ return __builtin_lasx_xvfrsqrte_s (_1); ++} ++v4f64 ++__lasx_xvfrsqrte_d (v4f64 _1) ++{ ++ return __builtin_lasx_xvfrsqrte_d (_1); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c +new file mode 100644 +index 000000000..4ad0cb0ff +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c +@@ -0,0 +1,30 @@ ++/* Test builtins for vfrecipe.{s/d} and vfrsqrte.{s/d} instructions */ ++/* { dg-do compile } */ ++/* { dg-options "-mlsx -mfrecipe" } */ ++/* { dg-final { scan-assembler-times "lsx_vfrecipe_s:.*vfrecipe\\.s.*lsx_vfrecipe_s" 1 } } */ ++/* { dg-final { scan-assembler-times "lsx_vfrecipe_d:.*vfrecipe\\.d.*lsx_vfrecipe_d" 1 } } */ ++/* { dg-final { scan-assembler-times "lsx_vfrsqrte_s:.*vfrsqrte\\.s.*lsx_vfrsqrte_s" 1 } } */ ++/* { dg-final { 
scan-assembler-times "lsx_vfrsqrte_d:.*vfrsqrte\\.d.*lsx_vfrsqrte_d" 1 } } */ ++ ++#include <lsxintrin.h> ++ ++v4f32 ++__lsx_vfrecipe_s (v4f32 _1) ++{ ++ return __builtin_lsx_vfrecipe_s (_1); ++} ++v2f64 ++__lsx_vfrecipe_d (v2f64 _1) ++{ ++ return __builtin_lsx_vfrecipe_d (_1); ++} ++v4f32 ++__lsx_vfrsqrte_s (v4f32 _1) ++{ ++ return __builtin_lsx_vfrsqrte_s (_1); ++} ++v2f64 ++__lsx_vfrsqrte_d (v2f64 _1) ++{ ++ return __builtin_lsx_vfrsqrte_d (_1); ++} +-- +2.43.0 + diff --git a/0060-LoongArch-Use-standard-pattern-name-for-xvfrsqrt-vfr.patch b/0060-LoongArch-Use-standard-pattern-name-for-xvfrsqrt-vfr.patch new file mode 100644 index 0000000..e62d27e --- /dev/null +++ b/0060-LoongArch-Use-standard-pattern-name-for-xvfrsqrt-vfr.patch @@ -0,0 +1,257 @@ +From e8210e26ac638eb443f8991fee6d412b297cb279 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Wed, 6 Dec 2023 15:04:50 +0800 +Subject: [PATCH 060/188] LoongArch: Use standard pattern name for + xvfrsqrt/vfrsqrt instructions. + +Rename lasx_xvfrsqrt*/lsx_vfrsqrt* to rsqrt<mode>2 to align with standard +pattern name. Define function use_rsqrt_p to decide when to use rsqrt optab. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (lasx_xvfrsqrt_): Renamed to .. + (rsqrt<mode>2): .. this. + * config/loongarch/loongarch-builtins.cc + (CODE_FOR_lsx_vfrsqrt_d): Redefine to standard pattern name. + (CODE_FOR_lsx_vfrsqrt_s): Ditto. + (CODE_FOR_lasx_xvfrsqrt_d): Ditto. + (CODE_FOR_lasx_xvfrsqrt_s): Ditto. + * config/loongarch/loongarch.cc (use_rsqrt_p): New function. + (loongarch_optab_supported_p): Ditto. + (TARGET_OPTAB_SUPPORTED_P): New hook. + * config/loongarch/loongarch.md (*rsqrt<mode>a): Remove. + (*rsqrt<mode>2): New insn pattern. + (*rsqrt<mode>b): Remove. + * config/loongarch/lsx.md (lsx_vfrsqrt_): Renamed to .. + (rsqrt<mode>2): .. this. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-rsqrt.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-rsqrt.c: New test. 
+--- + gcc/config/loongarch/lasx.md | 6 ++--- + gcc/config/loongarch/loongarch-builtins.cc | 4 +++ + gcc/config/loongarch/loongarch.cc | 27 +++++++++++++++++++ + gcc/config/loongarch/loongarch.md | 24 +++++------------ + gcc/config/loongarch/lsx.md | 6 ++--- + .../loongarch/vector/lasx/lasx-rsqrt.c | 26 ++++++++++++++++++ + .../loongarch/vector/lsx/lsx-rsqrt.c | 26 ++++++++++++++++++ + 7 files changed, 96 insertions(+), 23 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index b1416f6c3..3a4a1fe51 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1646,10 +1646,10 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + +-(define_insn "lasx_xvfrsqrt_" ++(define_insn "rsqrt2" + [(set (match_operand:FLASX 0 "register_operand" "=f") +- (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] +- UNSPEC_LASX_XVFRSQRT))] ++ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] ++ UNSPEC_LASX_XVFRSQRT))] + "ISA_HAS_LASX" + "xvfrsqrt.\t%u0,%u1" + [(set_attr "type" "simd_fdiv") +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index bc156bd36..4aae27a5e 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -500,6 +500,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) + #define CODE_FOR_lsx_vssrlrn_bu_h CODE_FOR_lsx_vssrlrn_u_bu_h + #define CODE_FOR_lsx_vssrlrn_hu_w CODE_FOR_lsx_vssrlrn_u_hu_w + #define CODE_FOR_lsx_vssrlrn_wu_d CODE_FOR_lsx_vssrlrn_u_wu_d ++#define CODE_FOR_lsx_vfrsqrt_d CODE_FOR_rsqrtv2df2 ++#define CODE_FOR_lsx_vfrsqrt_s CODE_FOR_rsqrtv4sf2 + + /* LoongArch ASX define CODE_FOR_lasx_mxxx */ + #define CODE_FOR_lasx_xvsadd_b CODE_FOR_ssaddv32qi3 +@@ -776,6 +778,8 @@ AVAIL_ALL 
(lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) + #define CODE_FOR_lasx_xvsat_hu CODE_FOR_lasx_xvsat_u_hu + #define CODE_FOR_lasx_xvsat_wu CODE_FOR_lasx_xvsat_u_wu + #define CODE_FOR_lasx_xvsat_du CODE_FOR_lasx_xvsat_u_du ++#define CODE_FOR_lasx_xvfrsqrt_d CODE_FOR_rsqrtv4df2 ++#define CODE_FOR_lasx_xvfrsqrt_s CODE_FOR_rsqrtv8sf2 + + static const struct loongarch_builtin_description loongarch_builtins[] = { + #define LARCH_MOVFCSR2GR 0 +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index ce1c0a8bd..95aa9453b 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -11487,6 +11487,30 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode, + is_packed); + } + ++static bool ++use_rsqrt_p (void) ++{ ++ return (flag_finite_math_only ++ && !flag_trapping_math ++ && flag_unsafe_math_optimizations); ++} ++ ++/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */ ++ ++static bool ++loongarch_optab_supported_p (int op, machine_mode, machine_mode, ++ optimization_type opt_type) ++{ ++ switch (op) ++ { ++ case rsqrt_optab: ++ return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (); ++ ++ default: ++ return true; ++ } ++} ++ + /* If -fverbose-asm, dump some info for debugging. 
*/ + static void + loongarch_asm_code_end (void) +@@ -11625,6 +11649,9 @@ loongarch_asm_code_end (void) + #undef TARGET_FUNCTION_ARG_BOUNDARY + #define TARGET_FUNCTION_ARG_BOUNDARY loongarch_function_arg_boundary + ++#undef TARGET_OPTAB_SUPPORTED_P ++#define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p ++ + #undef TARGET_VECTOR_MODE_SUPPORTED_P + #define TARGET_VECTOR_MODE_SUPPORTED_P loongarch_vector_mode_supported_p + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 9080cec1c..4dfe583e2 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -60,6 +60,7 @@ + UNSPEC_TIE + + ;; RSQRT ++ UNSPEC_RSQRT + UNSPEC_RSQRTE + + ;; RECIP +@@ -1134,25 +1135,14 @@ + (set_attr "mode" "") + (set_attr "insn_count" "1")]) + +-(define_insn "*rsqrta" ++(define_insn "*rsqrt2" + [(set (match_operand:ANYF 0 "register_operand" "=f") +- (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") +- (sqrt:ANYF (match_operand:ANYF 2 "register_operand" "f"))))] +- "flag_unsafe_math_optimizations" +- "frsqrt.\t%0,%2" +- [(set_attr "type" "frsqrt") +- (set_attr "mode" "") +- (set_attr "insn_count" "1")]) +- +-(define_insn "*rsqrtb" +- [(set (match_operand:ANYF 0 "register_operand" "=f") +- (sqrt:ANYF (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") +- (match_operand:ANYF 2 "register_operand" "f"))))] +- "flag_unsafe_math_optimizations" +- "frsqrt.\t%0,%2" ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] ++ UNSPEC_RSQRT))] ++ "TARGET_HARD_FLOAT" ++ "frsqrt.\t%0,%1" + [(set_attr "type" "frsqrt") +- (set_attr "mode" "") +- (set_attr "insn_count" "1")]) ++ (set_attr "mode" "")]) + + ;; Approximate Reciprocal Square Root Instructions. 
+ +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 37bdc6910..cb4a448e7 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -1559,10 +1559,10 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + +-(define_insn "lsx_vfrsqrt_" ++(define_insn "rsqrt2" + [(set (match_operand:FLSX 0 "register_operand" "=f") +- (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] +- UNSPEC_LSX_VFRSQRT))] ++ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] ++ UNSPEC_LSX_VFRSQRT))] + "ISA_HAS_LSX" + "vfrsqrt.\t%w0,%w1" + [(set_attr "type" "simd_fdiv") +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c +new file mode 100644 +index 000000000..24316944d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx -ffast-math" } */ ++/* { dg-final { scan-assembler "xvfrsqrt.s" } } */ ++/* { dg-final { scan-assembler "xvfrsqrt.d" } } */ ++ ++extern float sqrtf (float); ++ ++float a[8], b[8]; ++ ++void ++foo1(void) ++{ ++ for (int i = 0; i < 8; i++) ++ a[i] = 1 / sqrtf (b[i]); ++} ++ ++extern double sqrt (double); ++ ++double da[4], db[4]; ++ ++void ++foo2(void) ++{ ++ for (int i = 0; i < 4; i++) ++ da[i] = 1 / sqrt (db[i]); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c +new file mode 100644 +index 000000000..519cc4764 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -ffast-math" } */ ++/* { dg-final { scan-assembler "vfrsqrt.s" } } */ ++/* { dg-final { scan-assembler "vfrsqrt.d" } } */ ++ ++extern float sqrtf (float); ++ ++float a[4], b[4]; ++ ++void ++foo1(void) ++{ ++ for (int i = 0; i < 4; i++) ++ a[i] = 1 / sqrtf (b[i]); ++} ++ ++extern 
double sqrt (double); ++ ++double da[2], db[2]; ++ ++void ++foo2(void) ++{ ++ for (int i = 0; i < 2; i++) ++ da[i] = 1 / sqrt (db[i]); ++} +-- +2.43.0 + diff --git a/0061-LoongArch-Redefine-pattern-for-xvfrecip-vfrecip-inst.patch b/0061-LoongArch-Redefine-pattern-for-xvfrecip-vfrecip-inst.patch new file mode 100644 index 0000000..d616063 --- /dev/null +++ b/0061-LoongArch-Redefine-pattern-for-xvfrecip-vfrecip-inst.patch @@ -0,0 +1,135 @@ +From 74924710ee8d662d883bf898d69aef1946d91ea5 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Wed, 6 Dec 2023 15:04:51 +0800 +Subject: [PATCH 061/188] LoongArch: Redefine pattern for xvfrecip/vfrecip + instructions. + +Redefine the pattern for [x]vfrecip instructions to use rtx code instead of unspec, and enable +[x]vfrecip instructions to be generated during auto-vectorization. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (lasx_xvfrecip_): Renamed to .. + (recip<mode>3): .. this. + * config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vfrecip_d): Redefine + to new pattern name. + (CODE_FOR_lsx_vfrecip_s): Ditto. + (CODE_FOR_lasx_xvfrecip_d): Ditto. + (CODE_FOR_lasx_xvfrecip_s): Ditto. + (loongarch_expand_builtin_direct): For the vector recip instructions, construct a + temporary parameter const1_vector. + * config/loongarch/lsx.md (lsx_vfrecip_): Renamed to .. + (recip<mode>3): .. this. + * config/loongarch/predicates.md (const_vector_1_operand): New predicate. 
+--- + gcc/config/loongarch/lasx.md | 8 ++++---- + gcc/config/loongarch/loongarch-builtins.cc | 20 ++++++++++++++++++++ + gcc/config/loongarch/lsx.md | 8 ++++---- + gcc/config/loongarch/predicates.md | 4 ++++ + 4 files changed, 32 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 3a4a1fe51..ad49a3ffb 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1626,12 +1626,12 @@ + [(set_attr "type" "simd_fminmax") + (set_attr "mode" "")]) + +-(define_insn "lasx_xvfrecip_" ++(define_insn "recip3" + [(set (match_operand:FLASX 0 "register_operand" "=f") +- (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] +- UNSPEC_LASX_XVFRECIP))] ++ (div:FLASX (match_operand:FLASX 1 "const_vector_1_operand" "") ++ (match_operand:FLASX 2 "register_operand" "f")))] + "ISA_HAS_LASX" +- "xvfrecip.\t%u0,%u1" ++ "xvfrecip.\t%u0,%u2" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index 4aae27a5e..85849ed29 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -502,6 +502,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) + #define CODE_FOR_lsx_vssrlrn_wu_d CODE_FOR_lsx_vssrlrn_u_wu_d + #define CODE_FOR_lsx_vfrsqrt_d CODE_FOR_rsqrtv2df2 + #define CODE_FOR_lsx_vfrsqrt_s CODE_FOR_rsqrtv4sf2 ++#define CODE_FOR_lsx_vfrecip_d CODE_FOR_recipv2df3 ++#define CODE_FOR_lsx_vfrecip_s CODE_FOR_recipv4sf3 + + /* LoongArch ASX define CODE_FOR_lasx_mxxx */ + #define CODE_FOR_lasx_xvsadd_b CODE_FOR_ssaddv32qi3 +@@ -780,6 +782,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) + #define CODE_FOR_lasx_xvsat_du CODE_FOR_lasx_xvsat_u_du + #define CODE_FOR_lasx_xvfrsqrt_d CODE_FOR_rsqrtv4df2 + #define CODE_FOR_lasx_xvfrsqrt_s CODE_FOR_rsqrtv8sf2 ++#define CODE_FOR_lasx_xvfrecip_d CODE_FOR_recipv4df3 ++#define 
CODE_FOR_lasx_xvfrecip_s CODE_FOR_recipv8sf3 + + static const struct loongarch_builtin_description loongarch_builtins[] = { + #define LARCH_MOVFCSR2GR 0 +@@ -3019,6 +3023,22 @@ loongarch_expand_builtin_direct (enum insn_code icode, rtx target, tree exp, + if (has_target_p) + create_output_operand (&ops[opno++], target, TYPE_MODE (TREE_TYPE (exp))); + ++ /* For the vector reciprocal instructions, we need to construct a temporary ++ parameter const1_vector. */ ++ switch (icode) ++ { ++ case CODE_FOR_recipv8sf3: ++ case CODE_FOR_recipv4df3: ++ case CODE_FOR_recipv4sf3: ++ case CODE_FOR_recipv2df3: ++ loongarch_prepare_builtin_arg (&ops[2], exp, 0); ++ create_input_operand (&ops[1], CONST1_RTX (ops[0].mode), ops[0].mode); ++ return loongarch_expand_builtin_insn (icode, 3, ops, has_target_p); ++ ++ default: ++ break; ++ } ++ + /* Map the arguments to the other operands. */ + gcc_assert (opno + call_expr_nargs (exp) + == insn_data[icode].n_generator_args); +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index cb4a448e7..f2774f021 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -1539,12 +1539,12 @@ + [(set_attr "type" "simd_fminmax") + (set_attr "mode" "")]) + +-(define_insn "lsx_vfrecip_" ++(define_insn "recip3" + [(set (match_operand:FLSX 0 "register_operand" "=f") +- (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] +- UNSPEC_LSX_VFRECIP))] ++ (div:FLSX (match_operand:FLSX 1 "const_vector_1_operand" "") ++ (match_operand:FLSX 2 "register_operand" "f")))] + "ISA_HAS_LSX" +- "vfrecip.\t%w0,%w1" ++ "vfrecip.\t%w0,%w2" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 30a0dee9f..572550dbc 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -227,6 +227,10 @@ + (and (match_code "const_int,const_wide_int,const_double,const_vector") + (match_test "op == CONST1_RTX 
(GET_MODE (op))"))) + ++(define_predicate "const_vector_1_operand" ++ (and (match_code "const_vector") ++ (match_test "op == CONST1_RTX (GET_MODE (op))"))) ++ + (define_predicate "reg_or_1_operand" + (ior (match_operand 0 "const_1_operand") + (match_operand 0 "register_operand"))) +-- +2.43.0 + diff --git a/0062-LoongArch-New-options-mrecip-and-mrecip-with-ffast-m.patch b/0062-LoongArch-New-options-mrecip-and-mrecip-with-ffast-m.patch new file mode 100644 index 0000000..ad072c7 --- /dev/null +++ b/0062-LoongArch-New-options-mrecip-and-mrecip-with-ffast-m.patch @@ -0,0 +1,1096 @@ +From faac4efbee23e60691fc086a78284225ecf824a8 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Wed, 6 Dec 2023 15:04:52 +0800 +Subject: [PATCH 062/188] LoongArch: New options -mrecip and -mrecip= with + ffast-math. + +When both the -mrecip and -mfrecipe options are enabled, use approximate reciprocal +instructions and approximate reciprocal square root instructions with additional +Newton-Raphson steps to implement single precision floating-point division, square +root and reciprocal square root operations, for a better performance. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch.opt.in (recip_mask): New variable. + (-mrecip, -mrecip): New options. + * config/loongarch/lasx.md (div3): New expander. + (*div3): Rename. + (sqrt2): New expander. + (*sqrt2): Rename. + (rsqrt2): New expander. + * config/loongarch/loongarch-protos.h (loongarch_emit_swrsqrtsf): New prototype. + (loongarch_emit_swdivsf): Ditto. + * config/loongarch/loongarch.cc (loongarch_option_override_internal): Set + recip_mask for -mrecip and -mrecip= options. + (loongarch_emit_swrsqrtsf): New function. + (loongarch_emit_swdivsf): Ditto. + * config/loongarch/loongarch.h (RECIP_MASK_NONE, RECIP_MASK_DIV, RECIP_MASK_SQRT + RECIP_MASK_RSQRT, RECIP_MASK_VEC_DIV, RECIP_MASK_VEC_SQRT, RECIP_MASK_VEC_RSQRT + RECIP_MASK_ALL): New bitmasks. 
+ (TARGET_RECIP_DIV, TARGET_RECIP_SQRT, TARGET_RECIP_RSQRT, TARGET_RECIP_VEC_DIV + TARGET_RECIP_VEC_SQRT, TARGET_RECIP_VEC_RSQRT): New tests. + * config/loongarch/loongarch.md (sqrt2): New expander. + (*sqrt2): Rename. + (rsqrt2): New expander. + * config/loongarch/loongarch.opt (recip_mask): New variable. + (-mrecip, -mrecip=): New options. + * config/loongarch/lsx.md (div3): New expander. + (*div3): Rename. + (sqrt2): New expander. + (*sqrt2): Rename. + (rsqrt2): New expander. + * config/loongarch/predicates.md (reg_or_vecotr_1_operand): New predicate. + * doc/invoke.texi (LoongArch Options): Document new options. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/divf.c: New test. + * gcc.target/loongarch/recip-divf.c: New test. + * gcc.target/loongarch/recip-sqrtf.c: New test. + * gcc.target/loongarch/sqrtf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-divf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-recip-divf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-recip.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-sqrtf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-divf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-recip-divf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-recip.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-sqrtf.c: New test. 
+--- + gcc/config/loongarch/genopts/loongarch.opt.in | 11 + + gcc/config/loongarch/lasx.md | 53 ++++- + gcc/config/loongarch/loongarch-protos.h | 2 + + gcc/config/loongarch/loongarch.cc | 188 ++++++++++++++++++ + gcc/config/loongarch/loongarch.h | 18 ++ + gcc/config/loongarch/loongarch.md | 49 ++++- + gcc/config/loongarch/loongarch.opt | 11 + + gcc/config/loongarch/lsx.md | 53 ++++- + gcc/config/loongarch/predicates.md | 4 + + gcc/doc/invoke.texi | 55 ++++- + gcc/testsuite/gcc.target/loongarch/divf.c | 10 + + .../gcc.target/loongarch/recip-divf.c | 9 + + .../gcc.target/loongarch/recip-sqrtf.c | 23 +++ + gcc/testsuite/gcc.target/loongarch/sqrtf.c | 24 +++ + .../loongarch/vector/lasx/lasx-divf.c | 13 ++ + .../loongarch/vector/lasx/lasx-recip-divf.c | 12 ++ + .../loongarch/vector/lasx/lasx-recip-sqrtf.c | 28 +++ + .../loongarch/vector/lasx/lasx-recip.c | 24 +++ + .../loongarch/vector/lasx/lasx-sqrtf.c | 29 +++ + .../loongarch/vector/lsx/lsx-divf.c | 13 ++ + .../loongarch/vector/lsx/lsx-recip-divf.c | 12 ++ + .../loongarch/vector/lsx/lsx-recip-sqrtf.c | 28 +++ + .../loongarch/vector/lsx/lsx-recip.c | 24 +++ + .../loongarch/vector/lsx/lsx-sqrtf.c | 29 +++ + 24 files changed, 711 insertions(+), 11 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/recip-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/recip-sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-divf.c + create mode 
100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-divf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-sqrtf.c + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index cd5e75e4f..102202b03 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -23,6 +23,9 @@ config/loongarch/loongarch-opts.h + HeaderInclude + config/loongarch/loongarch-str.h + ++TargetVariable ++unsigned int recip_mask = 0 ++ + ; ISA related options + ;; Base ISA + Enum +@@ -194,6 +197,14 @@ mexplicit-relocs + Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) + Use %reloc() assembly operators (for backward compatibility). + ++mrecip ++Target RejectNegative Var(loongarch_recip) ++Generate approximate reciprocal divide and square root for better throughput. ++ ++mrecip= ++Target RejectNegative Joined Var(loongarch_recip_name) ++Control generation of reciprocal estimates. ++ + ; The code model option names for -mcmodel. 
+ Enum + Name(cmodel) Type(int) +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index ad49a3ffb..eeac8cd98 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1194,7 +1194,25 @@ + [(set_attr "type" "simd_fmul") + (set_attr "mode" "")]) + +-(define_insn "div3" ++(define_expand "div3" ++ [(set (match_operand:FLASX 0 "register_operand") ++ (div:FLASX (match_operand:FLASX 1 "reg_or_vecotr_1_operand") ++ (match_operand:FLASX 2 "register_operand")))] ++ "ISA_HAS_LASX" ++{ ++ if (mode == V8SFmode ++ && TARGET_RECIP_VEC_DIV ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math ++ && flag_unsafe_math_optimizations) ++ { ++ loongarch_emit_swdivsf (operands[0], operands[1], ++ operands[2], V8SFmode); ++ DONE; ++ } ++}) ++ ++(define_insn "*div3" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (div:FLASX (match_operand:FLASX 1 "register_operand" "f") + (match_operand:FLASX 2 "register_operand" "f")))] +@@ -1223,7 +1241,23 @@ + [(set_attr "type" "simd_fmadd") + (set_attr "mode" "")]) + +-(define_insn "sqrt2" ++(define_expand "sqrt2" ++ [(set (match_operand:FLASX 0 "register_operand") ++ (sqrt:FLASX (match_operand:FLASX 1 "register_operand")))] ++ "ISA_HAS_LASX" ++{ ++ if (mode == V8SFmode ++ && TARGET_RECIP_VEC_SQRT ++ && flag_unsafe_math_optimizations ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands[0], operands[1], V8SFmode, 0); ++ DONE; ++ } ++}) ++ ++(define_insn "*sqrt2" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (sqrt:FLASX (match_operand:FLASX 1 "register_operand" "f")))] + "ISA_HAS_LASX" +@@ -1646,7 +1680,20 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + +-(define_insn "rsqrt2" ++(define_expand "rsqrt2" ++ [(set (match_operand:FLASX 0 "register_operand" "=f") ++ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] ++ UNSPEC_LASX_XVFRSQRT))] ++ 
"ISA_HAS_LASX" ++ { ++ if (mode == V8SFmode && TARGET_RECIP_VEC_RSQRT) ++ { ++ loongarch_emit_swrsqrtsf (operands[0], operands[1], V8SFmode, 1); ++ DONE; ++ } ++}) ++ ++(define_insn "*rsqrt2" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] + UNSPEC_LASX_XVFRSQRT))] +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 51d38177b..117669e9f 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -220,5 +220,7 @@ extern rtx loongarch_gen_const_int_vector_shuffle (machine_mode, int); + extern tree loongarch_build_builtin_va_list (void); + + extern rtx loongarch_build_signbit_mask (machine_mode, bool, bool); ++extern void loongarch_emit_swrsqrtsf (rtx, rtx, machine_mode, bool); ++extern void loongarch_emit_swdivsf (rtx, rtx, rtx, machine_mode); + extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type); + #endif /* ! GCC_LOONGARCH_PROTOS_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 95aa9453b..18326ce47 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -7548,6 +7548,71 @@ loongarch_option_override_internal (struct gcc_options *opts, + + /* Function to allocate machine-dependent function status. */ + init_machine_status = &loongarch_init_machine_status; ++ ++ /* -mrecip options. */ ++ static struct ++ { ++ const char *string; /* option name. */ ++ unsigned int mask; /* mask bits to set. 
*/ ++ } ++ const recip_options[] = { ++ { "all", RECIP_MASK_ALL }, ++ { "none", RECIP_MASK_NONE }, ++ { "div", RECIP_MASK_DIV }, ++ { "sqrt", RECIP_MASK_SQRT }, ++ { "rsqrt", RECIP_MASK_RSQRT }, ++ { "vec-div", RECIP_MASK_VEC_DIV }, ++ { "vec-sqrt", RECIP_MASK_VEC_SQRT }, ++ { "vec-rsqrt", RECIP_MASK_VEC_RSQRT }, ++ }; ++ ++ if (loongarch_recip_name) ++ { ++ char *p = ASTRDUP (loongarch_recip_name); ++ char *q; ++ unsigned int mask, i; ++ bool invert; ++ ++ while ((q = strtok (p, ",")) != NULL) ++ { ++ p = NULL; ++ if (*q == '!') ++ { ++ invert = true; ++ q++; ++ } ++ else ++ invert = false; ++ ++ if (!strcmp (q, "default")) ++ mask = RECIP_MASK_ALL; ++ else ++ { ++ for (i = 0; i < ARRAY_SIZE (recip_options); i++) ++ if (!strcmp (q, recip_options[i].string)) ++ { ++ mask = recip_options[i].mask; ++ break; ++ } ++ ++ if (i == ARRAY_SIZE (recip_options)) ++ { ++ error ("unknown option for %<-mrecip=%s%>", q); ++ invert = false; ++ mask = RECIP_MASK_NONE; ++ } ++ } ++ ++ if (invert) ++ recip_mask &= ~mask; ++ else ++ recip_mask |= mask; ++ } ++ } ++ if (loongarch_recip) ++ recip_mask |= RECIP_MASK_ALL; ++ if (!TARGET_FRECIPE) ++ recip_mask = RECIP_MASK_NONE; + } + + +@@ -11470,6 +11535,126 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) + return force_reg (vec_mode, v); + } + ++/* Use rsqrte instruction and Newton-Raphson to compute the approximation of ++ a single precision floating point [reciprocal] square root. 
*/ ++ ++void loongarch_emit_swrsqrtsf (rtx res, rtx a, machine_mode mode, bool recip) ++{ ++ rtx x0, e0, e1, e2, mhalf, monehalf; ++ REAL_VALUE_TYPE r; ++ int unspec; ++ ++ x0 = gen_reg_rtx (mode); ++ e0 = gen_reg_rtx (mode); ++ e1 = gen_reg_rtx (mode); ++ e2 = gen_reg_rtx (mode); ++ ++ real_arithmetic (&r, ABS_EXPR, &dconsthalf, NULL); ++ mhalf = const_double_from_real_value (r, SFmode); ++ ++ real_arithmetic (&r, PLUS_EXPR, &dconsthalf, &dconst1); ++ monehalf = const_double_from_real_value (r, SFmode); ++ unspec = UNSPEC_RSQRTE; ++ ++ if (VECTOR_MODE_P (mode)) ++ { ++ mhalf = loongarch_build_const_vector (mode, true, mhalf); ++ monehalf = loongarch_build_const_vector (mode, true, monehalf); ++ unspec = GET_MODE_SIZE (mode) == 32 ? UNSPEC_LASX_XVFRSQRTE ++ : UNSPEC_LSX_VFRSQRTE; ++ } ++ ++ /* rsqrt(a) = rsqrte(a) * (1.5 - 0.5 * a * rsqrte(a) * rsqrte(a)) ++ sqrt(a) = a * rsqrte(a) * (1.5 - 0.5 * a * rsqrte(a) * rsqrte(a)) */ ++ ++ a = force_reg (mode, a); ++ ++ /* x0 = rsqrt(a) estimate. */ ++ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a), ++ unspec))); ++ ++ /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). 
*/ ++ if (!recip) ++ { ++ rtx zero = force_reg (mode, CONST0_RTX (mode)); ++ ++ if (VECTOR_MODE_P (mode)) ++ { ++ machine_mode imode = related_int_vector_mode (mode).require (); ++ rtx mask = gen_reg_rtx (imode); ++ emit_insn (gen_rtx_SET (mask, gen_rtx_NE (imode, a, zero))); ++ emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, ++ gen_lowpart (mode, mask)))); ++ } ++ else ++ { ++ rtx target = emit_conditional_move (x0, { GT, a, zero, mode }, ++ x0, zero, mode, 0); ++ if (target != x0) ++ emit_move_insn (x0, target); ++ } ++ } ++ ++ /* e0 = x0 * a */ ++ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a))); ++ /* e1 = e0 * x0 */ ++ emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0))); ++ ++ /* e2 = 1.5 - e1 * 0.5 */ ++ mhalf = force_reg (mode, mhalf); ++ monehalf = force_reg (mode, monehalf); ++ emit_insn (gen_rtx_SET (e2, gen_rtx_FMA (mode, ++ gen_rtx_NEG (mode, e1), ++ mhalf, monehalf))); ++ ++ if (recip) ++ /* res = e2 * x0 */ ++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, x0, e2))); ++ else ++ /* res = e2 * e0 */ ++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e0))); ++} ++ ++/* Use recipe instruction and Newton-Raphson to compute the approximation of ++ a single precision floating point divide. */ ++ ++void loongarch_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) ++{ ++ rtx x0, e0, mtwo; ++ REAL_VALUE_TYPE r; ++ x0 = gen_reg_rtx (mode); ++ e0 = gen_reg_rtx (mode); ++ int unspec = UNSPEC_RECIPE; ++ ++ real_arithmetic (&r, ABS_EXPR, &dconst2, NULL); ++ mtwo = const_double_from_real_value (r, SFmode); ++ ++ if (VECTOR_MODE_P (mode)) ++ { ++ mtwo = loongarch_build_const_vector (mode, true, mtwo); ++ unspec = GET_MODE_SIZE (mode) == 32 ? UNSPEC_LASX_XVFRECIPE ++ : UNSPEC_LSX_VFRECIPE; ++ } ++ ++ mtwo = force_reg (mode, mtwo); ++ ++ /* a / b = a * recipe(b) * (2.0 - b * recipe(b)) */ ++ ++ /* x0 = 1./b estimate. 
*/ ++ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), ++ unspec))); ++ /* 2.0 - b * x0 */ ++ emit_insn (gen_rtx_SET (e0, gen_rtx_FMA (mode, ++ gen_rtx_NEG (mode, b), x0, mtwo))); ++ ++ /* x0 = a * x0 */ ++ if (a != CONST1_RTX (mode)) ++ emit_insn (gen_rtx_SET (x0, gen_rtx_MULT (mode, a, x0))); ++ ++ /* res = e0 * x0 */ ++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, x0))); ++} ++ + static bool + loongarch_builtin_support_vector_misalignment (machine_mode mode, + const_tree type, +@@ -11665,6 +11850,9 @@ loongarch_asm_code_end (void) + #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ + loongarch_autovectorize_vector_modes + ++#undef TARGET_OPTAB_SUPPORTED_P ++#define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p ++ + #undef TARGET_INIT_BUILTINS + #define TARGET_INIT_BUILTINS loongarch_init_builtins + #undef TARGET_BUILTIN_DECL +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index 8b28be0e4..fbc0f53e4 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -702,6 +702,24 @@ enum reg_class + && (GET_MODE_CLASS (MODE) == MODE_VECTOR_INT \ + || GET_MODE_CLASS (MODE) == MODE_VECTOR_FLOAT)) + ++#define RECIP_MASK_NONE 0x00 ++#define RECIP_MASK_DIV 0x01 ++#define RECIP_MASK_SQRT 0x02 ++#define RECIP_MASK_RSQRT 0x04 ++#define RECIP_MASK_VEC_DIV 0x08 ++#define RECIP_MASK_VEC_SQRT 0x10 ++#define RECIP_MASK_VEC_RSQRT 0x20 ++#define RECIP_MASK_ALL (RECIP_MASK_DIV | RECIP_MASK_SQRT \ ++ | RECIP_MASK_RSQRT | RECIP_MASK_VEC_SQRT \ ++ | RECIP_MASK_VEC_DIV | RECIP_MASK_VEC_RSQRT) ++ ++#define TARGET_RECIP_DIV ((recip_mask & RECIP_MASK_DIV) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_SQRT ((recip_mask & RECIP_MASK_SQRT) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_RSQRT ((recip_mask & RECIP_MASK_RSQRT) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_VEC_DIV ((recip_mask & RECIP_MASK_VEC_DIV) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_VEC_SQRT 
((recip_mask & RECIP_MASK_VEC_SQRT) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_VEC_RSQRT ((recip_mask & RECIP_MASK_VEC_RSQRT) != 0 || TARGET_uARCH_LA664) ++ + /* 1 if N is a possible register number for function argument passing. + We have no FP argument registers when soft-float. */ + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 4dfe583e2..c6edd1dda 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -893,9 +893,21 @@ + ;; Float division and modulus. + (define_expand "div3" + [(set (match_operand:ANYF 0 "register_operand") +- (div:ANYF (match_operand:ANYF 1 "reg_or_1_operand") +- (match_operand:ANYF 2 "register_operand")))] +- "") ++ (div:ANYF (match_operand:ANYF 1 "reg_or_1_operand") ++ (match_operand:ANYF 2 "register_operand")))] ++ "" ++{ ++ if (mode == SFmode ++ && TARGET_RECIP_DIV ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math ++ && flag_unsafe_math_optimizations) ++ { ++ loongarch_emit_swdivsf (operands[0], operands[1], ++ operands[2], SFmode); ++ DONE; ++ } ++}) + + (define_insn "*div3" + [(set (match_operand:ANYF 0 "register_operand" "=f") +@@ -1126,7 +1138,23 @@ + ;; + ;; .................... 
+ +-(define_insn "sqrt2" ++(define_expand "sqrt2" ++ [(set (match_operand:ANYF 0 "register_operand") ++ (sqrt:ANYF (match_operand:ANYF 1 "register_operand")))] ++ "" ++ { ++ if (mode == SFmode ++ && TARGET_RECIP_SQRT ++ && flag_unsafe_math_optimizations ++ && !optimize_insn_for_size_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands[0], operands[1], SFmode, 0); ++ DONE; ++ } ++ }) ++ ++(define_insn "*sqrt2" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (sqrt:ANYF (match_operand:ANYF 1 "register_operand" "f")))] + "" +@@ -1135,6 +1163,19 @@ + (set_attr "mode" "") + (set_attr "insn_count" "1")]) + ++(define_expand "rsqrt2" ++ [(set (match_operand:ANYF 0 "register_operand") ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand")] ++ UNSPEC_RSQRT))] ++ "TARGET_HARD_FLOAT" ++{ ++ if (mode == SFmode && TARGET_RECIP_RSQRT) ++ { ++ loongarch_emit_swrsqrtsf (operands[0], operands[1], SFmode, 1); ++ DONE; ++ } ++}) ++ + (define_insn "*rsqrt2" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index e7bc8bed4..56f6a9564 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -31,6 +31,9 @@ config/loongarch/loongarch-opts.h + HeaderInclude + config/loongarch/loongarch-str.h + ++TargetVariable ++unsigned int recip_mask = 0 ++ + ; ISA related options + ;; Base ISA + Enum +@@ -202,6 +205,14 @@ mexplicit-relocs + Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) + Use %reloc() assembly operators (for backward compatibility). + ++mrecip ++Target RejectNegative Var(loongarch_recip) ++Generate approximate reciprocal divide and square root for better throughput. ++ ++mrecip= ++Target RejectNegative Joined Var(loongarch_recip_name) ++Control generation of reciprocal estimates. 
++ + ; The code model option names for -mcmodel. + Enum + Name(cmodel) Type(int) +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index f2774f021..dbdb42301 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -1083,7 +1083,25 @@ + [(set_attr "type" "simd_fmul") + (set_attr "mode" "")]) + +-(define_insn "div3" ++(define_expand "div3" ++ [(set (match_operand:FLSX 0 "register_operand") ++ (div:FLSX (match_operand:FLSX 1 "reg_or_vecotr_1_operand") ++ (match_operand:FLSX 2 "register_operand")))] ++ "ISA_HAS_LSX" ++{ ++ if (mode == V4SFmode ++ && TARGET_RECIP_VEC_DIV ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math ++ && flag_unsafe_math_optimizations) ++ { ++ loongarch_emit_swdivsf (operands[0], operands[1], ++ operands[2], V4SFmode); ++ DONE; ++ } ++}) ++ ++(define_insn "*div3" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (div:FLSX (match_operand:FLSX 1 "register_operand" "f") + (match_operand:FLSX 2 "register_operand" "f")))] +@@ -1112,7 +1130,23 @@ + [(set_attr "type" "simd_fmadd") + (set_attr "mode" "")]) + +-(define_insn "sqrt2" ++(define_expand "sqrt2" ++ [(set (match_operand:FLSX 0 "register_operand") ++ (sqrt:FLSX (match_operand:FLSX 1 "register_operand")))] ++ "ISA_HAS_LSX" ++{ ++ if (mode == V4SFmode ++ && TARGET_RECIP_VEC_SQRT ++ && flag_unsafe_math_optimizations ++ && optimize_insn_for_speed_p () ++ && flag_finite_math_only && !flag_trapping_math) ++ { ++ loongarch_emit_swrsqrtsf (operands[0], operands[1], V4SFmode, 0); ++ DONE; ++ } ++}) ++ ++(define_insn "*sqrt2" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (sqrt:FLSX (match_operand:FLSX 1 "register_operand" "f")))] + "ISA_HAS_LSX" +@@ -1559,7 +1593,20 @@ + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + +-(define_insn "rsqrt2" ++(define_expand "rsqrt2" ++ [(set (match_operand:FLSX 0 "register_operand" "=f") ++ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] ++ 
UNSPEC_LSX_VFRSQRT))] ++ "ISA_HAS_LSX" ++{ ++ if (mode == V4SFmode && TARGET_RECIP_VEC_RSQRT) ++ { ++ loongarch_emit_swrsqrtsf (operands[0], operands[1], V4SFmode, 1); ++ DONE; ++ } ++}) ++ ++(define_insn "*rsqrt2" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] + UNSPEC_LSX_VFRSQRT))] +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 572550dbc..88e54c915 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -235,6 +235,10 @@ + (ior (match_operand 0 "const_1_operand") + (match_operand 0 "register_operand"))) + ++(define_predicate "reg_or_vecotr_1_operand" ++ (ior (match_operand 0 "const_vector_1_operand") ++ (match_operand 0 "register_operand"))) ++ + ;; These are used in vec_merge, hence accept bitmask as const_int. + (define_predicate "const_exp_2_operand" + (and (match_code "const_int") +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 168f3d0db..76a8f20d1 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1008,7 +1008,8 @@ Objective-C and Objective-C++ Dialects}. + -mmax-inline-memcpy-size=@var{n} @gol + -mexplicit-relocs -mno-explicit-relocs @gol + -mdirect-extern-access -mno-direct-extern-access @gol +--mcmodel=@var{code-model}} ++-mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as} @gol ++-mrecip -mrecip=@var{opt} + + @emph{M32R/D Options} + @gccoptlist{-m32r2 -m32rx -m32r @gol +@@ -24633,6 +24634,58 @@ kernels, executables linked with @option{-static} or @option{-static-pie}. + @option{-mdirect-extern-access} is not compatible with @option{-fPIC} or + @option{-fpic}. + ++@opindex mrecip ++@item -mrecip ++This option enables use of the reciprocal estimate and reciprocal square ++root estimate instructions with additional Newton-Raphson steps to increase ++precision instead of doing a divide or square root and divide for ++floating-point arguments. 
++These instructions are generated only when @option{-funsafe-math-optimizations} ++is enabled together with @option{-ffinite-math-only} and ++@option{-fno-trapping-math}. ++This option is off by default. Before you can use this option, you must make sure the ++target CPU supports frecipe and frsqrte instructions. ++Note that while the throughput of the sequence is higher than the throughput of ++the non-reciprocal instruction, the precision of the sequence can be decreased ++by up to 2 ulp (i.e. the inverse of 1.0 equals 0.99999994). ++ ++@opindex mrecip=opt ++@item -mrecip=@var{opt} ++This option controls which reciprocal estimate instructions ++may be used. @var{opt} is a comma-separated list of options, which may ++be preceded by a @samp{!} to invert the option: ++@table @samp ++@item all ++Enable all estimate instructions. ++ ++@item default ++Enable the default instructions, equivalent to @option{-mrecip}. ++ ++@item none ++Disable all estimate instructions, equivalent to @option{-mno-recip}. ++ ++@item div ++Enable the approximation for scalar division. ++ ++@item vec-div ++Enable the approximation for vectorized division. ++ ++@item sqrt ++Enable the approximation for scalar square root. ++ ++@item vec-sqrt ++Enable the approximation for vectorized square root. ++ ++@item rsqrt ++Enable the approximation for scalar reciprocal square root. ++ ++@item vec-rsqrt ++Enable the approximation for vectorized reciprocal square root. ++@end table ++ ++So, for example, @option{-mrecip=all,!sqrt} enables ++all of the reciprocal approximations, except for scalar square root. ++ + @item loongarch-vect-unroll-limit + The vectorizer will use available tuning information to determine whether it + would be beneficial to unroll the main vectorized loop and by how much. 
This +diff --git a/gcc/testsuite/gcc.target/loongarch/divf.c b/gcc/testsuite/gcc.target/loongarch/divf.c +new file mode 100644 +index 000000000..6c831817c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/divf.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ffast-math -mrecip -mfrecipe -fno-unsafe-math-optimizations" } */ ++/* { dg-final { scan-assembler "fdiv.s" } } */ ++/* { dg-final { scan-assembler-not "frecipe.s" } } */ ++ ++float ++foo(float a, float b) ++{ ++ return a / b; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/recip-divf.c b/gcc/testsuite/gcc.target/loongarch/recip-divf.c +new file mode 100644 +index 000000000..db5e3e488 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/recip-divf.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ffast-math -mrecip -mfrecipe" } */ ++/* { dg-final { scan-assembler "frecipe.s" } } */ ++ ++float ++foo(float a, float b) ++{ ++ return a / b; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/recip-sqrtf.c b/gcc/testsuite/gcc.target/loongarch/recip-sqrtf.c +new file mode 100644 +index 000000000..7f45db6cd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/recip-sqrtf.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ffast-math -mrecip -mfrecipe" } */ ++/* { dg-final { scan-assembler-times "frsqrte.s" 3 } } */ ++ ++extern float sqrtf (float); ++ ++float ++foo1 (float a, float b) ++{ ++ return a/sqrtf(b); ++} ++ ++float ++foo2 (float a, float b) ++{ ++ return sqrtf(a/b); ++} ++ ++float ++foo3 (float a) ++{ ++ return sqrtf(a); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/sqrtf.c b/gcc/testsuite/gcc.target/loongarch/sqrtf.c +new file mode 100644 +index 000000000..c2720faac +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/sqrtf.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ffast-math -mrecip -mfrecipe -fno-unsafe-math-optimizations" } */ ++/* { dg-final { scan-assembler-times "fsqrt.s" 3 } } */ ++/* 
{ dg-final { scan-assembler-not "frsqrte.s" } } */ ++ ++extern float sqrtf (float); ++ ++float ++foo1 (float a, float b) ++{ ++ return a/sqrtf(b); ++} ++ ++float ++foo2 (float a, float b) ++{ ++ return sqrtf(a/b); ++} ++ ++float ++foo3 (float a) ++{ ++ return sqrtf(a); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-divf.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-divf.c +new file mode 100644 +index 000000000..748a82200 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-divf.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mrecip -mlasx -mfrecipe -fno-unsafe-math-optimizations" } */ ++/* { dg-final { scan-assembler "xvfdiv.s" } } */ ++/* { dg-final { scan-assembler-not "xvfrecipe.s" } } */ ++ ++float a[8],b[8],c[8]; ++ ++void ++foo () ++{ ++ for (int i = 0; i < 8; i++) ++ c[i] = a[i] / b[i]; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-divf.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-divf.c +new file mode 100644 +index 000000000..6532756f0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-divf.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ffast-math -mrecip -mlasx -mfrecipe" } */ ++/* { dg-final { scan-assembler "xvfrecipe.s" } } */ ++ ++float a[8],b[8],c[8]; ++ ++void ++foo () ++{ ++ for (int i = 0; i < 8; i++) ++ c[i] = a[i] / b[i]; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c +new file mode 100644 +index 000000000..a623dff8f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ffast-math -mrecip -mlasx -mfrecipe" } */ ++/* { dg-final { scan-assembler-times "xvfrsqrte.s" 3 } } */ ++ ++float a[8], b[8], c[8]; ++ ++extern float sqrtf (float); ++ ++void ++foo1 (void) ++{ ++ for (int 
i = 0; i < 8; i++) ++ c[i] = a[i] / sqrtf (b[i]); ++} ++ ++void ++foo2 (void) ++{ ++ for (int i = 0; i < 8; i++) ++ c[i] = sqrtf (a[i] / b[i]); ++} ++ ++void ++foo3 (void) ++{ ++ for (int i = 0; i < 8; i++) ++ c[i] = sqrtf (a[i]); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip.c +new file mode 100644 +index 000000000..083c86840 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */ ++/* { dg-final { scan-assembler "xvfrecip.s" } } */ ++/* { dg-final { scan-assembler "xvfrecip.d" } } */ ++/* { dg-final { scan-assembler-not "xvfdiv.s" } } */ ++/* { dg-final { scan-assembler-not "xvfdiv.d" } } */ ++ ++float a[8], b[8]; ++ ++void ++foo1(void) ++{ ++ for (int i = 0; i < 8; i++) ++ a[i] = 1 / (b[i]); ++} ++ ++double da[4], db[4]; ++ ++void ++foo2(void) ++{ ++ for (int i = 0; i < 4; i++) ++ da[i] = 1 / (db[i]); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-sqrtf.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-sqrtf.c +new file mode 100644 +index 000000000..a005a3886 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-sqrtf.c +@@ -0,0 +1,29 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ffast-math -fno-unsafe-math-optimizations -mrecip -mlasx -mfrecipe" } */ ++/* { dg-final { scan-assembler-times "xvfsqrt.s" 3 } } */ ++/* { dg-final { scan-assembler-not "xvfrsqrte.s" } } */ ++ ++float a[8], b[8], c[8]; ++ ++extern float sqrtf (float); ++ ++void ++foo1 (void) ++{ ++ for (int i = 0; i < 8; i++) ++ c[i] = a[i] / sqrtf (b[i]); ++} ++ ++void ++foo2 (void) ++{ ++ for (int i = 0; i < 8; i++) ++ c[i] = sqrtf (a[i] / b[i]); ++} ++ ++void ++foo3 (void) ++{ ++ for (int i = 0; i < 8; i++) ++ c[i] = sqrtf (a[i]); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-divf.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-divf.c +new file mode 100644 +index 000000000..1219b1ef8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-divf.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ffast-math -mrecip -mlsx -mfrecipe -fno-unsafe-math-optimizations" } */ ++/* { dg-final { scan-assembler "vfdiv.s" } } */ ++/* { dg-final { scan-assembler-not "vfrecipe.s" } } */ ++ ++float a[4],b[4],c[4]; ++ ++void ++foo () ++{ ++ for (int i = 0; i < 4; i++) ++ c[i] = a[i] / b[i]; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-divf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-divf.c +new file mode 100644 +index 000000000..edbe8d909 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-divf.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ffast-math -mrecip -mlsx -mfrecipe" } */ ++/* { dg-final { scan-assembler "vfrecipe.s" } } */ ++ ++float a[4],b[4],c[4]; ++ ++void ++foo () ++{ ++ for (int i = 0; i < 4; i++) ++ c[i] = a[i] / b[i]; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c +new file mode 100644 +index 000000000..d356f915e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ffast-math -mrecip -mlsx -mfrecipe" } */ ++/* { dg-final { scan-assembler-times "vfrsqrte.s" 3 } } */ ++ ++float a[4], b[4], c[4]; ++ ++extern float sqrtf (float); ++ ++void ++foo1 (void) ++{ ++ for (int i = 0; i < 4; i++) ++ c[i] = a[i] / sqrtf (b[i]); ++} ++ ++void ++foo2 (void) ++{ ++ for (int i = 0; i < 4; i++) ++ c[i] = sqrtf (a[i] / b[i]); ++} ++ ++void ++foo3 (void) ++{ ++ for (int i = 0; i < 4; i++) ++ c[i] = sqrtf (a[i]); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip.c +new file mode 100644 +index 000000000..c4d6af4db +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */ ++/* { dg-final { scan-assembler "vfrecip.s" } } */ ++/* { dg-final { scan-assembler "vfrecip.d" } } */ ++/* { dg-final { scan-assembler-not "vfdiv.s" } } */ ++/* { dg-final { scan-assembler-not "vfdiv.d" } } */ ++ ++float a[4], b[4]; ++ ++void ++foo1(void) ++{ ++ for (int i = 0; i < 4; i++) ++ a[i] = 1 / (b[i]); ++} ++ ++double da[2], db[2]; ++ ++void ++foo2(void) ++{ ++ for (int i = 0; i < 2; i++) ++ da[i] = 1 / (db[i]); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-sqrtf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-sqrtf.c +new file mode 100644 +index 000000000..3ff6570a6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-sqrtf.c +@@ -0,0 +1,29 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ffast-math -mrecip -mlsx -mfrecipe -fno-unsafe-math-optimizations" } */ ++/* { dg-final { scan-assembler-times "vfsqrt.s" 3 } } */ ++/* { dg-final { scan-assembler-not "vfrsqrte.s" } } */ ++ ++float a[4], b[4], c[4]; ++ ++extern float sqrtf (float); ++ ++void ++foo1 (void) ++{ ++ for (int i = 0; i < 4; i++) ++ c[i] = a[i] / sqrtf (b[i]); ++} ++ ++void ++foo2 (void) ++{ ++ for (int i = 0; i < 4; i++) ++ c[i] = sqrtf (a[i] / b[i]); ++} ++ ++void ++foo3 (void) ++{ ++ for (int i = 0; i < 4; i++) ++ c[i] = sqrtf (a[i]); ++} +-- +2.43.0 + diff --git a/0063-LoongArch-Vectorized-loop-unrolling-is-disable-for-d.patch b/0063-LoongArch-Vectorized-loop-unrolling-is-disable-for-d.patch new file mode 100644 index 0000000..752d5a4 --- /dev/null +++ b/0063-LoongArch-Vectorized-loop-unrolling-is-disable-for-d.patch @@ -0,0 +1,83 @@ +From bb211ae35474a9fa1a8189f0a4c525ce3d8c280e Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Wed, 6 Dec 2023 15:04:53 
+0800 +Subject: [PATCH 063/188] LoongArch: Vectorized loop unrolling is disable for + divf/sqrtf/rsqrtf when -mrecip is enabled. + +Using -mrecip generates a sequence of instructions to replace divf, sqrtf and rsqrtf. The number +of generated instructions is close to or exceeds the maximum issue instructions per cycle of the +LoongArch, so vectorized loop unrolling is not performed on them. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_vector_costs::determine_suggested_unroll_factor): + If m_has_recip is true, uf return 1. + (loongarch_vector_costs::add_stmt_cost): Detect the use of approximate instruction sequence. +--- + gcc/config/loongarch/loongarch.cc | 36 +++++++++++++++++++++++++++++-- + 1 file changed, 34 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 18326ce47..d64777179 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3970,7 +3970,9 @@ protected: + /* Reduction factor for suggesting unroll factor. */ + unsigned m_reduc_factor = 0; + /* True if the loop contains an average operation. */ +- bool m_has_avg =false; ++ bool m_has_avg = false; ++ /* True if the loop uses approximation instruction sequence. */ ++ bool m_has_recip = false; + }; + + /* Implement TARGET_VECTORIZE_CREATE_COSTS. */ +@@ -4017,7 +4019,7 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi + { + class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + +- if (m_has_avg) ++ if (m_has_avg || m_has_recip) + return 1; + + /* Don't unroll if it's specified explicitly not to be unrolled. */ +@@ -4077,6 +4079,36 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, + } + } + ++ combined_fn cfn; ++ if (kind == vector_stmt ++ && stmt_info ++ && stmt_info->stmt) ++ { ++ /* Detect the use of approximate instruction sequence. 
*/ ++ if ((TARGET_RECIP_VEC_SQRT || TARGET_RECIP_VEC_RSQRT) ++ && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST) ++ switch (cfn) ++ { ++ case CFN_BUILT_IN_SQRTF: ++ m_has_recip = true; ++ default: ++ break; ++ } ++ else if (TARGET_RECIP_VEC_DIV ++ && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN) ++ { ++ machine_mode mode = TYPE_MODE (vectype); ++ switch (gimple_assign_rhs_code (stmt_info->stmt)) ++ { ++ case RDIV_EXPR: ++ if (GET_MODE_INNER (mode) == SFmode) ++ m_has_recip = true; ++ default: ++ break; ++ } ++ } ++ } ++ + return retval; + } + +-- +2.43.0 + diff --git a/0064-LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch b/0064-LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch new file mode 100644 index 0000000..cc4b80a --- /dev/null +++ b/0064-LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch @@ -0,0 +1,130 @@ +From 6ca9670e02a7d3f939b1a75f7b5a9094cd1db909 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Fri, 25 Oct 2024 02:45:35 +0000 +Subject: [PATCH 064/188] LoongArch: Fix lsx-vshuf.c and lasx-xvshuf_b.c tests + fail on LA664 [PR112611] + +For [x]vshuf instructions, if the index value in the selector exceeds 63, it triggers +undefined behavior on LA464, but not on LA664. To ensure compatibility of these two +tests on both LA464 and LA664, we have modified both tests to ensure that the index +value in the selector does not exceed 63. + +gcc/testsuite/ChangeLog: + + PR target/112611 + * gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c: Sure index less than 64. + * gcc.target/loongarch/vector/lsx/lsx-vshuf.c: Ditto. 
+--- + .../loongarch/vector/lasx/lasx-xvshuf_b.c | 14 +++++++------- + .../gcc.target/loongarch/vector/lsx/lsx-vshuf.c | 12 ++++++------ + 2 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c +index b8ab38711..910d29339 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c +@@ -99,9 +99,9 @@ main () + *((unsigned long *)&__m256i_op1[2]) = 0x7ff0000000000000; + *((unsigned long *)&__m256i_op1[1]) = 0x7ff0000000000000; + *((unsigned long *)&__m256i_op1[0]) = 0x7ff0000000000000; +- *((unsigned long *)&__m256i_op2[3]) = 0x3ff0010000000000; ++ *((unsigned long *)&__m256i_op2[3]) = 0x3f11010000000000; + *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_op2[1]) = 0x3ff0010000000000; ++ *((unsigned long *)&__m256i_op2[1]) = 0x3f11010000000000; + *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; +@@ -200,7 +200,7 @@ main () + *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; +- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; ++ *((unsigned long *)&__m256i_result[1]) = 0xffffffff00000000; + *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; + __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); +@@ -351,7 +351,7 @@ main () + *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000001; + *((unsigned long *)&__m256i_op2[0]) = 0x00000000012e2110; + *((unsigned long *)&__m256i_result[3]) = 0x0000000000000001; +- *((unsigned long *)&__m256i_result[2]) = 
0x0000000200000000; ++ *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[1]) = 0x00000000012e2110; + *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; + __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); +@@ -426,10 +426,10 @@ main () + *((unsigned long *)&__m256i_op2[2]) = 0x8000000080000000; + *((unsigned long *)&__m256i_op2[1]) = 0xdfffffffdfffffff; + *((unsigned long *)&__m256i_op2[0]) = 0x8000000080000000; +- *((unsigned long *)&__m256i_result[3]) = 0x8000000080000000; ++ *((unsigned long *)&__m256i_result[3]) = 0xdfffffff80000000; + *((unsigned long *)&__m256i_result[2]) = 0x7fc00000dfffffff; +- *((unsigned long *)&__m256i_result[1]) = 0x8000000080000000; +- *((unsigned long *)&__m256i_result[0]) = 0x8000000080000000; ++ *((unsigned long *)&__m256i_result[1]) = 0x7fc0000000000000; ++ *((unsigned long *)&__m256i_result[0]) = 0x8000000000000000; + __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c +index f3b800f88..93a3078fa 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c +@@ -33,7 +33,7 @@ main () + *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op2[0]) = 0x3f2f1f0f00000000; + *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result[0]) = 0x00ff00ff00000000; + __m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +@@ -153,7 +153,7 @@ main () + *((unsigned long *)&__m128i_op1[0]) = 0x000000002bfd9461; + *((unsigned long *)&__m128i_op2[1]) = 0x00007fff00007fff; + *((unsigned 
long *)&__m128i_op2[0]) = 0x0000000000000000; +- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result[1]) = 0x00007fff00000000; + *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; + __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +@@ -198,7 +198,7 @@ main () + *((unsigned long *)&__m128i_op2[1]) = 0x00000000000000c0; + *((unsigned long *)&__m128i_op2[0]) = 0x00000001ffffff29; + *((unsigned long *)&__m128i_result[1]) = 0xffffff29ffffff29; +- *((unsigned long *)&__m128i_result[0]) = 0x0000000100000001; ++ *((unsigned long *)&__m128i_result[0]) = 0xffffff2900000001; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + +@@ -219,7 +219,7 @@ main () + *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op2[1]) = 0x0000000020000020; + *((unsigned long *)&__m128i_op2[0]) = 0x0000000020000020; +- *((unsigned long *)&__m128i_result[1]) = 0x2000002000000000; ++ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; + *((unsigned long *)&__m128i_result[0]) = 0x2000002020000020; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +@@ -241,7 +241,7 @@ main () + *((unsigned long *)&__m128i_op1[0]) = 0x0000001000000010; + *((unsigned long *)&__m128i_op2[1]) = 0x8000000100000000; + *((unsigned long *)&__m128i_op2[0]) = 0x8000000000000103; +- *((unsigned long *)&__m128i_result[1]) = 0x0000010300000103; ++ *((unsigned long *)&__m128i_result[1]) = 0x8000000000000103; + *((unsigned long *)&__m128i_result[0]) = 0x0000010380000001; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +@@ -252,7 +252,7 @@ main () + *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; + *((unsigned long 
*)&__m128i_op2[1]) = 0xffffffffffffffff; + *((unsigned long *)&__m128i_op2[0]) = 0xffffffffffffffff; +- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; ++ *((unsigned long *)&__m128i_result[1]) = 0xffffffff00000000; + *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff; + __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); +-- +2.43.0 + diff --git a/0065-LoongArch-Fix-ICE-and-use-simplify_gen_subreg-instea.patch b/0065-LoongArch-Fix-ICE-and-use-simplify_gen_subreg-instea.patch new file mode 100644 index 0000000..6e49789 --- /dev/null +++ b/0065-LoongArch-Fix-ICE-and-use-simplify_gen_subreg-instea.patch @@ -0,0 +1,318 @@ +From 87396b4550eeb097cdbe73fb19c84059ba6bb85e Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Wed, 29 Nov 2023 11:18:00 +0800 +Subject: [PATCH 065/188] LoongArch: Fix ICE and use simplify_gen_subreg + instead of gen_rtx_SUBREG directly. + +loongarch_expand_vec_cond_mask_expr generates 'subreg's of 'subreg's, which are not supported +in gcc, it causes an ICE: + +ice.c:55:1: error: unrecognizable insn: + 55 | } + | ^ +(insn 63 62 64 8 (set (reg:V4DI 278) + (subreg:V4DI (subreg:V4DF (reg:V4DI 273 [ vect__53.26 ]) 0) 0)) -1 + (nil)) +during RTL pass: vregs +ice.c:55:1: internal compiler error: in extract_insn, at recog.cc:2804 + +Last time, Ruoyao has fixed a similar ICE: +https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636156.html + +This patch fixes ICE and use simplify_gen_subreg instead of gen_rtx_SUBREG as much as possible +to avoid the same ice happening again. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_try_expand_lsx_vshuf_const): Use + simplify_gen_subreg instead of gen_rtx_SUBREG. + (loongarch_expand_vec_perm_const_2): Ditto. + (loongarch_expand_vec_cond_expr): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/pr112476-3.c: New test. + * gcc.target/loongarch/pr112476-4.c: New test. 
+--- + gcc/config/loongarch/loongarch.cc | 79 +++++++++++-------- + .../gcc.target/loongarch/pr112476-3.c | 58 ++++++++++++++ + .../gcc.target/loongarch/pr112476-4.c | 4 + + 3 files changed, 108 insertions(+), 33 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-3.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-4.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index d64777179..4a3a7a246 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -8824,13 +8824,13 @@ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) + if (d->vmode == E_V2DFmode) + { + sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = gen_rtx_SUBREG (E_V2DImode, d->target, 0); ++ tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0); + emit_move_insn (tmp, sel); + } + else if (d->vmode == E_V4SFmode) + { + sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = gen_rtx_SUBREG (E_V4SImode, d->target, 0); ++ tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0); + emit_move_insn (tmp, sel); + } + else +@@ -9614,8 +9614,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + /* Adjust op1 for selecting correct value in high 128bit of target + register. + op1: E_V4DImode, { 4, 5, 6, 7 } -> { 2, 3, 4, 5 }. */ +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); + emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, + conv_op0, GEN_INT (0x21))); + +@@ -9644,8 +9644,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op0_alt, d->op0); + + /* Generate subreg for fitting into insn gen function. 
*/ +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); + + /* Adjust op value in temp register. + op0 = {0,1,2,3}, op1 = {4,5,0,1} */ +@@ -9691,9 +9691,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op1_alt, d->op1); + emit_move_insn (op0_alt, d->op0); + +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); +- rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); ++ rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target, ++ d->vmode, 0); + + emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, + conv_op0, GEN_INT (0x02))); +@@ -9725,9 +9726,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + Selector sample: E_V4DImode, { 0, 1, 4 ,5 } */ + if (!d->testing_p) + { +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); +- rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); ++ rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target, ++ d->vmode, 0); + + /* We can achieve the expectation by using sinple xvpermi.q insn. 
*/ + emit_move_insn (conv_target, conv_op1); +@@ -9752,8 +9754,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op1_alt, d->op1); + emit_move_insn (op0_alt, d->op0); + +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); + /* Adjust op value in temp regiter. + op0 = { 0, 1, 2, 3 }, op1 = { 6, 7, 2, 3 } */ + emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, +@@ -9797,9 +9799,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op1_alt, d->op1); + emit_move_insn (op0_alt, d->op0); + +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); +- rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); ++ rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target, ++ d->vmode, 0); + + emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, + conv_op0, GEN_INT (0x13))); +@@ -9831,10 +9834,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + Selector sample:E_V8SImode, { 2, 2, 2, 2, 2, 2, 2, 2 } */ + if (!d->testing_p) + { +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); + rtx temp_reg = gen_reg_rtx (d->vmode); +- rtx conv_temp = gen_rtx_SUBREG (E_V4DImode, temp_reg, 0); ++ rtx conv_temp = simplify_gen_subreg (E_V4DImode, temp_reg, ++ d->vmode, 0); + + emit_move_insn (temp_reg, d->op0); + +@@ -9943,9 +9947,11 @@ loongarch_expand_vec_perm_const_2 
(struct expand_vec_perm_d *d) + emit_move_insn (op0_alt, d->op0); + emit_move_insn (op1_alt, d->op1); + +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); +- rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); +- rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); ++ rtx conv_op0a = simplify_gen_subreg (E_V4DImode, op0_alt, ++ d->vmode, 0); ++ rtx conv_op1a = simplify_gen_subreg (E_V4DImode, op1_alt, ++ d->vmode, 0); + + /* Duplicate op0's low 128bit in op0, then duplicate high 128bit + in op1. After this, xvshuf.* insn's selector argument can +@@ -9978,10 +9984,12 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + emit_move_insn (op0_alt, d->op0); + emit_move_insn (op1_alt, d->op1); + +- rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); +- rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); +- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); +- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0); ++ rtx conv_op0a = simplify_gen_subreg (E_V4DImode, op0_alt, ++ d->vmode, 0); ++ rtx conv_op1a = simplify_gen_subreg (E_V4DImode, op1_alt, ++ d->vmode, 0); ++ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); ++ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0); + + /* Reorganize op0's hi/lo 128bit and op1's hi/lo 128bit, to make sure + that selector's low 128bit can access all op0's elements, and +@@ -10101,12 +10109,12 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + { + case E_V4DFmode: + sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ tmp = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0); + emit_move_insn (tmp, sel); + break; + case E_V8SFmode: + sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = gen_rtx_SUBREG (E_V8SImode, d->target, 0); ++ tmp = simplify_gen_subreg 
(E_V8SImode, d->target, d->vmode, 0); + emit_move_insn (tmp, sel); + break; + default: +@@ -10192,7 +10200,7 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + 64bit in target vector register. */ + else if (extract_ev_od) + { +- rtx converted = gen_rtx_SUBREG (E_V4DImode, d->target, 0); ++ rtx converted = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0); + emit_insn (gen_lasx_xvpermi_d_v4di (converted, converted, + GEN_INT (0xD8))); + } +@@ -11279,7 +11287,9 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode, + if (mode != vimode) + { + xop1 = gen_reg_rtx (vimode); +- emit_move_insn (xop1, gen_rtx_SUBREG (vimode, operands[1], 0)); ++ emit_move_insn (xop1, ++ simplify_gen_subreg (vimode, operands[1], ++ mode, 0)); + } + emit_move_insn (src1, xop1); + } +@@ -11296,7 +11306,9 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode, + if (mode != vimode) + { + xop2 = gen_reg_rtx (vimode); +- emit_move_insn (xop2, gen_rtx_SUBREG (vimode, operands[2], 0)); ++ emit_move_insn (xop2, ++ simplify_gen_subreg (vimode, operands[2], ++ mode, 0)); + } + emit_move_insn (src2, xop2); + } +@@ -11315,7 +11327,8 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode, + gen_rtx_AND (vimode, mask, src1)); + /* The result is placed back to a register with the mask. 
*/ + emit_insn (gen_rtx_SET (mask, bsel)); +- emit_move_insn (operands[0], gen_rtx_SUBREG (mode, mask, 0)); ++ emit_move_insn (operands[0], ++ simplify_gen_subreg (mode, mask, vimode, 0)); + } + } + +diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-3.c b/gcc/testsuite/gcc.target/loongarch/pr112476-3.c +new file mode 100644 +index 000000000..d696d4182 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr112476-3.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mlsx" } */ ++ ++#include ++ ++typedef int8_t orc_int8; ++typedef int16_t orc_int16; ++typedef int32_t orc_int32; ++typedef int64_t orc_int64; ++ ++typedef union ++{ ++ orc_int32 i; ++ float f; ++ orc_int16 x2[2]; ++ orc_int8 x4[4]; ++} orc_union32; ++typedef union ++{ ++ orc_int64 i; ++ double f; ++ orc_int32 x2[2]; ++ float x2f[2]; ++ orc_int16 x4[4]; ++} orc_union64; ++ ++void ++audio_orc_s32_to_double (double * restrict d1, ++ const signed int * restrict s1, int n) ++{ ++ int i; ++ orc_union64 *restrict ptr0; ++ const orc_union32 *restrict ptr4; ++ orc_union32 var33; ++ orc_union64 var34; ++ orc_union64 var35; ++ orc_union64 var36; ++ ++ ptr0 = (orc_union64 *) d1; ++ ptr4 = (orc_union32 *) s1; ++ ++ var34.i = 0x41e0000000000000UL; ++ ++ for (i = 0; i < n; i++) { ++ var33 = ptr4[i]; ++ var36.f = var33.i; ++ { ++ orc_union64 _src1; ++ orc_union64 _src2; ++ orc_union64 _dest1; ++ _src1.i = ((var36.i) & ((((var36.i)&0x7ff0000000000000UL) == 0) ? 0xfff0000000000000UL : 0xffffffffffffffffUL)); ++ _src2.i = ((var34.i) & ((((var34.i)&0x7ff0000000000000UL) == 0) ? 0xfff0000000000000UL : 0xffffffffffffffffUL)); ++ _dest1.f = _src1.f / _src2.f; ++ var35.i = ((_dest1.i) & ((((_dest1.i)&0x7ff0000000000000UL) == 0) ? 
0xfff0000000000000UL : 0xffffffffffffffffUL)); ++ } ++ ptr0[i] = var35; ++ } ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-4.c b/gcc/testsuite/gcc.target/loongarch/pr112476-4.c +new file mode 100644 +index 000000000..955d98552 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr112476-4.c +@@ -0,0 +1,4 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mlasx" } */ ++ ++#include "pr112476-3.c" +-- +2.43.0 + diff --git a/0066-LoongArch-Fix-eh_return-epilogue-for-normal-returns.patch b/0066-LoongArch-Fix-eh_return-epilogue-for-normal-returns.patch new file mode 100644 index 0000000..f418277 --- /dev/null +++ b/0066-LoongArch-Fix-eh_return-epilogue-for-normal-returns.patch @@ -0,0 +1,236 @@ +From 34088d0a8685defa97754b7ab5d90b9bc536cfaa Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Fri, 8 Dec 2023 18:01:18 +0800 +Subject: [PATCH 066/188] LoongArch: Fix eh_return epilogue for normal returns. + +On LoongArch, the regitsters $r4 - $r7 (EH_RETURN_DATA_REGNO) will be saved +and restored in the function prologue and epilogue if the given function calls +__builtin_eh_return. This causes the return value to be overwritten on normal +return paths and breaks a rare case of libgcc's _Unwind_RaiseException. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc: Do not restore the saved eh_return + data registers ($r4-$r7) for a normal return of a function that calls + __builtin_eh_return elsewhere. + * config/loongarch/loongarch-protos.h: Same. + * config/loongarch/loongarch.md: Same. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/eh_return-normal-return.c: New test. 
+--- + gcc/config/loongarch/loongarch-protos.h | 2 +- + gcc/config/loongarch/loongarch.cc | 34 ++++++++++++----- + gcc/config/loongarch/loongarch.md | 23 ++++++++++- + .../loongarch/eh_return-normal-return.c | 38 +++++++++++++++++++ + 4 files changed, 84 insertions(+), 13 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 117669e9f..e5fcf3111 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -60,7 +60,7 @@ enum loongarch_symbol_type { + extern rtx loongarch_emit_move (rtx, rtx); + extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int); + extern void loongarch_expand_prologue (void); +-extern void loongarch_expand_epilogue (bool); ++extern void loongarch_expand_epilogue (int); + extern bool loongarch_can_use_return_insn (void); + + extern bool loongarch_symbolic_constant_p (rtx, enum loongarch_symbol_type *); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 4a3a7a246..7caf04d8d 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1012,7 +1012,8 @@ loongarch_save_restore_reg (machine_mode mode, int regno, HOST_WIDE_INT offset, + + static void + loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, +- loongarch_save_restore_fn fn) ++ loongarch_save_restore_fn fn, ++ bool skip_eh_data_regs_p) + { + HOST_WIDE_INT offset; + +@@ -1021,7 +1022,14 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, + for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) + if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) + { +- if (!cfun->machine->reg_is_wrapped_separately[regno]) ++ /* Special care needs to be taken for $r4-$r7 (EH_RETURN_DATA_REGNO) ++ when returning normally from a function that calls ++ __builtin_eh_return. 
In this case, these registers are saved but ++ should not be restored, or the return value may be clobbered. */ ++ ++ if (!(cfun->machine->reg_is_wrapped_separately[regno] ++ || (skip_eh_data_regs_p ++ && GP_ARG_FIRST <= regno && regno < GP_ARG_FIRST + 4))) + loongarch_save_restore_reg (word_mode, regno, offset, fn); + + offset -= UNITS_PER_WORD; +@@ -1294,7 +1302,7 @@ loongarch_expand_prologue (void) + GEN_INT (-step1)); + RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; + size -= step1; +- loongarch_for_each_saved_reg (size, loongarch_save_reg); ++ loongarch_for_each_saved_reg (size, loongarch_save_reg, false); + } + + /* Set up the frame pointer, if we're using one. */ +@@ -1379,11 +1387,13 @@ loongarch_can_use_return_insn (void) + return reload_completed && cfun->machine->frame.total_size == 0; + } + +-/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P +- says which. */ ++/* Expand function epilogue using the following insn patterns: ++ "epilogue" (style == NORMAL_RETURN) ++ "sibcall_epilogue" (style == SIBCALL_RETURN) ++ "eh_return" (style == EXCEPTION_RETURN) */ + + void +-loongarch_expand_epilogue (bool sibcall_p) ++loongarch_expand_epilogue (int style) + { + /* Split the frame into two. STEP1 is the amount of stack we should + deallocate before restoring the registers. STEP2 is the amount we +@@ -1400,7 +1410,8 @@ loongarch_expand_epilogue (bool sibcall_p) + bool need_barrier_p + = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0; + +- if (!sibcall_p && loongarch_can_use_return_insn ()) ++ /* Handle simple returns. */ ++ if (style == NORMAL_RETURN && loongarch_can_use_return_insn ()) + { + emit_jump_insn (gen_return ()); + return; +@@ -1476,7 +1487,9 @@ loongarch_expand_epilogue (bool sibcall_p) + + /* Restore the registers. 
*/ + loongarch_for_each_saved_reg (frame->total_size - step2, +- loongarch_restore_reg); ++ loongarch_restore_reg, ++ crtl->calls_eh_return ++ && style != EXCEPTION_RETURN); + + if (need_barrier_p) + loongarch_emit_stack_tie (); +@@ -1497,11 +1510,12 @@ loongarch_expand_epilogue (bool sibcall_p) + } + + /* Add in the __builtin_eh_return stack adjustment. */ +- if (crtl->calls_eh_return) ++ if (crtl->calls_eh_return && style == EXCEPTION_RETURN) + emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, + EH_RETURN_STACKADJ_RTX)); + +- if (!sibcall_p) ++ /* Emit return unless doing sibcall. */ ++ if (style != SIBCALL_RETURN) + emit_jump_insn (gen_simple_return_internal (ra)); + } + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index c6edd1dda..222f1ae83 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -125,6 +125,11 @@ + (T1_REGNUM 13) + (S0_REGNUM 23) + ++ ;; Return path styles ++ (NORMAL_RETURN 0) ++ (SIBCALL_RETURN 1) ++ (EXCEPTION_RETURN 2) ++ + ;; PIC long branch sequences are never longer than 100 bytes. 
+ (MAX_PIC_BRANCH_LENGTH 100) + ]) +@@ -3276,7 +3281,7 @@ + [(const_int 2)] + "" + { +- loongarch_expand_epilogue (false); ++ loongarch_expand_epilogue (NORMAL_RETURN); + DONE; + }) + +@@ -3284,7 +3289,7 @@ + [(const_int 2)] + "" + { +- loongarch_expand_epilogue (true); ++ loongarch_expand_epilogue (SIBCALL_RETURN); + DONE; + }) + +@@ -3341,6 +3346,20 @@ + emit_insn (gen_eh_set_ra_di (operands[0])); + else + emit_insn (gen_eh_set_ra_si (operands[0])); ++ ++ emit_jump_insn (gen_eh_return_internal ()); ++ emit_barrier (); ++ DONE; ++}) ++ ++(define_insn_and_split "eh_return_internal" ++ [(eh_return)] ++ "" ++ "#" ++ "epilogue_completed" ++ [(const_int 0)] ++{ ++ loongarch_expand_epilogue (EXCEPTION_RETURN); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c b/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c +new file mode 100644 +index 000000000..f8f3965f8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c +@@ -0,0 +1,38 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++int foo () __attribute__((noinline)); ++int main (); ++ ++int ++foo () { ++ ++ int t; ++ ++ /* prevent optimization using asm */ ++ asm ("" : "=r" (t) : "0" (-1)); ++ asm ("" : "=r" (t) : "0" (t ? 
1 : 0)); ++ ++ if (t == 0) ++ /* never reached */ ++ __builtin_eh_return (0, __builtin_return_address (0)); ++ ++ else if (t == 1) ++ /* return here */ ++ return 202312; ++ ++ else ++ /* never reached: prevent vrp optimization in main */ ++ return 0; ++} ++ ++int ++main () ++{ ++ if (foo() == 202312) ++ return 0; ++ else ++ abort (); ++} +-- +2.43.0 + diff --git a/0067-LoongArch-Allow-mcmodel-extreme-and-model-attribute-.patch b/0067-LoongArch-Allow-mcmodel-extreme-and-model-attribute-.patch new file mode 100644 index 0000000..84b07ab --- /dev/null +++ b/0067-LoongArch-Allow-mcmodel-extreme-and-model-attribute-.patch @@ -0,0 +1,180 @@ +From fdb51014f00094737459d5c9008630454ec7f342 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Thu, 7 Dec 2023 15:45:30 +0800 +Subject: [PATCH 067/188] LoongArch: Allow -mcmodel=extreme and model attribute + with -mexplicit-relocs=auto + +There seems no real reason to require -mexplicit-relocs=always for +-mcmodel=extreme or model attribute. As the linker does not know how to +relax a 3-operand la.local or la.global pseudo instruction, just emit +explicit relocs for SYMBOL_PCREL64, and under TARGET_CMODEL_EXTREME also +SYMBOL_GOT_DISP. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + Return true for SYMBOL_PCREL64. Return true for SYMBOL_GOT_DISP + if TARGET_CMODEL_EXTREME. + (loongarch_split_symbol): Check for la_opt_explicit_relocs != + EXPLICIT_RELOCS_NONE instead of TARGET_EXPLICIT_RELOCS. + (loongarch_print_operand_reloc): Likewise. + (loongarch_option_override_internal): Likewise. + (loongarch_handle_model_attribute): Likewise. + * doc/invoke.texi (-mcmodel=extreme): Update the compatibility + between it and -mexplicit-relocs=. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/attr-model-3.c: New test. + * gcc.target/loongarch/attr-model-4.c: New test. + * gcc.target/loongarch/func-call-extreme-3.c: New test. + * gcc.target/loongarch/func-call-extreme-4.c: New test. 
+--- + gcc/config/loongarch/loongarch.cc | 25 ++++++++++++------- + gcc/doc/invoke.texi | 4 +-- + .../gcc.target/loongarch/attr-model-3.c | 6 +++++ + .../gcc.target/loongarch/attr-model-4.c | 6 +++++ + .../loongarch/func-call-extreme-3.c | 7 ++++++ + .../loongarch/func-call-extreme-4.c | 7 ++++++ + 6 files changed, 44 insertions(+), 11 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-3.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-4.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 7caf04d8d..4362149ef 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1969,9 +1969,16 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + case SYMBOL_TLS_LE: + case SYMBOL_TLSGD: + case SYMBOL_TLSLDM: +- /* The linker don't know how to relax TLS accesses. */ ++ case SYMBOL_PCREL64: ++ /* The linker don't know how to relax TLS accesses or 64-bit ++ pc-relative accesses. */ + return true; + case SYMBOL_GOT_DISP: ++ /* The linker don't know how to relax GOT accesses in extreme ++ code model. 
*/ ++ if (TARGET_CMODEL_EXTREME) ++ return true; ++ + /* If we are performing LTO for a final link, and we have the + linker plugin so we know the resolution of the symbols, then + all GOT references are binding to external symbols or +@@ -3134,7 +3141,7 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + + if (loongarch_symbol_extreme_p (symbol_type) && can_create_pseudo_p ()) + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); ++ gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE); + + temp1 = gen_reg_rtx (Pmode); + emit_move_insn (temp1, gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 0), +@@ -5933,7 +5940,7 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, + loongarch_classify_symbolic_expression (op); + + if (loongarch_symbol_extreme_p (symbol_type)) +- gcc_assert (TARGET_EXPLICIT_RELOCS); ++ gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE); + + switch (symbol_type) + { +@@ -7540,9 +7547,9 @@ loongarch_option_override_internal (struct gcc_options *opts, + switch (la_target.cmodel) + { + case CMODEL_EXTREME: +- if (!TARGET_EXPLICIT_RELOCS) +- error ("code model %qs needs %s", +- "extreme", "-mexplicit-relocs=always"); ++ if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE) ++ error ("code model %qs is not compatible with %s", ++ "extreme", "-mexplicit-relocs=none"); + + if (opts->x_flag_plt) + { +@@ -7908,11 +7915,11 @@ loongarch_handle_model_attribute (tree *node, tree name, tree arg, int, + *no_add_attrs = true; + return NULL_TREE; + } +- if (!TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE) + { + error_at (DECL_SOURCE_LOCATION (decl), +- "%qE attribute requires %s", name, +- "-mexplicit-relocs=always"); ++ "%qE attribute is not compatible with %s", name, ++ "-mexplicit-relocs=none"); + *no_add_attrs = true; + return NULL_TREE; + } +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 76a8f20d1..5c6515cb1 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi 
+@@ -24602,8 +24602,8 @@ The text segment and data segment must be within 2GB addressing space. + + @item extreme + This mode does not limit the size of the code segment and data segment. +-The @option{-mcmodel=extreme} option is incompatible with @option{-fplt} and +-@option{-mno-explicit-relocs}. ++The @option{-mcmodel=extreme} option is incompatible with @option{-fplt} ++and/or @option{-mexplicit-relocs=none}. + @end table + The default code model is @code{normal}. + +diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-3.c b/gcc/testsuite/gcc.target/loongarch/attr-model-3.c +new file mode 100644 +index 000000000..5622d5086 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/attr-model-3.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mexplicit-relocs=auto -mcmodel=normal -O2" } */ ++/* { dg-final { scan-assembler-times "%pc64_hi12" 2 } } */ ++ ++#define ATTR_MODEL_TEST ++#include "attr-model-test.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-4.c b/gcc/testsuite/gcc.target/loongarch/attr-model-4.c +new file mode 100644 +index 000000000..482724bb9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/attr-model-4.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mexplicit-relocs=auto -mcmodel=extreme -O2" } */ ++/* { dg-final { scan-assembler-times "%pc64_hi12" 3 } } */ ++ ++#define ATTR_MODEL_TEST ++#include "attr-model-test.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c +new file mode 100644 +index 000000000..a4da44b4a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ ++/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ ++/* { 
dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ ++/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ ++ ++#include "func-call-extreme-1.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c +new file mode 100644 +index 000000000..16b00f4c5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ ++/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ ++/* { dg-final { scan-assembler "test1:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ ++/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ ++ ++#include "func-call-extreme-1.c" +-- +2.43.0 + diff --git a/0068-LoongArch-Fix-warnings-building-libgcc.patch b/0068-LoongArch-Fix-warnings-building-libgcc.patch new file mode 100644 index 0000000..47a113d --- /dev/null +++ b/0068-LoongArch-Fix-warnings-building-libgcc.patch @@ -0,0 +1,79 @@ +From 5a910f294605d0163f8f4ac255a14425b154b5dd Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sat, 9 Dec 2023 22:08:37 +0800 +Subject: [PATCH 068/188] LoongArch: Fix warnings building libgcc + +We are excluding loongarch-opts.h from target libraries, but now struct +loongarch_target and gcc_options are not declared in the target +libraries, causing: + +In file included from ../.././gcc/options.h:8, + from ../.././gcc/tm.h:49, + from 
../../../gcc/libgcc/fixed-bit.c:48: +../../../gcc/libgcc/../gcc/config/loongarch/loongarch-opts.h:57:41: +warning: 'struct gcc_options' declared inside parameter list will not +be visible outside of this definition or declaration + 57 | struct gcc_options *opts, + | ^~~~~~~~~~~ + +So exclude the declarations referring to the C++ structs as well. + +gcc/ChangeLog: + + * config/loongarch/loongarch-opts.h (la_target): Move into #if + for loongarch-def.h. + (loongarch_init_target): Likewise. + (loongarch_config_target): Likewise. + (loongarch_update_gcc_opt_status): Likewise. +--- + gcc/config/loongarch/loongarch-opts.h | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 7010ddfec..639ed50bd 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -21,22 +21,15 @@ along with GCC; see the file COPYING3. If not see + #ifndef LOONGARCH_OPTS_H + #define LOONGARCH_OPTS_H + +-/* This is a C++ header and it shouldn't be used by target libraries. */ ++/* The loongarch-def.h file is a C++ header and it shouldn't be used by ++ target libraries. Exclude it and everything using the C++ structs ++ (struct loongarch_target and gcc_options) from target libraries. */ + #if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + #include "loongarch-def.h" +-#endif + + /* Target configuration */ + extern struct loongarch_target la_target; + +-/* Flag status */ +-struct loongarch_flags { +- int flt; const char* flt_str; +-#define SX_FLAG_TYPE(x) ((x) < 0 ? -(x) : (x)) +- int sx[2]; +-}; +- +- + /* Initialize loongarch_target from separate option variables. 
*/ + void + loongarch_init_target (struct loongarch_target *target, +@@ -56,7 +49,14 @@ void + loongarch_update_gcc_opt_status (struct loongarch_target *target, + struct gcc_options *opts, + struct gcc_options *opts_set); ++#endif + ++/* Flag status */ ++struct loongarch_flags { ++ int flt; const char* flt_str; ++#define SX_FLAG_TYPE(x) ((x) < 0 ? -(x) : (x)) ++ int sx[2]; ++}; + + /* Macros for common conditional expressions used in loongarch.{c,h,md} */ + #define TARGET_CMODEL_NORMAL (la_target.cmodel == CMODEL_NORMAL) +-- +2.43.0 + diff --git a/0069-LoongArch-testsuite-Remove-XFAIL-in-vect-ftint-no-in.patch b/0069-LoongArch-testsuite-Remove-XFAIL-in-vect-ftint-no-in.patch new file mode 100644 index 0000000..a7ef474 --- /dev/null +++ b/0069-LoongArch-testsuite-Remove-XFAIL-in-vect-ftint-no-in.patch @@ -0,0 +1,30 @@ +From 639e7518c8a4468cd50d774c5a3dbda5f2dbb4a7 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Wed, 13 Dec 2023 02:39:35 +0800 +Subject: [PATCH 069/188] LoongArch: testsuite: Remove XFAIL in + vect-ftint-no-inexact.c + +After r14-6455 this no longer fails. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-ftint-no-inexact.c (xfail): Remove. 
+--- + gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c +index 83d268099..61918beef 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c ++++ b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c +@@ -39,6 +39,5 @@ + /* { dg-final { scan-assembler-not "\txvftintrne\.w\.s" } } */ + /* { dg-final { scan-assembler-not "\txvftintrne\.l\.d" } } */ + +-/* trunc: XFAIL due to PR 107723 */ +-/* { dg-final { scan-assembler "bl\t%plt\\(trunc\\)" { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler "bl\t%plt\\(trunc\\)" } } */ + /* { dg-final { scan-assembler "bl\t%plt\\(truncf\\)" } } */ +-- +2.43.0 + diff --git a/0070-LoongArch-Include-rtl.h-for-COSTS_N_INSNS-instead-of.patch b/0070-LoongArch-Include-rtl.h-for-COSTS_N_INSNS-instead-of.patch new file mode 100644 index 0000000..e0ff042 --- /dev/null +++ b/0070-LoongArch-Include-rtl.h-for-COSTS_N_INSNS-instead-of.patch @@ -0,0 +1,44 @@ +From 6a5e3932a39f1ffa6f87479748ee711e4fa47d30 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sat, 9 Dec 2023 15:27:28 +0800 +Subject: [PATCH 070/188] LoongArch: Include rtl.h for COSTS_N_INSNS instead of + hard coding our own + +With loongarch-def.cc switched from C to C++, we can include rtl.h for +COSTS_N_INSNS, instead of hard coding our own. + +This is a non-functional change for now, but it will make the code more +future-proof in case COSTS_N_INSNS in rtl.h would be changed. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.cc (rtl.h): Include. + (COSTS_N_INSNS): Remove the macro definition.
+--- + gcc/config/loongarch/loongarch-def.cc | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index c41804a18..6217b1926 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see + #include "system.h" + #include "coretypes.h" + #include "tm.h" ++#include "rtl.h" + + #include "loongarch-def.h" + #include "loongarch-str.h" +@@ -89,8 +90,6 @@ array_tune loongarch_cpu_align = + .set (CPU_LA464, la464_align ()) + .set (CPU_LA664, la464_align ()); + +-#define COSTS_N_INSNS(N) ((N) * 4) +- + /* Default RTX cost initializer. */ + loongarch_rtx_cost_data::loongarch_rtx_cost_data () + : fp_add (COSTS_N_INSNS (1)), +-- +2.43.0 + diff --git a/0071-LoongArch-Fix-instruction-costs-PR112936.patch b/0071-LoongArch-Fix-instruction-costs-PR112936.patch new file mode 100644 index 0000000..8a3eb78 --- /dev/null +++ b/0071-LoongArch-Fix-instruction-costs-PR112936.patch @@ -0,0 +1,165 @@ +From c5abe64e64aba601e67f3367a27caf616062b8f4 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sat, 9 Dec 2023 17:41:32 +0800 +Subject: [PATCH 071/188] LoongArch: Fix instruction costs [PR112936] + +Replace the instruction costs in loongarch_rtx_cost_data constructor +based on micro-benchmark results on LA464 and LA664. + +This allows optimizations like "x * 17" to alsl, and "x * 68" to alsl +and slli. + +gcc/ChangeLog: + + PR target/112936 + * config/loongarch/loongarch-def.cc + (loongarch_rtx_cost_data::loongarch_rtx_cost_data): Update + instruction costs per micro-benchmark results. + (loongarch_rtx_cost_optimize_size): Set all instruction costs + to (COSTS_N_INSNS (1) + 1). + * config/loongarch/loongarch.cc (loongarch_rtx_costs): Remove + special case for multiplication when optimizing for size. + Adjust division cost when TARGET_64BIT && !TARGET_DIV32. 
+ Account the extra cost when TARGET_CHECK_ZERO_DIV and + optimizing for speed. + +gcc/testsuite/ChangeLog + + PR target/112936 + * gcc.target/loongarch/mul-const-reduction.c: New test. +--- + gcc/config/loongarch/loongarch-def.cc | 39 ++++++++++--------- + gcc/config/loongarch/loongarch.cc | 22 +++++------ + .../loongarch/mul-const-reduction.c | 11 ++++++ + 3 files changed, 43 insertions(+), 29 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c + +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 6217b1926..4a8885e83 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -92,15 +92,15 @@ array_tune loongarch_cpu_align = + + /* Default RTX cost initializer. */ + loongarch_rtx_cost_data::loongarch_rtx_cost_data () +- : fp_add (COSTS_N_INSNS (1)), +- fp_mult_sf (COSTS_N_INSNS (2)), +- fp_mult_df (COSTS_N_INSNS (4)), +- fp_div_sf (COSTS_N_INSNS (6)), ++ : fp_add (COSTS_N_INSNS (5)), ++ fp_mult_sf (COSTS_N_INSNS (5)), ++ fp_mult_df (COSTS_N_INSNS (5)), ++ fp_div_sf (COSTS_N_INSNS (8)), + fp_div_df (COSTS_N_INSNS (8)), +- int_mult_si (COSTS_N_INSNS (1)), +- int_mult_di (COSTS_N_INSNS (1)), +- int_div_si (COSTS_N_INSNS (4)), +- int_div_di (COSTS_N_INSNS (6)), ++ int_mult_si (COSTS_N_INSNS (4)), ++ int_mult_di (COSTS_N_INSNS (4)), ++ int_div_si (COSTS_N_INSNS (5)), ++ int_div_di (COSTS_N_INSNS (5)), + branch_cost (6), + memory_latency (4) {} + +@@ -111,18 +111,21 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data () + array_tune loongarch_cpu_rtx_cost_data = + array_tune (); + +-/* RTX costs to use when optimizing for size. */ ++/* RTX costs to use when optimizing for size. ++ We use a value slightly larger than COSTS_N_INSNS (1) for all of them ++ because they are slower than simple instructions. 
*/ ++#define COST_COMPLEX_INSN (COSTS_N_INSNS (1) + 1) + const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = + loongarch_rtx_cost_data () +- .fp_add_ (4) +- .fp_mult_sf_ (4) +- .fp_mult_df_ (4) +- .fp_div_sf_ (4) +- .fp_div_df_ (4) +- .int_mult_si_ (4) +- .int_mult_di_ (4) +- .int_div_si_ (4) +- .int_div_di_ (4); ++ .fp_add_ (COST_COMPLEX_INSN) ++ .fp_mult_sf_ (COST_COMPLEX_INSN) ++ .fp_mult_df_ (COST_COMPLEX_INSN) ++ .fp_div_sf_ (COST_COMPLEX_INSN) ++ .fp_div_df_ (COST_COMPLEX_INSN) ++ .int_mult_si_ (COST_COMPLEX_INSN) ++ .int_mult_di_ (COST_COMPLEX_INSN) ++ .int_div_si_ (COST_COMPLEX_INSN) ++ .int_div_di_ (COST_COMPLEX_INSN); + + array_tune loongarch_cpu_issue_rate = array_tune () + .set (CPU_NATIVE, 4) +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 4362149ef..afbb55390 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3797,8 +3797,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + *total = (speed + ? 
loongarch_cost->int_mult_si * 3 + 6 + : COSTS_N_INSNS (7)); +- else if (!speed) +- *total = COSTS_N_INSNS (1) + 1; + else if (mode == DImode) + *total = loongarch_cost->int_mult_di; + else +@@ -3833,14 +3831,18 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + + case UDIV: + case UMOD: +- if (!speed) +- { +- *total = COSTS_N_INSNS (loongarch_idiv_insns (mode)); +- } +- else if (mode == DImode) ++ if (mode == DImode) + *total = loongarch_cost->int_div_di; + else +- *total = loongarch_cost->int_div_si; ++ { ++ *total = loongarch_cost->int_div_si; ++ if (TARGET_64BIT && !TARGET_DIV32) ++ *total += COSTS_N_INSNS (2); ++ } ++ ++ if (TARGET_CHECK_ZERO_DIV) ++ *total += COSTS_N_INSNS (2); ++ + return false; + + case SIGN_EXTEND: +@@ -3872,9 +3874,7 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) + == ZERO_EXTEND)))) + { +- if (!speed) +- *total = COSTS_N_INSNS (1) + 1; +- else if (mode == DImode) ++ if (mode == DImode) + *total = loongarch_cost->int_mult_di; + else + *total = loongarch_cost->int_mult_si; +diff --git a/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c b/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c +new file mode 100644 +index 000000000..02d9a4876 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mtune=la464" } */ ++/* { dg-final { scan-assembler "alsl\.w" } } */ ++/* { dg-final { scan-assembler "slli\.w" } } */ ++/* { dg-final { scan-assembler-not "mul\.w" } } */ ++ ++int ++test (int a) ++{ ++ return a * 68; ++} +-- +2.43.0 + diff --git a/0072-LoongArch-Add-alslsi3_extend.patch b/0072-LoongArch-Add-alslsi3_extend.patch new file mode 100644 index 0000000..a0c7d03 --- /dev/null +++ b/0072-LoongArch-Add-alslsi3_extend.patch @@ -0,0 +1,53 @@ +From 89dfb9ad8687f9b31be5925b2d106b6ec13cc628 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sat, 9 Dec 2023 18:02:35 
+0800 +Subject: [PATCH 072/188] LoongArch: Add alslsi3_extend + +Following the instruction cost fix, we are generating + + alsl.w $a0, $a0, $a0, 4 + +instead of + + li.w $t0, 17 + mul.w $a0, $t0 + +for "x * 17", because alsl.w is 4 times faster than mul.w. But we didn't +have a sign-extending pattern for alsl.w, causing an extra slli.w +instruction generated to sign-extend $a0. Add the pattern to remove the +redundant extension. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (alslsi3_extend): New + define_insn. +--- + gcc/config/loongarch/loongarch.md | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 222f1ae83..23368008e 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2874,6 +2874,18 @@ + [(set_attr "type" "arith") + (set_attr "mode" "")]) + ++(define_insn "alslsi3_extend" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI ++ (plus:SI ++ (ashift:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand 2 "const_immalsl_operand" "")) ++ (match_operand:SI 3 "register_operand" "r"))))] ++ "" ++ "alsl.w\t%0,%1,%3,%2" ++ [(set_attr "type" "arith") ++ (set_attr "mode" "SI")]) ++ + + + ;; Reverse the order of bytes of operand 1 and store the result in operand 0. +-- +2.43.0 + diff --git a/0073-LoongArch-Add-support-for-D-frontend.patch b/0073-LoongArch-Add-support-for-D-frontend.patch new file mode 100644 index 0000000..b953765 --- /dev/null +++ b/0073-LoongArch-Add-support-for-D-frontend.patch @@ -0,0 +1,224 @@ +From 6ef045728a11218f023fee4527cd6d2fdb2c2910 Mon Sep 17 00:00:00 2001 +From: liushuyu +Date: Mon, 18 Dec 2023 09:52:07 +0800 +Subject: [PATCH 073/188] LoongArch: Add support for D frontend. + +gcc/ChangeLog: + + * config.gcc: Add loongarch-d.o to d_target_objs for LoongArch + architecture. + * config/loongarch/t-loongarch: Add object target for loongarch-d.cc.
+ * config/loongarch/loongarch-d.cc + (loongarch_d_target_versions): add interface function to define builtin + D versions for LoongArch architecture. + (loongarch_d_handle_target_float_abi): add interface function to define + builtin D traits for LoongArch architecture. + (loongarch_d_register_target_info): add interface function to register + loongarch_d_handle_target_float_abi function. + * config/loongarch/loongarch-d.h + (loongarch_d_target_versions): add function prototype. + (loongarch_d_register_target_info): Likewise. + +libphobos/ChangeLog: + + * configure.tgt: Enable libphobos for LoongArch architecture. + * libdruntime/gcc/sections/elf.d: Add TLS_DTV_OFFSET constant for + LoongArch64. + * libdruntime/gcc/unwind/generic.d: Add __aligned__ constant for + LoongArch64. +--- + gcc/config.gcc | 1 + + gcc/config/loongarch/loongarch-d.cc | 77 ++++++++++++++++++++++ + gcc/config/loongarch/loongarch-d.h | 26 ++++++++ + gcc/config/loongarch/t-loongarch | 4 ++ + libphobos/configure.tgt | 3 + + libphobos/libdruntime/gcc/sections/elf.d | 2 + + libphobos/libdruntime/gcc/unwind/generic.d | 1 + + 7 files changed, 114 insertions(+) + create mode 100644 gcc/config/loongarch/loongarch-d.cc + create mode 100644 gcc/config/loongarch/loongarch-d.h + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 11ab620d0..039187fa2 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -456,6 +456,7 @@ mips*-*-*) + ;; + loongarch*-*-*) + cpu_type=loongarch ++ d_target_objs="loongarch-d.o" + extra_headers="larchintrin.h lsxintrin.h lasxintrin.h" + extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" + extra_gcc_objs="loongarch-driver.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" +diff --git a/gcc/config/loongarch/loongarch-d.cc b/gcc/config/loongarch/loongarch-d.cc +new file mode 100644 +index 000000000..9ac483c39 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-d.cc +@@ -0,0 +1,77 @@ ++/* Subroutines for the D front end on the 
LoongArch architecture. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm_d.h" ++#include "d/d-target.h" ++#include "d/d-target-def.h" ++ ++/* Implement TARGET_D_CPU_VERSIONS for LoongArch targets. */ ++ ++void ++loongarch_d_target_versions (void) ++{ ++ if (TARGET_64BIT) ++ d_add_builtin_version ("LoongArch64"); ++ else ++ d_add_builtin_version ("LoongArch32"); ++ ++ if (TARGET_HARD_FLOAT_ABI) ++ { ++ d_add_builtin_version ("LoongArch_HardFloat"); ++ d_add_builtin_version ("D_HardFloat"); ++ } ++ else if (TARGET_SOFT_FLOAT_ABI) ++ { ++ d_add_builtin_version ("LoongArch_SoftFloat"); ++ d_add_builtin_version ("D_SoftFloat"); ++ } ++} ++ ++/* Handle a call to `__traits(getTargetInfo, "floatAbi")'. */ ++ ++static tree ++loongarch_d_handle_target_float_abi (void) ++{ ++ const char *abi; ++ ++ if (TARGET_HARD_FLOAT_ABI) ++ abi = "hard"; ++ else if (TARGET_SOFT_FLOAT_ABI) ++ abi = "soft"; ++ else ++ abi = ""; ++ ++ return build_string_literal (strlen (abi) + 1, abi); ++} ++ ++/* Implement TARGET_D_REGISTER_CPU_TARGET_INFO. 
*/ ++ ++void ++loongarch_d_register_target_info (void) ++{ ++ const struct d_target_info_spec handlers[] = { ++ {"floatAbi", loongarch_d_handle_target_float_abi}, ++ {NULL, NULL}, ++ }; ++ ++ d_add_target_info_handlers (handlers); ++} +diff --git a/gcc/config/loongarch/loongarch-d.h b/gcc/config/loongarch/loongarch-d.h +new file mode 100644 +index 000000000..a2fb8d51d +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-d.h +@@ -0,0 +1,26 @@ ++/* Definitions for the D front end on the LoongArch architecture. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++/* Defined in loongarch-d.cc */ ++extern void ++loongarch_d_target_versions (void); ++extern void ++loongarch_d_register_target_info (void); ++ ++/* Target hooks for D language. 
*/ ++#define TARGET_D_CPU_VERSIONS loongarch_d_target_versions ++#define TARGET_D_REGISTER_CPU_TARGET_INFO loongarch_d_register_target_info +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index a1a40431f..994f4d19c 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -67,6 +67,10 @@ loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) \ + loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.cc $(LA_STR_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + ++loongarch-d.o: $(srcdir)/config/loongarch/loongarch-d.cc ++ $(COMPILE) $< ++ $(POSTCOMPILE) ++ + $(srcdir)/config/loongarch/loongarch.opt: s-loongarch-opt ; @true + s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \ + $(srcdir)/config/loongarch/genopts/loongarch.opt.in \ +diff --git a/libphobos/configure.tgt b/libphobos/configure.tgt +index 0063dd232..dcb1551cd 100644 +--- a/libphobos/configure.tgt ++++ b/libphobos/configure.tgt +@@ -36,6 +36,9 @@ case "${target}" in + hppa-*-linux*) + LIBPHOBOS_SUPPORTED=yes + ;; ++ loongarch*-*-linux*) ++ LIBPHOBOS_SUPPORTED=yes ++ ;; + mips*-*-linux*) + LIBPHOBOS_SUPPORTED=yes + ;; +diff --git a/libphobos/libdruntime/gcc/sections/elf.d b/libphobos/libdruntime/gcc/sections/elf.d +index 5819811f3..bc993ea49 100644 +--- a/libphobos/libdruntime/gcc/sections/elf.d ++++ b/libphobos/libdruntime/gcc/sections/elf.d +@@ -1061,6 +1061,8 @@ else version (MIPS64) + enum TLS_DTV_OFFSET = 0x8000; + else version (IBMZ_Any) + enum TLS_DTV_OFFSET = 0x0; ++else version (LoongArch64) ++ enum TLS_DTV_OFFSET = 0x0; + else + static assert( false, "Platform not supported." 
); + +diff --git a/libphobos/libdruntime/gcc/unwind/generic.d b/libphobos/libdruntime/gcc/unwind/generic.d +index 929b75dc7..8e5db80e1 100644 +--- a/libphobos/libdruntime/gcc/unwind/generic.d ++++ b/libphobos/libdruntime/gcc/unwind/generic.d +@@ -141,6 +141,7 @@ else version (SPARC64) private enum __aligned__ = 16; + else version (SystemZ) private enum __aligned__ = 8; + else version (X86) private enum __aligned__ = 16; + else version (X86_64) private enum __aligned__ = 16; ++else version (LoongArch64) private enum __aligned__ = 16; + else static assert( false, "Platform not supported."); + + align(__aligned__) struct _Unwind_Exception +-- +2.43.0 + diff --git a/0074-libruntime-Add-fiber-context-switch-code-for-LoongAr.patch b/0074-libruntime-Add-fiber-context-switch-code-for-LoongAr.patch new file mode 100644 index 0000000..7313fd5 --- /dev/null +++ b/0074-libruntime-Add-fiber-context-switch-code-for-LoongAr.patch @@ -0,0 +1,156 @@ +From 29eade7dc3032c6054f2ec2e2caa4ce43da6212d Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Fri, 8 Dec 2023 18:09:41 +0800 +Subject: [PATCH 074/188] libruntime: Add fiber context switch code for + LoongArch. + +libphobos/ChangeLog: + + * libdruntime/config/loongarch/switchcontext.S: New file. +--- + .../config/loongarch/switchcontext.S | 133 ++++++++++++++++++ + 1 file changed, 133 insertions(+) + create mode 100644 libphobos/libdruntime/config/loongarch/switchcontext.S + +diff --git a/libphobos/libdruntime/config/loongarch/switchcontext.S b/libphobos/libdruntime/config/loongarch/switchcontext.S +new file mode 100644 +index 000000000..edfb9b67e +--- /dev/null ++++ b/libphobos/libdruntime/config/loongarch/switchcontext.S +@@ -0,0 +1,133 @@ ++/* LoongArch support code for fibers and multithreading. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++This file is part of GCC. 
++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. ++ ++You should have received a copy of the GNU General Public License and ++a copy of the GCC Runtime Library Exception along with this program; ++see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++. */ ++ ++#include "../common/threadasm.S" ++ ++/** ++ * Performs a context switch. ++ * ++ * $a0 - void** - ptr to old stack pointer ++ * $a1 - void* - new stack pointer ++ * ++ */ ++ ++#if defined(__loongarch_lp64) ++# define GPR_L ld.d ++# define GPR_S st.d ++# define SZ_GPR 8 ++# define ADDSP(si) addi.d $sp, $sp, si ++#elif defined(__loongarch64_ilp32) ++# define GPR_L ld.w ++# define GPR_S st.w ++# define SZ_GPR 4 ++# define ADDSP(si) addi.w $sp, $sp, si ++#else ++# error Unsupported GPR size (must be 64-bit or 32-bit). ++#endif ++ ++#if defined(__loongarch_double_float) ++# define FPR_L fld.d ++# define FPR_S fst.d ++# define SZ_FPR 8 ++#elif defined(__loongarch_single_float) ++# define FPR_L fld.s ++# define FPR_S fst.s ++# define SZ_FPR 4 ++#else ++# define SZ_FPR 0 ++#endif ++ ++ .text ++ .align 2 ++ .global fiber_switchContext ++ .type fiber_switchContext, @function ++fiber_switchContext: ++ .cfi_startproc ++ ADDSP(-11 * SZ_GPR) ++ ++ // fp regs and return address are stored below the stack ++ // because we don't want the GC to scan them. 
++ ++ // return address (r1) ++ GPR_S $r1, $sp, -SZ_GPR ++ ++#if SZ_FPR != 0 ++ // callee-saved scratch FPRs (f24-f31) ++ FPR_S $f24, $sp, -SZ_GPR-1*SZ_FPR ++ FPR_S $f25, $sp, -SZ_GPR-2*SZ_FPR ++ FPR_S $f26, $sp, -SZ_GPR-3*SZ_FPR ++ FPR_S $f27, $sp, -SZ_GPR-4*SZ_FPR ++ FPR_S $f28, $sp, -SZ_GPR-5*SZ_FPR ++ FPR_S $f29, $sp, -SZ_GPR-6*SZ_FPR ++ FPR_S $f30, $sp, -SZ_GPR-7*SZ_FPR ++ FPR_S $f31, $sp, -SZ_GPR-8*SZ_FPR ++#endif ++ ++ // callee-saved GPRs (r21, fp (r22), r23-r31) ++ GPR_S $r21, $sp, 0*SZ_GPR ++ GPR_S $fp, $sp, 1*SZ_GPR ++ GPR_S $s0, $sp, 2*SZ_GPR ++ GPR_S $s1, $sp, 3*SZ_GPR ++ GPR_S $s2, $sp, 4*SZ_GPR ++ GPR_S $s3, $sp, 5*SZ_GPR ++ GPR_S $s4, $sp, 6*SZ_GPR ++ GPR_S $s5, $sp, 7*SZ_GPR ++ GPR_S $s6, $sp, 8*SZ_GPR ++ GPR_S $s7, $sp, 9*SZ_GPR ++ GPR_S $s8, $sp, 10*SZ_GPR ++ ++ // swap stack pointer ++ GPR_S $sp, $a0, 0 ++ move $sp, $a1 ++ ++ GPR_L $r1, $sp, -SZ_GPR ++ ++#if SZ_FPR != 0 ++ FPR_L $f24, $sp, -SZ_GPR-1*SZ_FPR ++ FPR_L $f25, $sp, -SZ_GPR-2*SZ_FPR ++ FPR_L $f26, $sp, -SZ_GPR-3*SZ_FPR ++ FPR_L $f27, $sp, -SZ_GPR-4*SZ_FPR ++ FPR_L $f28, $sp, -SZ_GPR-5*SZ_FPR ++ FPR_L $f29, $sp, -SZ_GPR-6*SZ_FPR ++ FPR_L $f30, $sp, -SZ_GPR-7*SZ_FPR ++ FPR_L $f31, $sp, -SZ_GPR-8*SZ_FPR ++#endif ++ ++ GPR_L $r21, $sp, 0*SZ_GPR ++ GPR_L $fp, $sp, 1*SZ_GPR ++ GPR_L $s0, $sp, 2*SZ_GPR ++ GPR_L $s1, $sp, 3*SZ_GPR ++ GPR_L $s2, $sp, 4*SZ_GPR ++ GPR_L $s3, $sp, 5*SZ_GPR ++ GPR_L $s4, $sp, 6*SZ_GPR ++ GPR_L $s5, $sp, 7*SZ_GPR ++ GPR_L $s6, $sp, 8*SZ_GPR ++ GPR_L $s7, $sp, 9*SZ_GPR ++ GPR_L $s8, $sp, 10*SZ_GPR ++ ++ ADDSP(11 * SZ_GPR) ++ ++ jr $r1 // return ++ .cfi_endproc ++ .size fiber_switchContext,.-fiber_switchContext +-- +2.43.0 + diff --git a/0075-LoongArch-Fix-FP-vector-comparsons-PR113034.patch b/0075-LoongArch-Fix-FP-vector-comparsons-PR113034.patch new file mode 100644 index 0000000..b9b652c --- /dev/null +++ b/0075-LoongArch-Fix-FP-vector-comparsons-PR113034.patch @@ -0,0 +1,866 @@ +From dd33794e64d462bf39e72f39343a384c191307f4 Mon Sep 17 00:00:00 2001 +From: Xi 
Ruoyao +Date: Sun, 17 Dec 2023 01:09:20 +0800 +Subject: [PATCH 075/188] LoongArch: Fix FP vector comparsons [PR113034] + +We had the following mappings between vfcmp submnemonics and RTX +codes: + + (define_code_attr fcc + [(unordered "cun") + (ordered "cor") + (eq "ceq") + (ne "cne") + (uneq "cueq") + (unle "cule") + (unlt "cult") + (le "cle") + (lt "clt")]) + +This is inconsistent with scalar code: + + (define_code_attr fcond [(unordered "cun") + (uneq "cueq") + (unlt "cult") + (unle "cule") + (eq "ceq") + (lt "slt") + (le "sle") + (ordered "cor") + (ltgt "sne") + (ne "cune") + (ge "sge") + (gt "sgt") + (unge "cuge") + (ungt "cugt")]) + +For every RTX code for which the LSX/LASX code is different from the +scalar code, the scalar code is correct and the LSX/LASX code is wrong. +Most seriously, the RTX code NE should be mapped to "cune", not "cne". +Rewrite vfcmp define_insns in simd.md using the same mapping as +scalar fcmp. + +Note that GAS does not support [x]vfcmp.{c/s}[u]{ge/gt} (pseudo) +instruction (although fcmp.{c/s}[u]{ge/gt} is supported), so we need to +switch the order of inputs and use [x]vfcmp.{c/s}[u]{le/lt} instead. + +The vfcmp.{sult/sule/clt/cle}.{s/d} instructions do not have a single +RTX code, but they can be modeled as the inverse RTX code wrapped in a +"not" operation. Doing so allows the compiler to optimize vectorized +__builtin_isless etc. to a single instruction. This optimization should +be added for scalar code too and I'll do it later. + +Tests are added for mapping between C code, IEC 60559 operations, and +vfcmp instructions. + +[1]:https://gcc.gnu.org/pipermail/gcc-patches/2023-December/640713.html + +gcc/ChangeLog: + + PR target/113034 + * config/loongarch/lasx.md (UNSPEC_LASX_XVFCMP_*): Remove. + (lasx_xvfcmp_caf_): Remove. + (lasx_xvfcmp_cune_): Remove. + (FSC256_UNS): Remove. + (fsc256): Remove. + (lasx_xvfcmp__): Remove. + (lasx_xvfcmp__): Remove. + * config/loongarch/lsx.md (UNSPEC_LSX_VFCMP_*): Remove.
+ (lsx_vfcmp_caf_): Remove. + (lsx_vfcmp_cune_): Remove. + (vfcond): Remove. + (fcc): Remove. + (FSC_UNS): Remove. + (fsc): Remove. + (lsx_vfcmp__): Remove. + (lsx_vfcmp__): Remove. + * config/loongarch/simd.md + (fcond_simd): New define_code_iterator. + (_vfcmp__): + New define_insn. + (fcond_simd_rev): New define_code_iterator. + (fcond_rev_asm): New define_code_attr. + (_vfcmp__): + New define_insn. + (fcond_inv): New define_code_iterator. + (fcond_inv_rev): New define_code_iterator. + (fcond_inv_rev_asm): New define_code_attr. + (_vfcmp__): New define_insn. + (_vfcmp__): + New define_insn. + (UNSPEC_SIMD_FCMP_CAF, UNSPEC_SIMD_FCMP_SAF, + UNSPEC_SIMD_FCMP_SEQ, UNSPEC_SIMD_FCMP_SUN, + UNSPEC_SIMD_FCMP_SUEQ, UNSPEC_SIMD_FCMP_CNE, + UNSPEC_SIMD_FCMP_SOR, UNSPEC_SIMD_FCMP_SUNE): New unspecs. + (SIMD_FCMP): New define_int_iterator. + (fcond_unspec): New define_int_attr. + (_vfcmp__): New define_insn. + * config/loongarch/loongarch.cc (loongarch_expand_lsx_cmp): + Remove unneeded special cases. + +gcc/testsuite/ChangeLog: + + PR target/113034 + * gcc.target/loongarch/vfcmp-f.c: New test. + * gcc.target/loongarch/vfcmp-d.c: New test. + * gcc.target/loongarch/xvfcmp-f.c: New test. + * gcc.target/loongarch/xvfcmp-d.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: Scan for cune + instead of cne. + * gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: Likewise. 
+--- + gcc/config/loongarch/lasx.md | 76 -------- + gcc/config/loongarch/loongarch.cc | 60 +----- + gcc/config/loongarch/lsx.md | 83 -------- + gcc/config/loongarch/simd.md | 118 ++++++++++++ + .../loongarch/vector/lasx/lasx-vcond-2.c | 4 +- + .../loongarch/vector/lsx/lsx-vcond-2.c | 4 +- + gcc/testsuite/gcc.target/loongarch/vfcmp-d.c | 28 +++ + gcc/testsuite/gcc.target/loongarch/vfcmp-f.c | 178 ++++++++++++++++++ + gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c | 29 +++ + gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c | 27 +++ + 10 files changed, 385 insertions(+), 222 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vfcmp-d.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/vfcmp-f.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index eeac8cd98..921ce0eeb 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -32,9 +32,7 @@ + UNSPEC_LASX_XVBITREVI + UNSPEC_LASX_XVBITSET + UNSPEC_LASX_XVBITSETI +- UNSPEC_LASX_XVFCMP_CAF + UNSPEC_LASX_XVFCLASS +- UNSPEC_LASX_XVFCMP_CUNE + UNSPEC_LASX_XVFCVT + UNSPEC_LASX_XVFCVTH + UNSPEC_LASX_XVFCVTL +@@ -44,17 +42,6 @@ + UNSPEC_LASX_XVFRINT + UNSPEC_LASX_XVFRSQRT + UNSPEC_LASX_XVFRSQRTE +- UNSPEC_LASX_XVFCMP_SAF +- UNSPEC_LASX_XVFCMP_SEQ +- UNSPEC_LASX_XVFCMP_SLE +- UNSPEC_LASX_XVFCMP_SLT +- UNSPEC_LASX_XVFCMP_SNE +- UNSPEC_LASX_XVFCMP_SOR +- UNSPEC_LASX_XVFCMP_SUEQ +- UNSPEC_LASX_XVFCMP_SULE +- UNSPEC_LASX_XVFCMP_SULT +- UNSPEC_LASX_XVFCMP_SUN +- UNSPEC_LASX_XVFCMP_SUNE + UNSPEC_LASX_XVFTINT_U + UNSPEC_LASX_XVCLO + UNSPEC_LASX_XVSAT_S +@@ -1481,69 +1468,6 @@ + [(set_attr "type" "simd_fclass") + (set_attr "mode" "")]) + +-(define_insn "lasx_xvfcmp_caf_" +- [(set (match_operand: 0 "register_operand" "=f") +- (unspec: [(match_operand:FLASX 1 "register_operand" "f") +- (match_operand:FLASX 2 "register_operand" "f")] +- 
UNSPEC_LASX_XVFCMP_CAF))] +- "ISA_HAS_LASX" +- "xvfcmp.caf.\t%u0,%u1,%u2" +- [(set_attr "type" "simd_fcmp") +- (set_attr "mode" "")]) +- +-(define_insn "lasx_xvfcmp_cune_" +- [(set (match_operand: 0 "register_operand" "=f") +- (unspec: [(match_operand:FLASX 1 "register_operand" "f") +- (match_operand:FLASX 2 "register_operand" "f")] +- UNSPEC_LASX_XVFCMP_CUNE))] +- "ISA_HAS_LASX" +- "xvfcmp.cune.\t%u0,%u1,%u2" +- [(set_attr "type" "simd_fcmp") +- (set_attr "mode" "")]) +- +- +- +-(define_int_iterator FSC256_UNS [UNSPEC_LASX_XVFCMP_SAF UNSPEC_LASX_XVFCMP_SUN +- UNSPEC_LASX_XVFCMP_SOR UNSPEC_LASX_XVFCMP_SEQ +- UNSPEC_LASX_XVFCMP_SNE UNSPEC_LASX_XVFCMP_SUEQ +- UNSPEC_LASX_XVFCMP_SUNE UNSPEC_LASX_XVFCMP_SULE +- UNSPEC_LASX_XVFCMP_SULT UNSPEC_LASX_XVFCMP_SLE +- UNSPEC_LASX_XVFCMP_SLT]) +- +-(define_int_attr fsc256 +- [(UNSPEC_LASX_XVFCMP_SAF "saf") +- (UNSPEC_LASX_XVFCMP_SUN "sun") +- (UNSPEC_LASX_XVFCMP_SOR "sor") +- (UNSPEC_LASX_XVFCMP_SEQ "seq") +- (UNSPEC_LASX_XVFCMP_SNE "sne") +- (UNSPEC_LASX_XVFCMP_SUEQ "sueq") +- (UNSPEC_LASX_XVFCMP_SUNE "sune") +- (UNSPEC_LASX_XVFCMP_SULE "sule") +- (UNSPEC_LASX_XVFCMP_SULT "sult") +- (UNSPEC_LASX_XVFCMP_SLE "sle") +- (UNSPEC_LASX_XVFCMP_SLT "slt")]) +- +-(define_insn "lasx_xvfcmp__" +- [(set (match_operand: 0 "register_operand" "=f") +- (vfcond: (match_operand:FLASX 1 "register_operand" "f") +- (match_operand:FLASX 2 "register_operand" "f")))] +- "ISA_HAS_LASX" +- "xvfcmp..\t%u0,%u1,%u2" +- [(set_attr "type" "simd_fcmp") +- (set_attr "mode" "")]) +- +- +-(define_insn "lasx_xvfcmp__" +- [(set (match_operand: 0 "register_operand" "=f") +- (unspec: [(match_operand:FLASX 1 "register_operand" "f") +- (match_operand:FLASX 2 "register_operand" "f")] +- FSC256_UNS))] +- "ISA_HAS_LASX" +- "xvfcmp..\t%u0,%u1,%u2" +- [(set_attr "type" "simd_fcmp") +- (set_attr "mode" "")]) +- +- + (define_mode_attr fint256 + [(V8SF "v8si") + (V4DF "v4di")]) +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 
afbb55390..a22601d88 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -11156,7 +11156,6 @@ static void + loongarch_expand_lsx_cmp (rtx dest, enum rtx_code cond, rtx op0, rtx op1) + { + machine_mode cmp_mode = GET_MODE (op0); +- int unspec = -1; + bool negate = false; + + switch (cmp_mode) +@@ -11198,66 +11197,9 @@ loongarch_expand_lsx_cmp (rtx dest, enum rtx_code cond, rtx op0, rtx op1) + + case E_V4SFmode: + case E_V2DFmode: +- switch (cond) +- { +- case UNORDERED: +- case ORDERED: +- case EQ: +- case NE: +- case UNEQ: +- case UNLE: +- case UNLT: +- break; +- case LTGT: cond = NE; break; +- case UNGE: cond = UNLE; std::swap (op0, op1); break; +- case UNGT: cond = UNLT; std::swap (op0, op1); break; +- case LE: unspec = UNSPEC_LSX_VFCMP_SLE; break; +- case LT: unspec = UNSPEC_LSX_VFCMP_SLT; break; +- case GE: unspec = UNSPEC_LSX_VFCMP_SLE; std::swap (op0, op1); break; +- case GT: unspec = UNSPEC_LSX_VFCMP_SLT; std::swap (op0, op1); break; +- default: +- gcc_unreachable (); +- } +- if (unspec < 0) +- loongarch_emit_binary (cond, dest, op0, op1); +- else +- { +- rtx x = gen_rtx_UNSPEC (GET_MODE (dest), +- gen_rtvec (2, op0, op1), unspec); +- emit_insn (gen_rtx_SET (dest, x)); +- } +- break; +- + case E_V8SFmode: + case E_V4DFmode: +- switch (cond) +- { +- case UNORDERED: +- case ORDERED: +- case EQ: +- case NE: +- case UNEQ: +- case UNLE: +- case UNLT: +- break; +- case LTGT: cond = NE; break; +- case UNGE: cond = UNLE; std::swap (op0, op1); break; +- case UNGT: cond = UNLT; std::swap (op0, op1); break; +- case LE: unspec = UNSPEC_LASX_XVFCMP_SLE; break; +- case LT: unspec = UNSPEC_LASX_XVFCMP_SLT; break; +- case GE: unspec = UNSPEC_LASX_XVFCMP_SLE; std::swap (op0, op1); break; +- case GT: unspec = UNSPEC_LASX_XVFCMP_SLT; std::swap (op0, op1); break; +- default: +- gcc_unreachable (); +- } +- if (unspec < 0) +- loongarch_emit_binary (cond, dest, op0, op1); +- else +- { +- rtx x = gen_rtx_UNSPEC (GET_MODE (dest), +- gen_rtvec 
(2, op0, op1), unspec); +- emit_insn (gen_rtx_SET (dest, x)); +- } ++ loongarch_emit_binary (cond, dest, op0, op1); + break; + + default: +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index dbdb42301..57e0ee3d4 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -34,9 +34,7 @@ + UNSPEC_LSX_VBITSETI + UNSPEC_LSX_BRANCH_V + UNSPEC_LSX_BRANCH +- UNSPEC_LSX_VFCMP_CAF + UNSPEC_LSX_VFCLASS +- UNSPEC_LSX_VFCMP_CUNE + UNSPEC_LSX_VFCVT + UNSPEC_LSX_VFCVTH + UNSPEC_LSX_VFCVTL +@@ -46,17 +44,6 @@ + UNSPEC_LSX_VFRINT + UNSPEC_LSX_VFRSQRT + UNSPEC_LSX_VFRSQRTE +- UNSPEC_LSX_VFCMP_SAF +- UNSPEC_LSX_VFCMP_SEQ +- UNSPEC_LSX_VFCMP_SLE +- UNSPEC_LSX_VFCMP_SLT +- UNSPEC_LSX_VFCMP_SNE +- UNSPEC_LSX_VFCMP_SOR +- UNSPEC_LSX_VFCMP_SUEQ +- UNSPEC_LSX_VFCMP_SULE +- UNSPEC_LSX_VFCMP_SULT +- UNSPEC_LSX_VFCMP_SUN +- UNSPEC_LSX_VFCMP_SUNE + UNSPEC_LSX_VFTINT_U + UNSPEC_LSX_VSAT_S + UNSPEC_LSX_VSAT_U +@@ -1377,76 +1364,6 @@ + [(set_attr "type" "simd_fclass") + (set_attr "mode" "")]) + +-(define_insn "lsx_vfcmp_caf_" +- [(set (match_operand: 0 "register_operand" "=f") +- (unspec: [(match_operand:FLSX 1 "register_operand" "f") +- (match_operand:FLSX 2 "register_operand" "f")] +- UNSPEC_LSX_VFCMP_CAF))] +- "ISA_HAS_LSX" +- "vfcmp.caf.\t%w0,%w1,%w2" +- [(set_attr "type" "simd_fcmp") +- (set_attr "mode" "")]) +- +-(define_insn "lsx_vfcmp_cune_" +- [(set (match_operand: 0 "register_operand" "=f") +- (unspec: [(match_operand:FLSX 1 "register_operand" "f") +- (match_operand:FLSX 2 "register_operand" "f")] +- UNSPEC_LSX_VFCMP_CUNE))] +- "ISA_HAS_LSX" +- "vfcmp.cune.\t%w0,%w1,%w2" +- [(set_attr "type" "simd_fcmp") +- (set_attr "mode" "")]) +- +-(define_code_iterator vfcond [unordered ordered eq ne le lt uneq unle unlt]) +- +-(define_code_attr fcc +- [(unordered "cun") +- (ordered "cor") +- (eq "ceq") +- (ne "cne") +- (uneq "cueq") +- (unle "cule") +- (unlt "cult") +- (le "cle") +- (lt "clt")]) +- +-(define_int_iterator FSC_UNS [UNSPEC_LSX_VFCMP_SAF 
UNSPEC_LSX_VFCMP_SUN UNSPEC_LSX_VFCMP_SOR +- UNSPEC_LSX_VFCMP_SEQ UNSPEC_LSX_VFCMP_SNE UNSPEC_LSX_VFCMP_SUEQ +- UNSPEC_LSX_VFCMP_SUNE UNSPEC_LSX_VFCMP_SULE UNSPEC_LSX_VFCMP_SULT +- UNSPEC_LSX_VFCMP_SLE UNSPEC_LSX_VFCMP_SLT]) +- +-(define_int_attr fsc +- [(UNSPEC_LSX_VFCMP_SAF "saf") +- (UNSPEC_LSX_VFCMP_SUN "sun") +- (UNSPEC_LSX_VFCMP_SOR "sor") +- (UNSPEC_LSX_VFCMP_SEQ "seq") +- (UNSPEC_LSX_VFCMP_SNE "sne") +- (UNSPEC_LSX_VFCMP_SUEQ "sueq") +- (UNSPEC_LSX_VFCMP_SUNE "sune") +- (UNSPEC_LSX_VFCMP_SULE "sule") +- (UNSPEC_LSX_VFCMP_SULT "sult") +- (UNSPEC_LSX_VFCMP_SLE "sle") +- (UNSPEC_LSX_VFCMP_SLT "slt")]) +- +-(define_insn "lsx_vfcmp__" +- [(set (match_operand: 0 "register_operand" "=f") +- (vfcond: (match_operand:FLSX 1 "register_operand" "f") +- (match_operand:FLSX 2 "register_operand" "f")))] +- "ISA_HAS_LSX" +- "vfcmp..\t%w0,%w1,%w2" +- [(set_attr "type" "simd_fcmp") +- (set_attr "mode" "")]) +- +-(define_insn "lsx_vfcmp__" +- [(set (match_operand: 0 "register_operand" "=f") +- (unspec: [(match_operand:FLSX 1 "register_operand" "f") +- (match_operand:FLSX 2 "register_operand" "f")] +- FSC_UNS))] +- "ISA_HAS_LSX" +- "vfcmp..\t%w0,%w1,%w2" +- [(set_attr "type" "simd_fcmp") +- (set_attr "mode" "")]) +- + (define_mode_attr fint + [(V4SF "v4si") + (V2DF "v2di")]) +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 843b1a41f..13202f79b 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -279,6 +279,124 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + ++;; vfcmp.*.{s/d} with defined RTX code ++;; There are no fcmp.{sugt/suge/cgt/cge}.{s/d} menmonics in GAS, so we have ++;; to reverse the operands ourselves :(. 
++(define_code_iterator fcond_simd [unordered uneq unlt unle eq lt le ++ ordered ltgt ne]) ++(define_insn "_vfcmp__" ++ [(set (match_operand: 0 "register_operand" "=f") ++ (fcond_simd: ++ (match_operand:FVEC 1 "register_operand" "f") ++ (match_operand:FVEC 2 "register_operand" "f")))] ++ "" ++ "vfcmp..\t%0,%1,%2" ++ [(set_attr "type" "simd_fcmp") ++ (set_attr "mode" "")]) ++ ++;; There are no fcmp.{sge/sgt/cuge/cugt}.{s/d} menmonics in GAS, so we have ++;; to reverse the operands ourselves. ++(define_code_iterator fcond_simd_rev [ge gt unge ungt]) ++ ++(define_code_attr fcond_rev_asm ++ [(ge "sle") ++ (gt "slt") ++ (unge "cule") ++ (ungt "cult")]) ++ ++(define_insn "_vfcmp__" ++ [(set (match_operand: 0 "register_operand" "=f") ++ (fcond_simd_rev: ++ (match_operand:FVEC 1 "register_operand" "f") ++ (match_operand:FVEC 2 "register_operand" "f")))] ++ "" ++ "vfcmp..\t%0,%2,%1"; ++ [(set_attr "type" "simd_fcmp") ++ (set_attr "mode" "")]) ++ ++;; vfcmp.*.{s/d} without defined RTX code, but with defined RTX code for ++;; its inverse. Again, there are no fcmp.{sugt/suge/cgt/cge}.{s/d} ++;; menmonics in GAS, so we have to reverse the operands ourselves. 
++(define_code_iterator fcond_inv [ge gt unge ungt]) ++(define_code_iterator fcond_inv_rev [le lt unle unlt]) ++(define_code_attr fcond_inv ++ [(ge "sult") ++ (gt "sule") ++ (unge "clt") ++ (ungt "cle") ++ (le "sugt") ++ (lt "suge") ++ (unle "cgt") ++ (unlt "cge")]) ++(define_code_attr fcond_inv_rev_asm ++ [(le "sult") ++ (lt "sule") ++ (unle "clt") ++ (unlt "cle")]) ++ ++(define_insn "_vfcmp__" ++ [(set (match_operand: 0 "register_operand" "=f") ++ (not: ++ (fcond_inv: ++ (match_operand:FVEC 1 "register_operand" "f") ++ (match_operand:FVEC 2 "register_operand" "f"))))] ++ "" ++ "vfcmp..\t%0,%1,%2" ++ [(set_attr "type" "simd_fcmp") ++ (set_attr "mode" "")]) ++ ++(define_insn "_vfcmp__" ++ [(set (match_operand: 0 "register_operand" "=f") ++ (not: ++ (fcond_inv_rev: ++ (match_operand:FVEC 1 "register_operand" "f") ++ (match_operand:FVEC 2 "register_operand" "f"))))] ++ "" ++ "vfcmp..\t%0,%2,%1" ++ [(set_attr "type" "simd_fcmp") ++ (set_attr "mode" "")]) ++ ++;; vfcmp.*.{s/d} instructions only as instrinsics ++(define_c_enum "unspec" ++ [UNSPEC_SIMD_FCMP_CAF ++ UNSPEC_SIMD_FCMP_SAF ++ UNSPEC_SIMD_FCMP_SEQ ++ UNSPEC_SIMD_FCMP_SUN ++ UNSPEC_SIMD_FCMP_SUEQ ++ UNSPEC_SIMD_FCMP_CNE ++ UNSPEC_SIMD_FCMP_SOR ++ UNSPEC_SIMD_FCMP_SUNE]) ++ ++(define_int_iterator SIMD_FCMP ++ [UNSPEC_SIMD_FCMP_CAF ++ UNSPEC_SIMD_FCMP_SAF ++ UNSPEC_SIMD_FCMP_SEQ ++ UNSPEC_SIMD_FCMP_SUN ++ UNSPEC_SIMD_FCMP_SUEQ ++ UNSPEC_SIMD_FCMP_CNE ++ UNSPEC_SIMD_FCMP_SOR ++ UNSPEC_SIMD_FCMP_SUNE]) ++ ++(define_int_attr fcond_unspec ++ [(UNSPEC_SIMD_FCMP_CAF "caf") ++ (UNSPEC_SIMD_FCMP_SAF "saf") ++ (UNSPEC_SIMD_FCMP_SEQ "seq") ++ (UNSPEC_SIMD_FCMP_SUN "sun") ++ (UNSPEC_SIMD_FCMP_SUEQ "sueq") ++ (UNSPEC_SIMD_FCMP_CNE "cne") ++ (UNSPEC_SIMD_FCMP_SOR "sor") ++ (UNSPEC_SIMD_FCMP_SUNE "sune")]) ++ ++(define_insn "_vfcmp__" ++ [(set (match_operand: 0 "register_operand" "=f") ++ (unspec: [(match_operand:FVEC 1 "register_operand" "f") ++ (match_operand:FVEC 2 "register_operand" "f")] ++ SIMD_FCMP))] ++ "" ++ 
"vfcmp..\t%0,%1,%2" ++ [(set_attr "type" "simd_fcmp") ++ (set_attr "mode" "")]) ++ + ; The LoongArch SX Instructions. + (include "lsx.md") + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c +index 55d5a084c..f2f523622 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c +@@ -69,8 +69,8 @@ TEST_CMP (nugt) + + /* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 3 } } */ + /* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 3 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 3 } } */ +-/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cune\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\txvfcmp\.cune\.d} 3 } } */ + /* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 6 } } */ + /* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 6 } } */ + /* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 6 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c +index 2214afd0a..486bedba4 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c +@@ -69,8 +69,8 @@ TEST_CMP (nugt) + + /* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 3 } } */ + /* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 3 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 3 } } */ +-/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cune\.s} 3 } } */ ++/* { dg-final { scan-assembler-times {\tvfcmp\.cune\.d} 3 } } */ + /* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 6 } } */ + /* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 6 } } */ + /* { dg-final { 
scan-assembler-times {\tvfcmp\.sle\.s} 6 } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c b/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c +new file mode 100644 +index 000000000..8b870ef38 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */ ++ ++#define F double ++#define I long long ++ ++#include "vfcmp-f.c" ++ ++/* { dg-final { scan-assembler "compare_quiet_equal:.*\tvfcmp\\.ceq\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\tvfcmp\\.cune\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater:.*\tvfcmp\\.slt\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\tvfcmp\\.sle\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less:.*\tvfcmp\\.slt\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\tvfcmp\\.sle\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\tvfcmp\\.sule\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_not_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\tvfcmp\\.sult\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_not_less:.*\tvfcmp\\.sule\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_not_less\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\tvfcmp\\.sult\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_unordered\n" } } */ ++/* { dg-final { scan-assembler 
"compare_quiet_less:.*\tvfcmp\\.clt\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\tvfcmp\\.cle\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater:.*\tvfcmp\\.clt\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\tvfcmp\\.cle\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_less:.*\tvfcmp\\.cule\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_not_less\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\tvfcmp\\.cult\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\tvfcmp\\.cule\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\tvfcmp\\.cult\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_unordered:.*\tvfcmp\\.cun\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_ordered:.*\tvfcmp\\.cor\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_ordered\n" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c b/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c +new file mode 100644 +index 000000000..b9110b90c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c +@@ -0,0 +1,178 @@ ++/* Test mapping IEC 60559 operations to SIMD instructions. ++ For details read C23 Annex F.3 and LoongArch Vol. 1 section 3.2.2.1. 
*/ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */ ++ ++#ifndef F ++#define F float ++#endif ++ ++#ifndef I ++#define I int ++#endif ++ ++#ifndef VL ++#define VL 16 ++#endif ++ ++typedef F VF __attribute__ ((vector_size (VL))); ++typedef I VI __attribute__ ((vector_size (VL))); ++ ++register VF a asm ("f0"); ++register VF b asm ("f1"); ++register VI c asm ("f2"); ++ ++void ++compare_quiet_equal (void) ++{ ++ c = (a == b); ++} ++ ++void ++compare_quiet_not_equal (void) ++{ ++ c = (a != b); ++} ++ ++void ++compare_signaling_greater (void) ++{ ++ c = (a > b); ++} ++ ++void ++compare_signaling_greater_equal (void) ++{ ++ c = (a >= b); ++} ++ ++void ++compare_signaling_less (void) ++{ ++ c = (a < b); ++} ++ ++void ++compare_signaling_less_equal (void) ++{ ++ c = (a <= b); ++} ++ ++void ++compare_signaling_not_greater (void) ++{ ++ c = ~(a > b); ++} ++ ++void ++compare_signaling_less_unordered (void) ++{ ++ c = ~(a >= b); ++} ++ ++void ++compare_signaling_not_less (void) ++{ ++ c = ~(a < b); ++} ++ ++void ++compare_signaling_greater_unordered (void) ++{ ++ c = ~(a <= b); ++} ++ ++void ++compare_quiet_less (void) ++{ ++ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) ++ c[i] = __builtin_isless (a[i], b[i]) ? -1 : 0; ++} ++ ++void ++compare_quiet_less_equal (void) ++{ ++ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) ++ c[i] = __builtin_islessequal (a[i], b[i]) ? -1 : 0; ++} ++ ++void ++compare_quiet_greater (void) ++{ ++ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) ++ c[i] = __builtin_isgreater (a[i], b[i]) ? -1 : 0; ++} ++ ++void ++compare_quiet_greater_equal (void) ++{ ++ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) ++ c[i] = __builtin_isgreaterequal (a[i], b[i]) ? -1 : 0; ++} ++ ++void ++compare_quiet_not_less (void) ++{ ++ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) ++ c[i] = __builtin_isless (a[i], b[i]) ? 
0 : -1; ++} ++ ++void ++compare_quiet_greater_unordered (void) ++{ ++ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) ++ c[i] = __builtin_islessequal (a[i], b[i]) ? 0 : -1; ++} ++ ++void ++compare_quiet_not_greater (void) ++{ ++ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) ++ c[i] = __builtin_isgreater (a[i], b[i]) ? 0 : -1; ++} ++ ++void ++compare_quiet_less_unordered (void) ++{ ++ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) ++ c[i] = __builtin_isgreaterequal (a[i], b[i]) ? 0 : -1; ++} ++ ++void ++compare_quiet_unordered (void) ++{ ++ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) ++ c[i] = __builtin_isunordered (a[i], b[i]) ? -1 : 0; ++} ++ ++void ++compare_quiet_ordered (void) ++{ ++ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) ++ c[i] = __builtin_isunordered (a[i], b[i]) ? 0 : -1; ++} ++ ++/* The "-" matches the .size directive after the function ++ body, so we can ensure the instruction is in the correct function. */ ++ ++/* { dg-final { scan-assembler "compare_quiet_equal:.*\tvfcmp\\.ceq\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\tvfcmp\\.cune\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater:.*\tvfcmp\\.slt\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\tvfcmp\\.sle\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less:.*\tvfcmp\\.slt\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\tvfcmp\\.sle\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\tvfcmp\\.sule\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_not_greater\n" } } */ ++/* { dg-final { 
scan-assembler "compare_signaling_less_unordered:.*\tvfcmp\\.sult\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_not_less:.*\tvfcmp\\.sule\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_not_less\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\tvfcmp\\.sult\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less:.*\tvfcmp\\.clt\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\tvfcmp\\.cle\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater:.*\tvfcmp\\.clt\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\tvfcmp\\.cle\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_less:.*\tvfcmp\\.cule\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_not_less\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\tvfcmp\\.cult\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\tvfcmp\\.cule\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\tvfcmp\\.cult\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_unordered:.*\tvfcmp\\.cun\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_ordered:.*\tvfcmp\\.cor\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_ordered\n" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c b/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c +new file mode 100644 
+index 000000000..d8017caaa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c +@@ -0,0 +1,29 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */ ++ ++#define F double ++#define I long long ++#define VL 32 ++ ++#include "vfcmp-f.c" ++ ++/* { dg-final { scan-assembler "compare_quiet_equal:.*\txvfcmp\\.ceq\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\txvfcmp\\.cune\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater:.*\txvfcmp\\.slt\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\txvfcmp\\.sle\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less:.*\txvfcmp\\.slt\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\txvfcmp\\.sle\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\txvfcmp\\.sule\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_not_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\txvfcmp\\.sult\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_not_less:.*\txvfcmp\\.sule\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_not_less\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\txvfcmp\\.sult\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less:.*\txvfcmp\\.clt\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less\n" } } */ ++/* { dg-final { scan-assembler 
"compare_quiet_less_equal:.*\txvfcmp\\.cle\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater:.*\txvfcmp\\.clt\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\txvfcmp\\.cle\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_less:.*\txvfcmp\\.cule\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_not_less\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\txvfcmp\\.cult\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\txvfcmp\\.cule\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\txvfcmp\\.cult\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_unordered:.*\txvfcmp\\.cun\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_ordered:.*\txvfcmp\\.cor\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_ordered\n" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c b/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c +new file mode 100644 +index 000000000..b54556475 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c +@@ -0,0 +1,27 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx -ffixed-f0 -ffixed-f1 -ffixed-f2" } */ ++ ++#define VL 32 ++ ++#include "vfcmp-f.c" ++ ++/* { dg-final { scan-assembler "compare_quiet_equal:.*\txvfcmp\\.ceq\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\txvfcmp\\.cune\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_equal\n" } } */ ++/* { dg-final { scan-assembler 
"compare_signaling_greater:.*\txvfcmp\\.slt\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\txvfcmp\\.sle\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less:.*\txvfcmp\\.slt\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\txvfcmp\\.sle\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\txvfcmp\\.sule\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_not_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\txvfcmp\\.sult\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_not_less:.*\txvfcmp\\.sule\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_not_less\n" } } */ ++/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\txvfcmp\\.sult\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less:.*\txvfcmp\\.clt\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\txvfcmp\\.cle\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater:.*\txvfcmp\\.clt\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\txvfcmp\\.cle\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_equal\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_less:.*\txvfcmp\\.cule\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_not_less\n" } } */ ++/* { dg-final { scan-assembler 
"compare_quiet_greater_unordered:.*\txvfcmp\\.cult\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\txvfcmp\\.cule\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_greater\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\txvfcmp\\.cult\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_unordered:.*\txvfcmp\\.cun\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_unordered\n" } } */ ++/* { dg-final { scan-assembler "compare_quiet_ordered:.*\txvfcmp\\.cor\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_ordered\n" } } */ +-- +2.43.0 + diff --git a/0076-LoongArch-Use-force_reg-instead-of-gen_reg_rtx-emit_.patch b/0076-LoongArch-Use-force_reg-instead-of-gen_reg_rtx-emit_.patch new file mode 100644 index 0000000..7e9e742 --- /dev/null +++ b/0076-LoongArch-Use-force_reg-instead-of-gen_reg_rtx-emit_.patch @@ -0,0 +1,190 @@ +From be149d7f6527df6b16f3f9f8aec1e488466a71f1 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Tue, 19 Dec 2023 04:48:03 +0800 +Subject: [PATCH 076/188] LoongArch: Use force_reg instead of gen_reg_rtx + + emit_move_insn in vec_init expander [PR113033] + +Jakub says: + + Then that seems like a bug in the loongarch vec_init pattern(s). + Those really don't have a predicate in any of the backends on the + input operand, so they need to force_reg it if it is something it + can't handle. I've looked e.g. at i386 vec_init and that is exactly + what it does, see the various tests + force_reg calls in + ix86_expand_vector_init*. + +So replace gen_reg_rtx + emit_move_insn with force_reg to fix PR 113033. + +gcc/ChangeLog: + + PR target/113033 + * config/loongarch/loongarch.cc + (loongarch_expand_vector_init_same): Replace gen_reg_rtx + + emit_move_insn with force_reg. + (loongarch_expand_vector_init): Likewise. 
+ +gcc/testsuite/ChangeLog: + + PR target/113033 + * gcc.target/loongarch/pr113033.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 38 ++++++------------- + gcc/testsuite/gcc.target/loongarch/pr113033.c | 23 +++++++++++ + 2 files changed, 35 insertions(+), 26 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr113033.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index a22601d88..000d2d623 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -10745,7 +10745,7 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + gcc_unreachable (); + } + } +- temp = gen_reg_rtx (imode); ++ + if (imode == GET_MODE (same)) + temp2 = same; + else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) +@@ -10770,7 +10770,8 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + else + temp2 = lowpart_subreg (imode, same, GET_MODE (same)); + } +- emit_move_insn (temp, temp2); ++ ++ temp = force_reg (imode, temp2); + + switch (vmode) + { +@@ -10992,35 +10993,29 @@ loongarch_expand_vector_init (rtx target, rtx vals) + to reduce the number of instructions. */ + if (i == 1) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val_hi[0]); +- op1 = gen_reg_rtx (imode); +- emit_move_insn (op1, val_hi[1]); ++ op0 = force_reg (imode, val_hi[0]); ++ op1 = force_reg (imode, val_hi[1]); + emit_insn ( + loongarch_vec_repl2_256 (target_hi, op0, op1)); + } + else if (i > 1) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val_hi[i]); ++ op0 = force_reg (imode, val_hi[i]); + emit_insn ( + loongarch_vec_set256 (target_hi, op0, GEN_INT (i))); + } + } + else + { ++ op0 = force_reg (imode, val_hi[i]); + /* Assign the lowest element of val_hi to all elements + of target_hi. 
*/ + if (i == 0) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val_hi[0]); + emit_insn (loongarch_vec_repl1_256 (target_hi, op0)); + } + else if (!rtx_equal_p (val_hi[i], val_hi[0])) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val_hi[i]); + emit_insn ( + loongarch_vec_set256 (target_hi, op0, GEN_INT (i))); + } +@@ -11028,18 +11023,15 @@ loongarch_expand_vector_init (rtx target, rtx vals) + } + if (!lo_same && !half_same) + { ++ op0 = force_reg (imode, val_lo[i]); + /* Assign the lowest element of val_lo to all elements + of target_lo. */ + if (i == 0) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val_lo[0]); + emit_insn (loongarch_vec_repl1_128 (target_lo, op0)); + } + else if (!rtx_equal_p (val_lo[i], val_lo[0])) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val_lo[i]); + emit_insn ( + loongarch_vec_set128 (target_lo, op0, GEN_INT (i))); + } +@@ -11071,16 +11063,13 @@ loongarch_expand_vector_init (rtx target, rtx vals) + reduce the number of instructions. */ + if (i == 1) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val[0]); +- op1 = gen_reg_rtx (imode); +- emit_move_insn (op1, val[1]); ++ op0 = force_reg (imode, val[0]); ++ op1 = force_reg (imode, val[1]); + emit_insn (loongarch_vec_repl2_128 (target, op0, op1)); + } + else if (i > 1) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val[i]); ++ op0 = force_reg (imode, val[i]); + emit_insn ( + loongarch_vec_set128 (target, op0, GEN_INT (i))); + } +@@ -11093,18 +11082,15 @@ loongarch_expand_vector_init (rtx target, rtx vals) + loongarch_vec_mirror (target, target, const0_rtx)); + return; + } ++ op0 = force_reg (imode, val[i]); + /* Assign the lowest element of val to all elements of + target. 
*/ + if (i == 0) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val[0]); + emit_insn (loongarch_vec_repl1_128 (target, op0)); + } + else if (!rtx_equal_p (val[i], val[0])) + { +- op0 = gen_reg_rtx (imode); +- emit_move_insn (op0, val[i]); + emit_insn ( + loongarch_vec_set128 (target, op0, GEN_INT (i))); + } +diff --git a/gcc/testsuite/gcc.target/loongarch/pr113033.c b/gcc/testsuite/gcc.target/loongarch/pr113033.c +new file mode 100644 +index 000000000..4ccd037d8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr113033.c +@@ -0,0 +1,23 @@ ++/* PR target/113033: ICE with vector left rotate */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx" } */ ++ ++typedef unsigned __attribute__ ((vector_size (16))) v4si; ++typedef unsigned __attribute__ ((vector_size (32))) v8si; ++typedef unsigned long long __attribute__ ((vector_size (16))) v2di; ++typedef unsigned long long __attribute__ ((vector_size (32))) v4di; ++ ++#define TEST(tp) \ ++extern tp data_##tp; \ ++tp \ ++test_##tp (int x) \ ++{ \ ++ const int bit = sizeof (data_##tp[0]) * __CHAR_BIT__; \ ++ data_##tp = data_##tp << (x & (bit - 1)) \ ++ | data_##tp >> (bit - x & (bit - 1)); \ ++} ++ ++TEST (v4si) ++TEST (v8si) ++TEST (v2di) ++TEST (v4di) +-- +2.43.0 + diff --git a/0077-LoongArch-Clean-up-vec_init-expander.patch b/0077-LoongArch-Clean-up-vec_init-expander.patch new file mode 100644 index 0000000..191e9f5 --- /dev/null +++ b/0077-LoongArch-Clean-up-vec_init-expander.patch @@ -0,0 +1,83 @@ +From 38438021c770f077b78092299f22712fdd734814 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Tue, 19 Dec 2023 05:02:42 +0800 +Subject: [PATCH 077/188] LoongArch: Clean up vec_init expander + +Non functional change, clean up the code. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_expand_vector_init_same): Remove "temp2" and reuse + "temp" instead. + (loongarch_expand_vector_init): Use gcc_unreachable () instead + of gcc_assert (0), and fix the comment for it. 
+--- + gcc/config/loongarch/loongarch.cc | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 000d2d623..3aeafeafd 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -10723,7 +10723,7 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + machine_mode vmode = GET_MODE (target); + machine_mode imode = GET_MODE_INNER (vmode); + rtx same = XVECEXP (vals, 0, 0); +- rtx temp, temp2; ++ rtx temp; + + if (CONST_INT_P (same) && nvar == 0 + && loongarch_signed_immediate_p (INTVAL (same), 10, 0)) +@@ -10747,17 +10747,17 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + } + + if (imode == GET_MODE (same)) +- temp2 = same; ++ temp = same; + else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) + { + if (GET_CODE (same) == MEM) + { + rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); + loongarch_emit_move (reg_tmp, same); +- temp2 = simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0); ++ temp = simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0); + } + else +- temp2 = simplify_gen_subreg (imode, same, GET_MODE (same), 0); ++ temp = simplify_gen_subreg (imode, same, GET_MODE (same), 0); + } + else + { +@@ -10765,13 +10765,13 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) + { + rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); + loongarch_emit_move (reg_tmp, same); +- temp2 = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp)); ++ temp = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp)); + } + else +- temp2 = lowpart_subreg (imode, same, GET_MODE (same)); ++ temp = lowpart_subreg (imode, same, GET_MODE (same)); + } + +- temp = force_reg (imode, temp2); ++ temp = force_reg (imode, temp); + + switch (vmode) + { +@@ -11117,8 +11117,8 @@ loongarch_expand_vector_init (rtx target, rtx vals) + return; + } + +- /* Loongson is the only cpu with vectors with 
more elements. */ +- gcc_assert (0); ++ /* No LoongArch CPU supports vectors with more elements as at now. */ ++ gcc_unreachable (); + } + + /* Implement HARD_REGNO_CALLER_SAVE_MODE. */ +-- +2.43.0 + diff --git a/0078-LoongArch-Fix-incorrect-code-generation-for-sad-patt.patch b/0078-LoongArch-Fix-incorrect-code-generation-for-sad-patt.patch new file mode 100644 index 0000000..5c2ca9a --- /dev/null +++ b/0078-LoongArch-Fix-incorrect-code-generation-for-sad-patt.patch @@ -0,0 +1,78 @@ +From e5c0e4b416b8628585e27b524ba524261cacf713 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Thu, 14 Dec 2023 20:49:04 +0800 +Subject: [PATCH 078/188] LoongArch: Fix incorrect code generation for sad + pattern + +When I attempt to enable vect_usad_char effective target for LoongArch, slp-reduc-sad.c +and vect-reduc-sad*.c tests fail. These tests fail because the sad pattern generates bad +code. This patch to fixed them, for sad patterns, use zero expansion instead of sign +expansion for reduction. + +Currently, we are fixing failed vectorized tests, and in the future, we will +enable more tests of "vect" for LoongArch. + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Use zero expansion instruction. + * config/loongarch/lsx.md: Ditto. 
+--- + gcc/config/loongarch/lasx.md | 8 ++++---- + gcc/config/loongarch/lsx.md | 8 ++++---- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 921ce0eeb..9ca3f9278 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -5021,8 +5021,8 @@ + rtx t2 = gen_reg_rtx (V16HImode); + rtx t3 = gen_reg_rtx (V8SImode); + emit_insn (gen_lasx_xvabsd_u_bu (t1, operands[1], operands[2])); +- emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1)); +- emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2)); ++ emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1)); ++ emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv8si3 (operands[0], t3, operands[3])); + DONE; + }) +@@ -5038,8 +5038,8 @@ + rtx t2 = gen_reg_rtx (V16HImode); + rtx t3 = gen_reg_rtx (V8SImode); + emit_insn (gen_lasx_xvabsd_s_b (t1, operands[1], operands[2])); +- emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1)); +- emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2)); ++ emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1)); ++ emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv8si3 (operands[0], t3, operands[3])); + DONE; + }) +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 57e0ee3d4..7f5fff40a 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -3385,8 +3385,8 @@ + rtx t2 = gen_reg_rtx (V8HImode); + rtx t3 = gen_reg_rtx (V4SImode); + emit_insn (gen_lsx_vabsd_u_bu (t1, operands[1], operands[2])); +- emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1)); +- emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2)); ++ emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1)); ++ emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv4si3 (operands[0], t3, operands[3])); + DONE; + }) +@@ -3402,8 +3402,8 @@ + rtx t2 = gen_reg_rtx (V8HImode); + rtx t3 = gen_reg_rtx (V4SImode); + emit_insn (gen_lsx_vabsd_s_b (t1, operands[1], operands[2])); +- emit_insn (gen_lsx_vhaddw_h_b (t2, 
t1, t1)); +- emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2)); ++ emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1)); ++ emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2)); + emit_insn (gen_addv4si3 (operands[0], t3, operands[3])); + DONE; + }) +-- +2.43.0 + diff --git a/0079-LoongArch-Modify-the-check-type-of-the-vector-builti.patch b/0079-LoongArch-Modify-the-check-type-of-the-vector-builti.patch new file mode 100644 index 0000000..1096a82 --- /dev/null +++ b/0079-LoongArch-Modify-the-check-type-of-the-vector-builti.patch @@ -0,0 +1,68 @@ +From bedb0338fadc373eeafc418a7bf6395d37eec78c Mon Sep 17 00:00:00 2001 +From: chenxiaolong +Date: Wed, 13 Dec 2023 09:31:07 +0800 +Subject: [PATCH 079/188] LoongArch: Modify the check type of the vector + builtin function. + +On LoongArch architecture, using the latest gcc14 in regression test, +it is found that the vector test cases in vector directory appear FAIL +entries with unmatched pointer types. In order to solve this kind of +problem, the type of the variable in the check result is modified with +the parameter type defined in the vector builtin function. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/simd_correctness_check.h:The variable + types in the check results are modified in conjunction with the + parameter types defined in the vector builtin function. 
+--- + .../loongarch/vector/simd_correctness_check.h | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h +index eb7fbd59c..551340bd5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h ++++ b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h +@@ -8,11 +8,12 @@ + int fail = 0; \ + for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \ + { \ +- long *temp_ref = &ref[i], *temp_res = &res[i]; \ ++ long long *temp_ref = (long long *)&ref[i], \ ++ *temp_res = (long long *)&res[i]; \ + if (abs (*temp_ref - *temp_res) > 0) \ + { \ + printf (" error: %s at line %ld , expected " #ref \ +- "[%ld]:0x%lx, got: 0x%lx\n", \ ++ "[%ld]:0x%016lx, got: 0x%016lx\n", \ + __FILE__, line, i, *temp_ref, *temp_res); \ + fail = 1; \ + } \ +@@ -28,11 +29,11 @@ + int fail = 0; \ + for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \ + { \ +- int *temp_ref = &ref[i], *temp_res = &res[i]; \ ++ int *temp_ref = (int *)&ref[i], *temp_res = (int *)&res[i]; \ + if (abs (*temp_ref - *temp_res) > 0) \ + { \ + printf (" error: %s at line %ld , expected " #ref \ +- "[%ld]:0x%x, got: 0x%x\n", \ ++ "[%ld]:0x%08x, got: 0x%08x\n", \ + __FILE__, line, i, *temp_ref, *temp_res); \ + fail = 1; \ + } \ +@@ -47,8 +48,8 @@ + { \ + if (ref != res) \ + { \ +- printf (" error: %s at line %ld , expected %d, got %d\n", __FILE__, \ +- line, ref, res); \ ++ printf (" error: %s at line %ld , expected 0x:%016x", \ ++ "got 0x:%016x\n", __FILE__, line, ref, res); \ + } \ + } \ + while (0) +-- +2.43.0 + diff --git a/0080-LoongArch-extend.texi-Fix-typos-in-LSX-intrinsics.patch b/0080-LoongArch-extend.texi-Fix-typos-in-LSX-intrinsics.patch new file mode 100644 index 0000000..b813c07 --- /dev/null +++ b/0080-LoongArch-extend.texi-Fix-typos-in-LSX-intrinsics.patch @@ -0,0 +1,250 @@ +From 
2e0092b20b845e0e301b1dab177b338e35981f10 Mon Sep 17 00:00:00 2001 +From: Jiajie Chen +Date: Wed, 13 Dec 2023 23:26:01 +0800 +Subject: [PATCH 080/188] LoongArch: extend.texi: Fix typos in LSX intrinsics + +Several typos have been found and fixed: missing semicolons, using +variable name instead of type, duplicate functions and wrong types. + +gcc/ChangeLog: + + * doc/extend.texi(__lsx_vabsd_di): remove extra `i' in name. + (__lsx_vfrintrm_d, __lsx_vfrintrm_s, __lsx_vfrintrne_d, + __lsx_vfrintrne_s, __lsx_vfrintrp_d, __lsx_vfrintrp_s, __lsx_vfrintrz_d, + __lsx_vfrintrz_s): fix return types. + (__lsx_vld, __lsx_vldi, __lsx_vldrepl_b, __lsx_vldrepl_d, + __lsx_vldrepl_h, __lsx_vldrepl_w, __lsx_vmaxi_b, __lsx_vmaxi_d, + __lsx_vmaxi_h, __lsx_vmaxi_w, __lsx_vmini_b, __lsx_vmini_d, + __lsx_vmini_h, __lsx_vmini_w, __lsx_vsrani_d_q, __lsx_vsrarni_d_q, + __lsx_vsrlni_d_q, __lsx_vsrlrni_d_q, __lsx_vssrani_d_q, + __lsx_vssrarni_d_q, __lsx_vssrarni_du_q, __lsx_vssrlni_d_q, + __lsx_vssrlrni_du_q, __lsx_vst, __lsx_vstx, __lsx_vssrani_du_q, + __lsx_vssrlni_du_q, __lsx_vssrlrni_d_q): add missing semicolon. + (__lsx_vpickve2gr_bu, __lsx_vpickve2gr_hu): fix typo in return + type. + (__lsx_vstelm_b, __lsx_vstelm_d, __lsx_vstelm_h, + __lsx_vstelm_w): use imm type for the last argument. + (__lsx_vsigncov_b, __lsx_vsigncov_h, __lsx_vsigncov_w, + __lsx_vsigncov_d): remove duplicate definitions. 
+--- + gcc/doc/extend.texi | 90 ++++++++++++++++++++++----------------------- + 1 file changed, 43 insertions(+), 47 deletions(-) + +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index bb042ae78..ac8da4e80 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -16392,7 +16392,7 @@ int __lsx_bz_v (__m128i); + int __lsx_bz_w (__m128i); + __m128i __lsx_vabsd_b (__m128i, __m128i); + __m128i __lsx_vabsd_bu (__m128i, __m128i); +-__m128i __lsx_vabsd_di (__m128i, __m128i); ++__m128i __lsx_vabsd_d (__m128i, __m128i); + __m128i __lsx_vabsd_du (__m128i, __m128i); + __m128i __lsx_vabsd_h (__m128i, __m128i); + __m128i __lsx_vabsd_hu (__m128i, __m128i); +@@ -16598,14 +16598,14 @@ __m128 __lsx_vfnmsub_s (__m128, __m128, __m128); + __m128d __lsx_vfrecip_d (__m128d); + __m128 __lsx_vfrecip_s (__m128); + __m128d __lsx_vfrint_d (__m128d); +-__m128i __lsx_vfrintrm_d (__m128d); +-__m128i __lsx_vfrintrm_s (__m128); +-__m128i __lsx_vfrintrne_d (__m128d); +-__m128i __lsx_vfrintrne_s (__m128); +-__m128i __lsx_vfrintrp_d (__m128d); +-__m128i __lsx_vfrintrp_s (__m128); +-__m128i __lsx_vfrintrz_d (__m128d); +-__m128i __lsx_vfrintrz_s (__m128); ++__m128d __lsx_vfrintrm_d (__m128d); ++__m128 __lsx_vfrintrm_s (__m128); ++__m128d __lsx_vfrintrne_d (__m128d); ++__m128 __lsx_vfrintrne_s (__m128); ++__m128d __lsx_vfrintrp_d (__m128d); ++__m128 __lsx_vfrintrp_s (__m128); ++__m128d __lsx_vfrintrz_d (__m128d); ++__m128 __lsx_vfrintrz_s (__m128); + __m128 __lsx_vfrint_s (__m128); + __m128d __lsx_vfrsqrt_d (__m128d); + __m128 __lsx_vfrsqrt_s (__m128); +@@ -16674,12 +16674,12 @@ __m128i __lsx_vinsgr2vr_b (__m128i, int, imm0_15); + __m128i __lsx_vinsgr2vr_d (__m128i, long int, imm0_1); + __m128i __lsx_vinsgr2vr_h (__m128i, int, imm0_7); + __m128i __lsx_vinsgr2vr_w (__m128i, int, imm0_3); +-__m128i __lsx_vld (void *, imm_n2048_2047) +-__m128i __lsx_vldi (imm_n1024_1023) +-__m128i __lsx_vldrepl_b (void *, imm_n2048_2047) +-__m128i __lsx_vldrepl_d (void *, imm_n256_255) +-__m128i 
__lsx_vldrepl_h (void *, imm_n1024_1023) +-__m128i __lsx_vldrepl_w (void *, imm_n512_511) ++__m128i __lsx_vld (void *, imm_n2048_2047); ++__m128i __lsx_vldi (imm_n1024_1023); ++__m128i __lsx_vldrepl_b (void *, imm_n2048_2047); ++__m128i __lsx_vldrepl_d (void *, imm_n256_255); ++__m128i __lsx_vldrepl_h (void *, imm_n1024_1023); ++__m128i __lsx_vldrepl_w (void *, imm_n512_511); + __m128i __lsx_vldx (void *, long int); + __m128i __lsx_vmadd_b (__m128i, __m128i, __m128i); + __m128i __lsx_vmadd_d (__m128i, __m128i, __m128i); +@@ -16715,13 +16715,13 @@ __m128i __lsx_vmax_d (__m128i, __m128i); + __m128i __lsx_vmax_du (__m128i, __m128i); + __m128i __lsx_vmax_h (__m128i, __m128i); + __m128i __lsx_vmax_hu (__m128i, __m128i); +-__m128i __lsx_vmaxi_b (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_b (__m128i, imm_n16_15); + __m128i __lsx_vmaxi_bu (__m128i, imm0_31); +-__m128i __lsx_vmaxi_d (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_d (__m128i, imm_n16_15); + __m128i __lsx_vmaxi_du (__m128i, imm0_31); +-__m128i __lsx_vmaxi_h (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_h (__m128i, imm_n16_15); + __m128i __lsx_vmaxi_hu (__m128i, imm0_31); +-__m128i __lsx_vmaxi_w (__m128i, imm_n16_15) ++__m128i __lsx_vmaxi_w (__m128i, imm_n16_15); + __m128i __lsx_vmaxi_wu (__m128i, imm0_31); + __m128i __lsx_vmax_w (__m128i, __m128i); + __m128i __lsx_vmax_wu (__m128i, __m128i); +@@ -16731,13 +16731,13 @@ __m128i __lsx_vmin_d (__m128i, __m128i); + __m128i __lsx_vmin_du (__m128i, __m128i); + __m128i __lsx_vmin_h (__m128i, __m128i); + __m128i __lsx_vmin_hu (__m128i, __m128i); +-__m128i __lsx_vmini_b (__m128i, imm_n16_15) ++__m128i __lsx_vmini_b (__m128i, imm_n16_15); + __m128i __lsx_vmini_bu (__m128i, imm0_31); +-__m128i __lsx_vmini_d (__m128i, imm_n16_15) ++__m128i __lsx_vmini_d (__m128i, imm_n16_15); + __m128i __lsx_vmini_du (__m128i, imm0_31); +-__m128i __lsx_vmini_h (__m128i, imm_n16_15) ++__m128i __lsx_vmini_h (__m128i, imm_n16_15); + __m128i __lsx_vmini_hu (__m128i, imm0_31); +-__m128i 
__lsx_vmini_w (__m128i, imm_n16_15) ++__m128i __lsx_vmini_w (__m128i, imm_n16_15); + __m128i __lsx_vmini_wu (__m128i, imm0_31); + __m128i __lsx_vmin_w (__m128i, __m128i); + __m128i __lsx_vmin_wu (__m128i, __m128i); +@@ -16826,11 +16826,11 @@ __m128i __lsx_vpickod_d (__m128i, __m128i); + __m128i __lsx_vpickod_h (__m128i, __m128i); + __m128i __lsx_vpickod_w (__m128i, __m128i); + int __lsx_vpickve2gr_b (__m128i, imm0_15); +-unsinged int __lsx_vpickve2gr_bu (__m128i, imm0_15); ++unsigned int __lsx_vpickve2gr_bu (__m128i, imm0_15); + long int __lsx_vpickve2gr_d (__m128i, imm0_1); + unsigned long int __lsx_vpickve2gr_du (__m128i, imm0_1); + int __lsx_vpickve2gr_h (__m128i, imm0_7); +-unsinged int __lsx_vpickve2gr_hu (__m128i, imm0_7); ++unsigned int __lsx_vpickve2gr_hu (__m128i, imm0_7); + int __lsx_vpickve2gr_w (__m128i, imm0_3); + unsigned int __lsx_vpickve2gr_wu (__m128i, imm0_3); + __m128i __lsx_vreplgr2vr_b (int); +@@ -16893,10 +16893,6 @@ __m128i __lsx_vsigncov_b (__m128i, __m128i); + __m128i __lsx_vsigncov_d (__m128i, __m128i); + __m128i __lsx_vsigncov_h (__m128i, __m128i); + __m128i __lsx_vsigncov_w (__m128i, __m128i); +-__m128i __lsx_vsigncov_b (__m128i, __m128i); +-__m128i __lsx_vsigncov_d (__m128i, __m128i); +-__m128i __lsx_vsigncov_h (__m128i, __m128i); +-__m128i __lsx_vsigncov_w (__m128i, __m128i); + __m128i __lsx_vsle_b (__m128i, __m128i); + __m128i __lsx_vsle_bu (__m128i, __m128i); + __m128i __lsx_vsle_d (__m128i, __m128i); +@@ -16953,7 +16949,7 @@ __m128i __lsx_vsrai_w (__m128i, imm0_31); + __m128i __lsx_vsran_b_h (__m128i, __m128i); + __m128i __lsx_vsran_h_w (__m128i, __m128i); + __m128i __lsx_vsrani_b_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vsrani_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrani_d_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vsrani_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vsrani_w_d (__m128i, __m128i, imm0_63); + __m128i __lsx_vsran_w_d (__m128i, __m128i); +@@ -16967,7 +16963,7 @@ __m128i __lsx_vsrari_w 
(__m128i, imm0_31); + __m128i __lsx_vsrarn_b_h (__m128i, __m128i); + __m128i __lsx_vsrarn_h_w (__m128i, __m128i); + __m128i __lsx_vsrarni_b_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vsrarni_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrarni_d_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vsrarni_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vsrarni_w_d (__m128i, __m128i, imm0_63); + __m128i __lsx_vsrarn_w_d (__m128i, __m128i); +@@ -16983,7 +16979,7 @@ __m128i __lsx_vsrli_w (__m128i, imm0_31); + __m128i __lsx_vsrln_b_h (__m128i, __m128i); + __m128i __lsx_vsrln_h_w (__m128i, __m128i); + __m128i __lsx_vsrlni_b_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vsrlni_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrlni_d_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vsrlni_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vsrlni_w_d (__m128i, __m128i, imm0_63); + __m128i __lsx_vsrln_w_d (__m128i, __m128i); +@@ -16997,7 +16993,7 @@ __m128i __lsx_vsrlri_w (__m128i, imm0_31); + __m128i __lsx_vsrlrn_b_h (__m128i, __m128i); + __m128i __lsx_vsrlrn_h_w (__m128i, __m128i); + __m128i __lsx_vsrlrni_b_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vsrlrni_d_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vsrlrni_d_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vsrlrni_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vsrlrni_w_d (__m128i, __m128i, imm0_63); + __m128i __lsx_vsrlrn_w_d (__m128i, __m128i); +@@ -17009,8 +17005,8 @@ __m128i __lsx_vssran_hu_w (__m128i, __m128i); + __m128i __lsx_vssran_h_w (__m128i, __m128i); + __m128i __lsx_vssrani_b_h (__m128i, __m128i, imm0_15); + __m128i __lsx_vssrani_bu_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vssrani_d_q (__m128i, __m128i, imm0_127) +-__m128i __lsx_vssrani_du_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vssrani_d_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vssrani_du_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vssrani_hu_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vssrani_h_w (__m128i, __m128i, 
imm0_31); + __m128i __lsx_vssrani_w_d (__m128i, __m128i, imm0_63); +@@ -17023,8 +17019,8 @@ __m128i __lsx_vssrarn_hu_w (__m128i, __m128i); + __m128i __lsx_vssrarn_h_w (__m128i, __m128i); + __m128i __lsx_vssrarni_b_h (__m128i, __m128i, imm0_15); + __m128i __lsx_vssrarni_bu_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vssrarni_d_q (__m128i, __m128i, imm0_127) +-__m128i __lsx_vssrarni_du_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vssrarni_d_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vssrarni_du_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vssrarni_hu_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vssrarni_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vssrarni_w_d (__m128i, __m128i, imm0_63); +@@ -17037,8 +17033,8 @@ __m128i __lsx_vssrln_hu_w (__m128i, __m128i); + __m128i __lsx_vssrln_h_w (__m128i, __m128i); + __m128i __lsx_vssrlni_b_h (__m128i, __m128i, imm0_15); + __m128i __lsx_vssrlni_bu_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vssrlni_d_q (__m128i, __m128i, imm0_127) +-__m128i __lsx_vssrlni_du_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vssrlni_d_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vssrlni_du_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vssrlni_hu_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vssrlni_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vssrlni_w_d (__m128i, __m128i, imm0_63); +@@ -17051,8 +17047,8 @@ __m128i __lsx_vssrlrn_hu_w (__m128i, __m128i); + __m128i __lsx_vssrlrn_h_w (__m128i, __m128i); + __m128i __lsx_vssrlrni_b_h (__m128i, __m128i, imm0_15); + __m128i __lsx_vssrlrni_bu_h (__m128i, __m128i, imm0_15); +-__m128i __lsx_vssrlrni_d_q (__m128i, __m128i, imm0_127) +-__m128i __lsx_vssrlrni_du_q (__m128i, __m128i, imm0_127) ++__m128i __lsx_vssrlrni_d_q (__m128i, __m128i, imm0_127); ++__m128i __lsx_vssrlrni_du_q (__m128i, __m128i, imm0_127); + __m128i __lsx_vssrlrni_hu_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vssrlrni_h_w (__m128i, __m128i, imm0_31); + __m128i __lsx_vssrlrni_w_d (__m128i, __m128i, imm0_63); 
+@@ -17067,12 +17063,12 @@ __m128i __lsx_vssub_h (__m128i, __m128i); + __m128i __lsx_vssub_hu (__m128i, __m128i); + __m128i __lsx_vssub_w (__m128i, __m128i); + __m128i __lsx_vssub_wu (__m128i, __m128i); +-void __lsx_vst (__m128i, void *, imm_n2048_2047) +-void __lsx_vstelm_b (__m128i, void *, imm_n128_127, idx); +-void __lsx_vstelm_d (__m128i, void *, imm_n128_127, idx); +-void __lsx_vstelm_h (__m128i, void *, imm_n128_127, idx); +-void __lsx_vstelm_w (__m128i, void *, imm_n128_127, idx); +-void __lsx_vstx (__m128i, void *, long int) ++void __lsx_vst (__m128i, void *, imm_n2048_2047); ++void __lsx_vstelm_b (__m128i, void *, imm_n128_127, imm0_15); ++void __lsx_vstelm_d (__m128i, void *, imm_n128_127, imm0_1); ++void __lsx_vstelm_h (__m128i, void *, imm_n128_127, imm0_7); ++void __lsx_vstelm_w (__m128i, void *, imm_n128_127, imm0_3); ++void __lsx_vstx (__m128i, void *, long int); + __m128i __lsx_vsub_b (__m128i, __m128i); + __m128i __lsx_vsub_d (__m128i, __m128i); + __m128i __lsx_vsub_h (__m128i, __m128i); +-- +2.43.0 + diff --git a/0081-LoongArch-Fix-builtin-function-prototypes-for-LASX-i.patch b/0081-LoongArch-Fix-builtin-function-prototypes-for-LASX-i.patch new file mode 100644 index 0000000..a4aa098 --- /dev/null +++ b/0081-LoongArch-Fix-builtin-function-prototypes-for-LASX-i.patch @@ -0,0 +1,60 @@ +From d9965ed8d9f4244ac1948c6fb92c7c0f7d80b3a4 Mon Sep 17 00:00:00 2001 +From: chenxiaolong +Date: Tue, 19 Dec 2023 16:43:17 +0800 +Subject: [PATCH 081/188] LoongArch: Fix builtin function prototypes for LASX + in doc. + +gcc/ChangeLog: + + * doc/extend.texi:According to the documents submitted earlier, + Two problems with function return types and using the actual types + of parameters instead of variable names were found and fixed. 
+--- + gcc/doc/extend.texi | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index ac8da4e80..c793c9c5d 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -17438,14 +17438,14 @@ __m256 __lasx_xvfnmsub_s (__m256, __m256, __m256); + __m256d __lasx_xvfrecip_d (__m256d); + __m256 __lasx_xvfrecip_s (__m256); + __m256d __lasx_xvfrint_d (__m256d); +-__m256i __lasx_xvfrintrm_d (__m256d); +-__m256i __lasx_xvfrintrm_s (__m256); +-__m256i __lasx_xvfrintrne_d (__m256d); +-__m256i __lasx_xvfrintrne_s (__m256); +-__m256i __lasx_xvfrintrp_d (__m256d); +-__m256i __lasx_xvfrintrp_s (__m256); +-__m256i __lasx_xvfrintrz_d (__m256d); +-__m256i __lasx_xvfrintrz_s (__m256); ++__m256d __lasx_xvfrintrm_d (__m256d); ++__m256 __lasx_xvfrintrm_s (__m256); ++__m256d __lasx_xvfrintrne_d (__m256d); ++__m256 __lasx_xvfrintrne_s (__m256); ++__m256d __lasx_xvfrintrp_d (__m256d); ++__m256 __lasx_xvfrintrp_s (__m256); ++__m256d __lasx_xvfrintrz_d (__m256d); ++__m256 __lasx_xvfrintrz_s (__m256); + __m256 __lasx_xvfrint_s (__m256); + __m256d __lasx_xvfrsqrt_d (__m256d); + __m256 __lasx_xvfrsqrt_s (__m256); +@@ -17912,10 +17912,10 @@ __m256i __lasx_xvssub_hu (__m256i, __m256i); + __m256i __lasx_xvssub_w (__m256i, __m256i); + __m256i __lasx_xvssub_wu (__m256i, __m256i); + void __lasx_xvst (__m256i, void *, imm_n2048_2047); +-void __lasx_xvstelm_b (__m256i, void *, imm_n128_127, idx); +-void __lasx_xvstelm_d (__m256i, void *, imm_n128_127, idx); +-void __lasx_xvstelm_h (__m256i, void *, imm_n128_127, idx); +-void __lasx_xvstelm_w (__m256i, void *, imm_n128_127, idx); ++void __lasx_xvstelm_b (__m256i, void *, imm_n128_127, imm0_31); ++void __lasx_xvstelm_d (__m256i, void *, imm_n128_127, imm0_3); ++void __lasx_xvstelm_h (__m256i, void *, imm_n128_127, imm0_15); ++void __lasx_xvstelm_w (__m256i, void *, imm_n128_127, imm0_7); + void __lasx_xvstx (__m256i, void *, long int); + __m256i __lasx_xvsub_b 
(__m256i, __m256i); + __m256i __lasx_xvsub_d (__m256i, __m256i); +-- +2.43.0 + diff --git a/0082-LoongArch-Add-asm-modifiers-to-the-LSX-and-LASX-dire.patch b/0082-LoongArch-Add-asm-modifiers-to-the-LSX-and-LASX-dire.patch new file mode 100644 index 0000000..ca4d8f4 --- /dev/null +++ b/0082-LoongArch-Add-asm-modifiers-to-the-LSX-and-LASX-dire.patch @@ -0,0 +1,92 @@ +From 48f0d47eb6dc2c799c845a25cfabd586bd176378 Mon Sep 17 00:00:00 2001 +From: chenxiaolong +Date: Tue, 5 Dec 2023 14:44:35 +0800 +Subject: [PATCH 082/188] LoongArch: Add asm modifiers to the LSX and LASX + directives in the doc. + +gcc/ChangeLog: + + * doc/extend.texi:Add modifiers to the vector of asm in the doc. + * doc/md.texi:Refine the description of the modifier 'f' in the doc. +--- + gcc/doc/extend.texi | 46 +++++++++++++++++++++++++++++++++++++++++++++ + gcc/doc/md.texi | 2 +- + 2 files changed, 47 insertions(+), 1 deletion(-) + +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index c793c9c5d..bcb9329c2 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -11424,10 +11424,56 @@ The list below describes the supported modifiers and their effects for LoongArch + @item @code{d} @tab Same as @code{c}. + @item @code{i} @tab Print the character ''@code{i}'' if the operand is not a register. + @item @code{m} @tab Same as @code{c}, but the printed value is @code{operand - 1}. ++@item @code{u} @tab Print a LASX register. ++@item @code{w} @tab Print a LSX register. + @item @code{X} @tab Print a constant integer operand in hexadecimal. + @item @code{z} @tab Print the operand in its unmodified form, followed by a comma. + @end multitable + ++References to input and output operands in the assembler template of extended ++asm statements can use modifiers to affect the way the operands are formatted ++in the code output to the assembler. 
For example, the following code uses the ++'w' modifier for LoongArch: ++ ++@example ++test-asm.c: ++ ++#include ++ ++__m128i foo (void) ++@{ ++__m128i a,b,c; ++__asm__ ("vadd.d %w0,%w1,%w2\n\t" ++ :"=f" (c) ++ :"f" (a),"f" (b)); ++ ++return c; ++@} ++ ++@end example ++ ++@noindent ++The compile command for the test case is as follows: ++ ++@example ++gcc test-asm.c -mlsx -S -o test-asm.s ++@end example ++ ++@noindent ++The assembly statement produces the following assembly code: ++ ++@example ++vadd.d $vr0,$vr0,$vr1 ++@end example ++ ++This is a 128-bit vector addition instruction, @code{c} (referred to in the ++template string as %0) is the output, and @code{a} (%1) and @code{b} (%2) are ++the inputs. @code{__m128i} is a vector data type defined in the file ++@code{lsxintrin.h} (@xref{LoongArch SX Vector Intrinsics}). The symbol '=f' ++represents a constraint using a floating-point register as an output type, and ++the 'f' in the input operand represents a constraint using a floating-point ++register operand, which can refer to the definition of a constraint ++(@xref{Constraints}) in gcc. + + @lowersections + @include md.texi +diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi +index b58da0787..a2e839073 100644 +--- a/gcc/doc/md.texi ++++ b/gcc/doc/md.texi +@@ -2750,7 +2750,7 @@ $r1h + @item LoongArch---@file{config/loongarch/constraints.md} + @table @code + @item f +-A floating-point register (if available). ++A floating-point or vector register (if available). + @item k + A memory operand whose address is formed by a base register and + (optionally scaled) index register. 
+-- +2.43.0 + diff --git a/0083-LoongArch-Implement-FCCmode-reload-and-cstore-ANYF-m.patch b/0083-LoongArch-Implement-FCCmode-reload-and-cstore-ANYF-m.patch new file mode 100644 index 0000000..c9ceaad --- /dev/null +++ b/0083-LoongArch-Implement-FCCmode-reload-and-cstore-ANYF-m.patch @@ -0,0 +1,392 @@ +From b199de440fc877efdd1dde90b5c1c5111e060c1b Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 15 Dec 2023 01:49:40 +0800 +Subject: [PATCH 083/188] LoongArch: Implement FCCmode reload and + cstore4 + +We used a branch to load floating-point comparison results into GPR. +This is very slow when the branch is not predictable. + +Implement movfcc so we can reload FCCmode into GPRs, FPRs, and MEM. +Then implement cstore4. + +gcc/ChangeLog: + + * config/loongarch/loongarch-tune.h + (loongarch_rtx_cost_data::movcf2gr): New field. + (loongarch_rtx_cost_data::movcf2gr_): New method. + (loongarch_rtx_cost_data::use_movcf2gr): New method. + * config/loongarch/loongarch-def.cc + (loongarch_rtx_cost_data::loongarch_rtx_cost_data): Set movcf2gr + to COSTS_N_INSNS (7) and movgr2cf to COSTS_N_INSNS (15), based + on timing on LA464. + (loongarch_cpu_rtx_cost_data): Set movcf2gr and movgr2cf to + COSTS_N_INSNS (1) for LA664. + (loongarch_rtx_cost_optimize_size): Set movcf2gr and movgr2cf to + COSTS_N_INSNS (1) + 1. + * config/loongarch/predicates.md (loongarch_fcmp_operator): New + predicate. + * config/loongarch/loongarch.md (movfcc): Change to + define_expand. + (movfcc_internal): New define_insn. + (fcc_to_): New define_insn. + (cstore4): New define_expand. + * config/loongarch/loongarch.cc + (loongarch_hard_regno_mode_ok_uncached): Allow FCCmode in GPRs + and GPRs. + (loongarch_secondary_reload): Reload FCCmode via FPR and/or GPR. + (loongarch_emit_float_compare): Call gen_reg_rtx instead of + loongarch_allocate_fcc. + (loongarch_allocate_fcc): Remove. + (loongarch_move_to_gpr_cost): Handle FCC_REGS -> GR_REGS. + (loongarch_move_from_gpr_cost): Handle GR_REGS -> FCC_REGS. 
+ (loongarch_register_move_cost): Handle FCC_REGS -> FCC_REGS, + FCC_REGS -> FP_REGS, and FP_REGS -> FCC_REGS. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/movcf2gr.c: New test. + * gcc.target/loongarch/movcf2gr-via-fr.c: New test. +--- + gcc/config/loongarch/loongarch-def.cc | 13 +++- + gcc/config/loongarch/loongarch-tune.h | 15 +++- + gcc/config/loongarch/loongarch.cc | 70 ++++++++++++------- + gcc/config/loongarch/loongarch.md | 69 ++++++++++++++++-- + gcc/config/loongarch/predicates.md | 4 ++ + .../gcc.target/loongarch/movcf2gr-via-fr.c | 10 +++ + gcc/testsuite/gcc.target/loongarch/movcf2gr.c | 9 +++ + 7 files changed, 157 insertions(+), 33 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/movcf2gr.c + +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 4a8885e83..843be78e4 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -101,15 +101,21 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data () + int_mult_di (COSTS_N_INSNS (4)), + int_div_si (COSTS_N_INSNS (5)), + int_div_di (COSTS_N_INSNS (5)), ++ movcf2gr (COSTS_N_INSNS (7)), ++ movgr2cf (COSTS_N_INSNS (15)), + branch_cost (6), + memory_latency (4) {} + + /* The following properties cannot be looked up directly using "cpucfg". + So it is necessary to provide a default value for "unknown native" + tune targets (i.e. -mtune=native while PRID does not correspond to +- any known "-mtune" type). Currently all numbers are default. */ ++ any known "-mtune" type). */ + array_tune loongarch_cpu_rtx_cost_data = +- array_tune (); ++ array_tune () ++ .set (CPU_LA664, ++ loongarch_rtx_cost_data () ++ .movcf2gr_ (COSTS_N_INSNS (1)) ++ .movgr2cf_ (COSTS_N_INSNS (1))); + + /* RTX costs to use when optimizing for size. 
+ We use a value slightly larger than COSTS_N_INSNS (1) for all of them +@@ -125,7 +131,8 @@ const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = + .int_mult_si_ (COST_COMPLEX_INSN) + .int_mult_di_ (COST_COMPLEX_INSN) + .int_div_si_ (COST_COMPLEX_INSN) +- .int_div_di_ (COST_COMPLEX_INSN); ++ .int_div_di_ (COST_COMPLEX_INSN) ++ .movcf2gr_ (COST_COMPLEX_INSN); + + array_tune loongarch_cpu_issue_rate = array_tune () + .set (CPU_NATIVE, 4) +diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h +index 616b94e87..26f163f0a 100644 +--- a/gcc/config/loongarch/loongarch-tune.h ++++ b/gcc/config/loongarch/loongarch-tune.h +@@ -35,6 +35,8 @@ struct loongarch_rtx_cost_data + unsigned short int_mult_di; + unsigned short int_div_si; + unsigned short int_div_di; ++ unsigned short movcf2gr; ++ unsigned short movgr2cf; + unsigned short branch_cost; + unsigned short memory_latency; + +@@ -95,6 +97,18 @@ struct loongarch_rtx_cost_data + return *this; + } + ++ loongarch_rtx_cost_data movcf2gr_ (unsigned short _movcf2gr) ++ { ++ movcf2gr = _movcf2gr; ++ return *this; ++ } ++ ++ loongarch_rtx_cost_data movgr2cf_ (unsigned short _movgr2cf) ++ { ++ movgr2cf = _movgr2cf; ++ return *this; ++ } ++ + loongarch_rtx_cost_data branch_cost_ (unsigned short _branch_cost) + { + branch_cost = _branch_cost; +@@ -106,7 +120,6 @@ struct loongarch_rtx_cost_data + memory_latency = _memory_latency; + return *this; + } +- + }; + + /* Costs to use when optimizing for size. */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 3aeafeafd..56f631b1a 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5119,29 +5119,6 @@ loongarch_zero_if_equal (rtx cmp0, rtx cmp1) + OPTAB_DIRECT); + } + +-/* Allocate a floating-point condition-code register of mode MODE. 
*/ +- +-static rtx +-loongarch_allocate_fcc (machine_mode mode) +-{ +- unsigned int regno, count; +- +- gcc_assert (TARGET_HARD_FLOAT); +- +- if (mode == FCCmode) +- count = 1; +- else +- gcc_unreachable (); +- +- cfun->machine->next_fcc += -cfun->machine->next_fcc & (count - 1); +- if (cfun->machine->next_fcc > FCC_REG_LAST - FCC_REG_FIRST) +- cfun->machine->next_fcc = 0; +- +- regno = FCC_REG_FIRST + cfun->machine->next_fcc; +- cfun->machine->next_fcc += count; +- return gen_rtx_REG (mode, regno); +-} +- + /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */ + + static void +@@ -5256,7 +5233,7 @@ loongarch_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1) + operands for FCMP.cond.fmt, instead a reversed condition code is + required and a test for false. */ + *code = NE; +- *op0 = loongarch_allocate_fcc (FCCmode); ++ *op0 = gen_reg_rtx (FCCmode); + + *op1 = const0_rtx; + loongarch_emit_binary (cmp_code, *op0, cmp_op0, cmp_op1); +@@ -6626,7 +6603,7 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) + enum mode_class mclass; + + if (mode == FCCmode) +- return FCC_REG_P (regno); ++ return FCC_REG_P (regno) || GP_REG_P (regno) || FP_REG_P (regno); + + size = GET_MODE_SIZE (mode); + mclass = GET_MODE_CLASS (mode); +@@ -6841,6 +6818,9 @@ loongarch_move_to_gpr_cost (reg_class_t from) + /* MOVFR2GR, etc. */ + return 4; + ++ case FCC_REGS: ++ return loongarch_cost->movcf2gr; ++ + default: + return 0; + } +@@ -6863,6 +6843,9 @@ loongarch_move_from_gpr_cost (reg_class_t to) + /* MOVGR2FR, etc. */ + return 4; + ++ case FCC_REGS: ++ return loongarch_cost->movgr2cf; ++ + default: + return 0; + } +@@ -6897,6 +6880,10 @@ loongarch_register_move_cost (machine_mode mode, reg_class_t from, + if (to == dregs) + return loongarch_move_to_gpr_cost (from); + ++ /* fcc -> fcc, fcc -> fpr, or fpr -> fcc. */ ++ if (from == FCC_REGS || to == FCC_REGS) ++ return COSTS_N_INSNS (from == to ? 
2 : 1); ++ + /* Handles cases that require a GPR temporary. */ + cost1 = loongarch_move_to_gpr_cost (from); + if (cost1 != 0) +@@ -6933,6 +6920,39 @@ loongarch_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, + + regno = true_regnum (x); + ++ if (mode == FCCmode) ++ { ++ if (reg_class_subset_p (rclass, FCC_REGS) && !FP_REG_P (regno)) ++ { ++ if (FCC_REG_P (regno)) ++ return FP_REGS; ++ ++ auto fn = in_p ? loongarch_move_from_gpr_cost ++ : loongarch_move_to_gpr_cost; ++ ++ if (fn (FCC_REGS) > fn (FP_REGS) + COSTS_N_INSNS (1)) ++ return FP_REGS; ++ ++ return GP_REG_P (regno) ? NO_REGS : GR_REGS; ++ } ++ ++ if (reg_class_subset_p (rclass, GR_REGS) && FCC_REG_P (regno)) ++ { ++ auto fn = in_p ? loongarch_move_to_gpr_cost ++ : loongarch_move_from_gpr_cost; ++ ++ if (fn (FCC_REGS) > fn (FP_REGS) + COSTS_N_INSNS (1)) ++ return FP_REGS; ++ ++ return NO_REGS; ++ } ++ ++ if (reg_class_subset_p (rclass, FP_REGS) && MEM_P (x)) ++ return GR_REGS; ++ ++ return NO_REGS; ++ } ++ + if (reg_class_subset_p (rclass, FP_REGS)) + { + if (regno < 0 +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 23368008e..6cf71d9e4 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2283,11 +2283,72 @@ + + ;; Clear one FCC register + +-(define_insn "movfcc" +- [(set (match_operand:FCC 0 "register_operand" "=z") +- (const_int 0))] ++(define_expand "movfcc" ++ [(set (match_operand:FCC 0 "") ++ (match_operand:FCC 1 ""))] ++ "TARGET_HARD_FLOAT" ++{ ++ if (memory_operand (operands[0], FCCmode) ++ && memory_operand (operands[1], FCCmode)) ++ operands[1] = force_reg (FCCmode, operands[1]); ++}) ++ ++(define_insn "movfcc_internal" ++ [(set (match_operand:FCC 0 "nonimmediate_operand" ++ "=z,z,*f,*f,*r,*r,*m,*f,*r,z,*r") ++ (match_operand:FCC 1 "reg_or_0_operand" ++ "J,*f,z,*f,J*r,*m,J*r,J*r,*f,*r,z"))] ++ "TARGET_HARD_FLOAT" ++ "@ ++ fcmp.caf.s\t%0,$f0,$f0 ++ movfr2cf\t%0,%1 ++ movcf2fr\t%0,%1 ++ fmov.s\t%0,%1 ++ 
or\t%0,%z1,$r0 ++ ld.b\t%0,%1 ++ st.b\t%z1,%0 ++ movgr2fr.w\t%0,%1 ++ movfr2gr.s\t%0,%1 ++ movgr2cf\t%0,%1 ++ movcf2gr\t%0,%1" ++ [(set_attr "type" "move") ++ (set_attr "mode" "FCC")]) ++ ++(define_insn "fcc_to_" ++ [(set (match_operand:X 0 "register_operand" "=r") ++ (if_then_else:X (ne (match_operand:FCC 1 "register_operand" "0") ++ (const_int 0)) ++ (const_int 1) ++ (const_int 0)))] ++ "TARGET_HARD_FLOAT" + "" +- "fcmp.caf.s\t%0,$f0,$f0") ++ [(set_attr "length" "0") ++ (set_attr "type" "ghost")]) ++ ++(define_expand "cstore4" ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operator:SI 1 "loongarch_fcmp_operator" ++ [(match_operand:ANYF 2 "register_operand") ++ (match_operand:ANYF 3 "register_operand")]))] ++ "" ++ { ++ rtx fcc = gen_reg_rtx (FCCmode); ++ rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), FCCmode, ++ operands[2], operands[3]); ++ ++ emit_insn (gen_rtx_SET (fcc, cmp)); ++ if (TARGET_64BIT) ++ { ++ rtx gpr = gen_reg_rtx (DImode); ++ emit_insn (gen_fcc_to_di (gpr, fcc)); ++ emit_insn (gen_rtx_SET (operands[0], ++ lowpart_subreg (SImode, gpr, DImode))); ++ } ++ else ++ emit_insn (gen_fcc_to_si (operands[0], fcc)); ++ ++ DONE; ++ }) + + ;; Conditional move instructions. 
+ +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 88e54c915..58f9a7826 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -590,6 +590,10 @@ + (define_predicate "loongarch_cstore_operator" + (match_code "ne,eq,gt,gtu,ge,geu,lt,ltu,le,leu")) + ++(define_predicate "loongarch_fcmp_operator" ++ (match_code ++ "unordered,uneq,unlt,unle,eq,lt,le,ordered,ltgt,ne,ge,gt,unge,ungt")) ++ + (define_predicate "small_data_pattern" + (and (match_code "set,parallel,unspec,unspec_volatile,prefetch") + (match_test "loongarch_small_data_pattern_p (op)"))) +diff --git a/gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c b/gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c +new file mode 100644 +index 000000000..23334a3a3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mtune=la464 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler "movcf2fr\t\\\$f\[0-9\]+,\\\$fcc" } } */ ++/* { dg-final { scan-assembler "movfr2gr\\.s\t\\\$r4" } } */ ++ ++int ++t (float a, float b) ++{ ++ return a > b; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/movcf2gr.c b/gcc/testsuite/gcc.target/loongarch/movcf2gr.c +new file mode 100644 +index 000000000..d27c393b5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/movcf2gr.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mtune=la664 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler "movcf2gr\t\\\$r4,\\\$fcc" } } */ ++ ++int ++t (float a, float b) ++{ ++ return a > b; ++} +-- +2.43.0 + diff --git a/0084-LoongArch-Add-sign_extend-pattern-for-32-bit-rotate-.patch b/0084-LoongArch-Add-sign_extend-pattern-for-32-bit-rotate-.patch new file mode 100644 index 0000000..1a98af9 --- /dev/null +++ b/0084-LoongArch-Add-sign_extend-pattern-for-32-bit-rotate-.patch @@ -0,0 +1,69 @@ +From 
8da6a317bc3ad64da8590649b83a841391f20438 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 17 Dec 2023 04:26:23 +0800 +Subject: [PATCH 084/188] LoongArch: Add sign_extend pattern for 32-bit rotate + shift + +Remove a redundant sign extension. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (rotrsi3_extend): New + define_insn. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/rotrw.c: New test. +--- + gcc/config/loongarch/loongarch.md | 10 ++++++++++ + gcc/testsuite/gcc.target/loongarch/rotrw.c | 17 +++++++++++++++++ + 2 files changed, 27 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotrw.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 6cf71d9e4..44e8d336a 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2893,6 +2893,16 @@ + [(set_attr "type" "shift,shift") + (set_attr "mode" "")]) + ++(define_insn "rotrsi3_extend" ++ [(set (match_operand:DI 0 "register_operand" "=r,r") ++ (sign_extend:DI ++ (rotatert:SI (match_operand:SI 1 "register_operand" "r,r") ++ (match_operand:SI 2 "arith_operand" "r,I"))))] ++ "TARGET_64BIT" ++ "rotr%i2.w\t%0,%1,%2" ++ [(set_attr "type" "shift,shift") ++ (set_attr "mode" "SI")]) ++ + ;; The following templates were added to generate "bstrpick.d + alsl.d" + ;; instruction pairs. 
+ ;; It is required that the values of const_immalsl_operand and +diff --git a/gcc/testsuite/gcc.target/loongarch/rotrw.c b/gcc/testsuite/gcc.target/loongarch/rotrw.c +new file mode 100644 +index 000000000..6ed45e8b8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotrw.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-final { scan-assembler "rotr\\.w\t\\\$r4,\\\$r4,\\\$r5" } } */ ++/* { dg-final { scan-assembler "rotri\\.w\t\\\$r4,\\\$r4,5" } } */ ++/* { dg-final { scan-assembler-not "slli\\.w" } } */ ++ ++unsigned ++rotr (unsigned a, unsigned b) ++{ ++ return a >> b | a << 32 - b; ++} ++ ++unsigned ++rotri (unsigned a) ++{ ++ return a >> 5 | a << 27; ++} +-- +2.43.0 + diff --git a/0085-LoongArch-Fixed-bug-in-bstrins_-mode-_for_ior_mask-t.patch b/0085-LoongArch-Fixed-bug-in-bstrins_-mode-_for_ior_mask-t.patch new file mode 100644 index 0000000..f8ee4e6 --- /dev/null +++ b/0085-LoongArch-Fixed-bug-in-bstrins_-mode-_for_ior_mask-t.patch @@ -0,0 +1,37 @@ +From e56d6d9526e1565fffeb320e15796385eb1732b8 Mon Sep 17 00:00:00 2001 +From: Li Wei +Date: Mon, 25 Dec 2023 11:20:23 +0800 +Subject: [PATCH 085/188] LoongArch: Fixed bug in *bstrins__for_ior_mask + template. + +We found that using the latest compiled gcc will cause a miscompare error +when running spec2006 400.perlbench test with -flto turned on. After testing, +it was found that only the LoongArch architecture will report errors. +The first error commit was located through the git bisect command as +r14-3773-g5b857e87201335. Through debugging, it was found that the problem +was that the split condition of the *bstrins__for_ior_mask template was +empty, which should actually be consistent with the insn condition. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md: Adjust. 
+--- + gcc/config/loongarch/loongarch.md | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 44e8d336a..3d5b75825 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1489,7 +1489,7 @@ + "loongarch_pre_reload_split () && \ + loongarch_use_bstrins_for_ior_with_mask (mode, operands)" + "#" +- "" ++ "&& true" + [(set (match_dup 0) (match_dup 1)) + (set (zero_extract:GPR (match_dup 0) (match_dup 2) (match_dup 4)) + (match_dup 3))] +-- +2.43.0 + diff --git a/0086-LoongArch-Fix-insn-output-of-vec_concat-templates-fo.patch b/0086-LoongArch-Fix-insn-output-of-vec_concat-templates-fo.patch new file mode 100644 index 0000000..ee3e1b9 --- /dev/null +++ b/0086-LoongArch-Fix-insn-output-of-vec_concat-templates-fo.patch @@ -0,0 +1,132 @@ +From b1947829a5949a37db09bc23681e44c8479bd404 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan +Date: Fri, 22 Dec 2023 16:22:03 +0800 +Subject: [PATCH 086/188] LoongArch: Fix insn output of vec_concat templates + for LASX. + +When investigaing failure of gcc.dg/vect/slp-reduc-sad.c, following +instruction block are being generated by vec_concatv32qi (which is +generated by vec_initv32qiv16qi) at entrance of foo() function: + + vldx $vr3,$r5,$r6 + vld $vr2,$r5,0 + xvpermi.q $xr2,$xr3,0x20 + +causes the reversion of vec_initv32qiv16qi operation's high and +low 128-bit part. + +According to other target's similar impl and LSX impl for following +RTL representation, current definition in lasx.md of "vec_concat" +are wrong: + + (set (op0) (vec_concat (op1) (op2))) + +For correct behavior, the last argument of xvpermi.q should be 0x02 +instead of 0x20. This patch fixes this issue and cleanup the vec_concat +template impl. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (vec_concatv4di): Delete. + (vec_concatv8si): Delete. + (vec_concatv16hi): Delete. + (vec_concatv32qi): Delete. + (vec_concatv4df): Delete. 
+ (vec_concatv8sf): Delete. + (vec_concat): New template with insn output fixed. +--- + gcc/config/loongarch/lasx.md | 74 ++++-------------------------------- + 1 file changed, 7 insertions(+), 67 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 9ca3f9278..46150f2fb 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -577,77 +577,17 @@ + [(set_attr "type" "simd_insert") + (set_attr "mode" "")]) + +-(define_insn "vec_concatv4di" +- [(set (match_operand:V4DI 0 "register_operand" "=f") +- (vec_concat:V4DI +- (match_operand:V2DI 1 "register_operand" "0") +- (match_operand:V2DI 2 "register_operand" "f")))] +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- [(set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")]) +- +-(define_insn "vec_concatv8si" +- [(set (match_operand:V8SI 0 "register_operand" "=f") +- (vec_concat:V8SI +- (match_operand:V4SI 1 "register_operand" "0") +- (match_operand:V4SI 2 "register_operand" "f")))] +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- [(set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")]) +- +-(define_insn "vec_concatv16hi" +- [(set (match_operand:V16HI 0 "register_operand" "=f") +- (vec_concat:V16HI +- (match_operand:V8HI 1 "register_operand" "0") +- (match_operand:V8HI 2 "register_operand" "f")))] +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- [(set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")]) +- +-(define_insn "vec_concatv32qi" +- [(set (match_operand:V32QI 0 "register_operand" "=f") +- (vec_concat:V32QI +- (match_operand:V16QI 1 "register_operand" "0") +- (match_operand:V16QI 2 "register_operand" "f")))] +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- [(set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")]) +- +-(define_insn "vec_concatv4df" +- [(set (match_operand:V4DF 0 "register_operand" "=f") +- (vec_concat:V4DF +- (match_operand:V2DF 1 "register_operand" 
"0") +- (match_operand:V2DF 2 "register_operand" "f")))] +- "ISA_HAS_LASX" +-{ +- return "xvpermi.q\t%u0,%u2,0x20"; +-} +- [(set_attr "type" "simd_splat") +- (set_attr "mode" "V4DF")]) +- +-(define_insn "vec_concatv8sf" +- [(set (match_operand:V8SF 0 "register_operand" "=f") +- (vec_concat:V8SF +- (match_operand:V4SF 1 "register_operand" "0") +- (match_operand:V4SF 2 "register_operand" "f")))] ++(define_insn "vec_concat" ++ [(set (match_operand:LASX 0 "register_operand" "=f") ++ (vec_concat:LASX ++ (match_operand: 1 "register_operand" "0") ++ (match_operand: 2 "register_operand" "f")))] + "ISA_HAS_LASX" + { +- return "xvpermi.q\t%u0,%u2,0x20"; ++ return "xvpermi.q\t%u0,%u2,0x02"; + } + [(set_attr "type" "simd_splat") +- (set_attr "mode" "V4DI")]) ++ (set_attr "mode" "")]) + + ;; xshuf.w + (define_insn "lasx_xvperm_" +-- +2.43.0 + diff --git a/0087-LoongArch-Fix-ICE-when-passing-two-same-vector-argum.patch b/0087-LoongArch-Fix-ICE-when-passing-two-same-vector-argum.patch new file mode 100644 index 0000000..f137349 --- /dev/null +++ b/0087-LoongArch-Fix-ICE-when-passing-two-same-vector-argum.patch @@ -0,0 +1,232 @@ +From 1096571509762846e2222f575bc981385b4e9fb7 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan +Date: Fri, 22 Dec 2023 16:18:44 +0800 +Subject: [PATCH 087/188] LoongArch: Fix ICE when passing two same vector + argument consecutively + +Following code will cause ICE on LoongArch target: + + #include + + extern void bar (__m128i, __m128i); + + __m128i a; + + void + foo () + { + bar (a, a); + } + +It is caused by missing constraint definition in mov_lsx. This +patch fixes the template and remove the unnecessary processing from +loongarch_split_move () function. + +This patch also cleanup the redundant definition from +loongarch_split_move () and loongarch_split_move_p (). + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Use loongarch_split_move and + loongarch_split_move_p directly. 
+ * config/loongarch/loongarch-protos.h + (loongarch_split_move): Remove unnecessary argument. + (loongarch_split_move_insn_p): Delete. + (loongarch_split_move_insn): Delete. + * config/loongarch/loongarch.cc + (loongarch_split_move_insn_p): Delete. + (loongarch_load_store_insns): Use loongarch_split_move_p + directly. + (loongarch_split_move): remove the unnecessary processing. + (loongarch_split_move_insn): Delete. + * config/loongarch/lsx.md: Use loongarch_split_move and + loongarch_split_move_p directly. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lsx/lsx-mov-1.c: New test. +--- + gcc/config/loongarch/lasx.md | 4 +- + gcc/config/loongarch/loongarch-protos.h | 4 +- + gcc/config/loongarch/loongarch.cc | 49 +------------------ + gcc/config/loongarch/lsx.md | 10 ++-- + .../loongarch/vector/lsx/lsx-mov-1.c | 14 ++++++ + 5 files changed, 24 insertions(+), 57 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-mov-1.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 46150f2fb..dbbf5a136 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -839,10 +839,10 @@ + [(set (match_operand:LASX 0 "nonimmediate_operand") + (match_operand:LASX 1 "move_operand"))] + "reload_completed && ISA_HAS_LASX +- && loongarch_split_move_insn_p (operands[0], operands[1])" ++ && loongarch_split_move_p (operands[0], operands[1])" + [(const_int 0)] + { +- loongarch_split_move_insn (operands[0], operands[1], curr_insn); ++ loongarch_split_move (operands[0], operands[1]); + DONE; + }) + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index e5fcf3111..2067e50c3 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -82,11 +82,9 @@ extern rtx loongarch_legitimize_call_address (rtx); + + extern rtx loongarch_subword (rtx, bool); + extern bool loongarch_split_move_p (rtx, rtx); +-extern void 
loongarch_split_move (rtx, rtx, rtx); ++extern void loongarch_split_move (rtx, rtx); + extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode); + extern void loongarch_split_plus_constant (rtx *, machine_mode); +-extern bool loongarch_split_move_insn_p (rtx, rtx); +-extern void loongarch_split_move_insn (rtx, rtx, rtx); + extern void loongarch_split_128bit_move (rtx, rtx); + extern bool loongarch_split_128bit_move_p (rtx, rtx); + extern void loongarch_split_256bit_move (rtx, rtx); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 56f631b1a..5c278386a 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2558,7 +2558,6 @@ loongarch_split_const_insns (rtx x) + return low + high; + } + +-bool loongarch_split_move_insn_p (rtx dest, rtx src); + /* Return one word of 128-bit value OP, taking into account the fixed + endianness of certain registers. BYTE selects from the byte address. */ + +@@ -2598,7 +2597,7 @@ loongarch_load_store_insns (rtx mem, rtx_insn *insn) + { + set = single_set (insn); + if (set +- && !loongarch_split_move_insn_p (SET_DEST (set), SET_SRC (set))) ++ && !loongarch_split_move_p (SET_DEST (set), SET_SRC (set))) + might_split_p = false; + } + +@@ -4216,7 +4215,7 @@ loongarch_split_move_p (rtx dest, rtx src) + SPLIT_TYPE describes the split condition. */ + + void +-loongarch_split_move (rtx dest, rtx src, rtx insn_) ++loongarch_split_move (rtx dest, rtx src) + { + rtx low_dest; + +@@ -4254,33 +4253,6 @@ loongarch_split_move (rtx dest, rtx src, rtx insn_) + loongarch_subword (src, true)); + } + } +- +- /* This is a hack. See if the next insn uses DEST and if so, see if we +- can forward SRC for DEST. This is most useful if the next insn is a +- simple store. 
*/ +- rtx_insn *insn = (rtx_insn *) insn_; +- struct loongarch_address_info addr = {}; +- if (insn) +- { +- rtx_insn *next = next_nonnote_nondebug_insn_bb (insn); +- if (next) +- { +- rtx set = single_set (next); +- if (set && SET_SRC (set) == dest) +- { +- if (MEM_P (src)) +- { +- rtx tmp = XEXP (src, 0); +- loongarch_classify_address (&addr, tmp, GET_MODE (tmp), +- true); +- if (addr.reg && !reg_overlap_mentioned_p (dest, addr.reg)) +- validate_change (next, &SET_SRC (set), src, false); +- } +- else +- validate_change (next, &SET_SRC (set), src, false); +- } +- } +- } + } + + /* Check if adding an integer constant value for a specific mode can be +@@ -4327,23 +4299,6 @@ loongarch_split_plus_constant (rtx *op, machine_mode mode) + op[2] = gen_int_mode (v, mode); + } + +-/* Return true if a move from SRC to DEST in INSN should be split. */ +- +-bool +-loongarch_split_move_insn_p (rtx dest, rtx src) +-{ +- return loongarch_split_move_p (dest, src); +-} +- +-/* Split a move from SRC to DEST in INSN, given that +- loongarch_split_move_insn_p holds. */ +- +-void +-loongarch_split_move_insn (rtx dest, rtx src, rtx insn) +-{ +- loongarch_split_move (dest, src, insn); +-} +- + /* Implement TARGET_CONSTANT_ALIGNMENT. 
*/ + + static HOST_WIDE_INT +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 7f5fff40a..3e3248ef4 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -794,21 +794,21 @@ + }) + + (define_insn "mov_lsx" +- [(set (match_operand:LSX 0 "nonimmediate_operand" "=f,f,R,*r,*f") +- (match_operand:LSX 1 "move_operand" "fYGYI,R,f,*f,*r"))] ++ [(set (match_operand:LSX 0 "nonimmediate_operand" "=f,f,R,*r,*f,*r") ++ (match_operand:LSX 1 "move_operand" "fYGYI,R,f,*f,*r,*r"))] + "ISA_HAS_LSX" + { return loongarch_output_move (operands[0], operands[1]); } +- [(set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert") ++ [(set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert,simd_copy") + (set_attr "mode" "")]) + + (define_split + [(set (match_operand:LSX 0 "nonimmediate_operand") + (match_operand:LSX 1 "move_operand"))] + "reload_completed && ISA_HAS_LSX +- && loongarch_split_move_insn_p (operands[0], operands[1])" ++ && loongarch_split_move_p (operands[0], operands[1])" + [(const_int 0)] + { +- loongarch_split_move_insn (operands[0], operands[1], curr_insn); ++ loongarch_split_move (operands[0], operands[1]); + DONE; + }) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-mov-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-mov-1.c +new file mode 100644 +index 000000000..7f9d792eb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-mov-1.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mlsx -O2" } */ ++ ++#include ++ ++extern void bar (__m128i, __m128i); ++ ++__m128i a; ++ ++void ++foo () ++{ ++ bar (a, a); ++} +-- +2.43.0 + diff --git a/0088-LoongArch-Expand-left-rotate-to-right-rotate-with-ne.patch b/0088-LoongArch-Expand-left-rotate-to-right-rotate-with-ne.patch new file mode 100644 index 0000000..ac9932e --- /dev/null +++ b/0088-LoongArch-Expand-left-rotate-to-right-rotate-with-ne.patch @@ -0,0 +1,253 @@ +From 
a2cc86c9b5e44c3dcdb8c52d6ae5f535442ec1d4 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 17 Dec 2023 05:38:20 +0800 +Subject: [PATCH 088/188] LoongArch: Expand left rotate to right rotate with + negated amount + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (rotl3): + New define_expand. + * config/loongarch/simd.md (vrotl3): Likewise. + (rotl3): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/rotl-with-rotr.c: New test. + * gcc.target/loongarch/rotl-with-vrotr-b.c: New test. + * gcc.target/loongarch/rotl-with-vrotr-h.c: New test. + * gcc.target/loongarch/rotl-with-vrotr-w.c: New test. + * gcc.target/loongarch/rotl-with-vrotr-d.c: New test. + * gcc.target/loongarch/rotl-with-xvrotr-b.c: New test. + * gcc.target/loongarch/rotl-with-xvrotr-h.c: New test. + * gcc.target/loongarch/rotl-with-xvrotr-w.c: New test. + * gcc.target/loongarch/rotl-with-xvrotr-d.c: New test. +--- + gcc/config/loongarch/loongarch.md | 12 ++++++++ + gcc/config/loongarch/simd.md | 29 +++++++++++++++++++ + .../gcc.target/loongarch/rotl-with-rotr.c | 9 ++++++ + .../gcc.target/loongarch/rotl-with-vrotr-b.c | 7 +++++ + .../gcc.target/loongarch/rotl-with-vrotr-d.c | 7 +++++ + .../gcc.target/loongarch/rotl-with-vrotr-h.c | 7 +++++ + .../gcc.target/loongarch/rotl-with-vrotr-w.c | 28 ++++++++++++++++++ + .../gcc.target/loongarch/rotl-with-xvrotr-b.c | 7 +++++ + .../gcc.target/loongarch/rotl-with-xvrotr-d.c | 7 +++++ + .../gcc.target/loongarch/rotl-with-xvrotr-h.c | 7 +++++ + .../gcc.target/loongarch/rotl-with-xvrotr-w.c | 7 +++++ + 11 files changed, 127 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c + create mode 100644 
gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-d.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-h.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-w.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 3d5b75825..ed4d4b906 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2903,6 +2903,18 @@ + [(set_attr "type" "shift,shift") + (set_attr "mode" "SI")]) + ++;; Expand left rotate to right rotate. ++(define_expand "rotl3" ++ [(set (match_dup 3) ++ (neg:SI (match_operand:SI 2 "register_operand"))) ++ (set (match_operand:GPR 0 "register_operand") ++ (rotatert:GPR (match_operand:GPR 1 "register_operand") ++ (match_dup 3)))] ++ "" ++ { ++ operands[3] = gen_reg_rtx (SImode); ++ }); ++ + ;; The following templates were added to generate "bstrpick.d + alsl.d" + ;; instruction pairs. + ;; It is required that the values of const_immalsl_operand and +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 13202f79b..93fb39abc 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -268,6 +268,35 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + ++;; Expand left rotate to right rotate. ++(define_expand "vrotl3" ++ [(set (match_dup 3) ++ (neg:IVEC (match_operand:IVEC 2 "register_operand"))) ++ (set (match_operand:IVEC 0 "register_operand") ++ (rotatert:IVEC (match_operand:IVEC 1 "register_operand") ++ (match_dup 3)))] ++ "" ++ { ++ operands[3] = gen_reg_rtx (mode); ++ }); ++ ++;; Expand left rotate with a scalar amount to right rotate: negate the ++;; scalar before broadcasting it because scalar negation is cheaper than ++;; vector negation. 
++(define_expand "rotl3" ++ [(set (match_dup 3) ++ (neg:SI (match_operand:SI 2 "register_operand"))) ++ (set (match_dup 4) ++ (vec_duplicate:IVEC (subreg: (match_dup 3) 0))) ++ (set (match_operand:IVEC 0 "register_operand") ++ (rotatert:IVEC (match_operand:IVEC 1 "register_operand") ++ (match_dup 4)))] ++ "" ++ { ++ operands[3] = gen_reg_rtx (SImode); ++ operands[4] = gen_reg_rtx (mode); ++ }); ++ + ;; vrotri.{b/h/w/d} + + (define_insn "rotr3" +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c +new file mode 100644 +index 000000000..84cc53cec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-final { scan-assembler "rotr\\.w" } } */ ++ ++unsigned ++t (unsigned a, unsigned b) ++{ ++ return a << b | a >> (32 - b); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c +new file mode 100644 +index 000000000..14298bf9e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */ ++/* { dg-final { scan-assembler-times "vrotr\\.b" 2 } } */ ++/* { dg-final { scan-assembler-times "vneg\\.b" 1 } } */ ++ ++#define TYPE char ++#include "rotl-with-vrotr-w.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c +new file mode 100644 +index 000000000..0e971b323 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */ ++/* { dg-final { scan-assembler-times "vrotr\\.d" 2 } } */ ++/* { dg-final { scan-assembler-times "vneg\\.d" 1 } } */ ++ ++#define TYPE long long ++#include "rotl-with-vrotr-w.c" +diff --git 
a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c +new file mode 100644 +index 000000000..93216ebc2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */ ++/* { dg-final { scan-assembler-times "vrotr\\.h" 2 } } */ ++/* { dg-final { scan-assembler-times "vneg\\.h" 1 } } */ ++ ++#define TYPE short ++#include "rotl-with-vrotr-w.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c +new file mode 100644 +index 000000000..d05b86f47 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */ ++/* { dg-final { scan-assembler-times "vrotr\\.w" 2 } } */ ++/* { dg-final { scan-assembler-times "vneg\\.w" 1 } } */ ++ ++#ifndef VLEN ++#define VLEN 16 ++#endif ++ ++#ifndef TYPE ++#define TYPE int ++#endif ++ ++typedef unsigned TYPE V __attribute__ ((vector_size (VLEN))); ++V a, b, c; ++ ++void ++test (int x) ++{ ++ b = a << x | a >> ((int)sizeof (TYPE) * __CHAR_BIT__ - x); ++} ++ ++void ++test2 (void) ++{ ++ for (int i = 0; i < VLEN / sizeof (TYPE); i++) ++ c[i] = a[i] << b[i] | a[i] >> ((int)sizeof (TYPE) * __CHAR_BIT__ - b[i]); ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c +new file mode 100644 +index 000000000..2674b1b61 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */ ++/* { dg-final { scan-assembler-times "xvrotr\\.b" 2 } } */ ++/* { dg-final { scan-assembler-times "xvneg\\.b" 1 } } */ ++ ++#define VLEN 32 ++#include "rotl-with-vrotr-b.c" +diff --git 
a/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-d.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-d.c +new file mode 100644 +index 000000000..e94403315 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-d.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */ ++/* { dg-final { scan-assembler-times "xvrotr\\.d" 2 } } */ ++/* { dg-final { scan-assembler-times "xvneg\\.d" 1 } } */ ++ ++#define VLEN 32 ++#include "rotl-with-vrotr-d.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-h.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-h.c +new file mode 100644 +index 000000000..3d998941f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-h.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */ ++/* { dg-final { scan-assembler-times "xvrotr\\.h" 2 } } */ ++/* { dg-final { scan-assembler-times "xvneg\\.h" 1 } } */ ++ ++#define VLEN 32 ++#include "rotl-with-vrotr-h.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-w.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-w.c +new file mode 100644 +index 000000000..ca6aa7bae +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-w.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */ ++/* { dg-final { scan-assembler-times "xvrotr\\.w" 2 } } */ ++/* { dg-final { scan-assembler-times "xvneg\\.w" 1 } } */ ++ ++#define VLEN 32 ++#include "rotl-with-vrotr-w.c" +-- +2.43.0 + diff --git a/0089-LoongArch-Fix-infinite-secondary-reloading-of-FCCmod.patch b/0089-LoongArch-Fix-infinite-secondary-reloading-of-FCCmod.patch new file mode 100644 index 0000000..cda1805 --- /dev/null +++ b/0089-LoongArch-Fix-infinite-secondary-reloading-of-FCCmod.patch @@ -0,0 +1,104 @@ +From 1e389ec3bad94888fadd153f191fe8862448f258 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Wed, 27 Dec 2023 
04:28:56 +0800 +Subject: [PATCH 089/188] LoongArch: Fix infinite secondary reloading of + FCCmode [PR113148] + +The GCC internal doc says: + + X might be a pseudo-register or a 'subreg' of a pseudo-register, + which could either be in a hard register or in memory. Use + 'true_regnum' to find out; it will return -1 if the pseudo is in + memory and the hard register number if it is in a register. + +So "MEM_P (x)" is not enough for checking if we are reloading from/to +the memory. This bug has caused reload pass to stall and finally ICE +complaining with "maximum number of generated reload insns per insn +achieved", since r14-6814. + +Check if "true_regnum (x)" is -1 besides "MEM_P (x)" to fix the issue. + +gcc/ChangeLog: + + PR target/113148 + * config/loongarch/loongarch.cc (loongarch_secondary_reload): + Check if regno == -1 besides MEM_P (x) for reloading FCCmode + from/to FPR to/from memory. + +gcc/testsuite/ChangeLog: + + PR target/113148 + * gcc.target/loongarch/pr113148.c: New test. 
+--- + gcc/config/loongarch/loongarch.cc | 3 +- + gcc/testsuite/gcc.target/loongarch/pr113148.c | 44 +++++++++++++++++++ + 2 files changed, 46 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr113148.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 5c278386a..2e305f940 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6902,7 +6902,8 @@ loongarch_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, + return NO_REGS; + } + +- if (reg_class_subset_p (rclass, FP_REGS) && MEM_P (x)) ++ if (reg_class_subset_p (rclass, FP_REGS) ++ && (regno == -1 || MEM_P (x))) + return GR_REGS; + + return NO_REGS; +diff --git a/gcc/testsuite/gcc.target/loongarch/pr113148.c b/gcc/testsuite/gcc.target/loongarch/pr113148.c +new file mode 100644 +index 000000000..cf48e5520 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr113148.c +@@ -0,0 +1,44 @@ ++/* PR 113148: ICE caused by infinite reloading */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=la464 -mfpu=64 -mabi=lp64d" } */ ++ ++struct bound ++{ ++ double max; ++} drawQuadrant_bound; ++double w4, innerXfromXY_y, computeBound_right_0; ++struct arc_def ++{ ++ double w, h; ++ double a0, a1; ++}; ++static void drawQuadrant (struct arc_def *); ++static void ++computeBound (struct arc_def *def, struct bound *bound) ++{ ++ double ellipsex_1, ellipsex_0; ++ bound->max = def->a1 ?: __builtin_sin (w4) * def->h; ++ if (def->a0 == 5 && def->w == def->h) ++ ; ++ else ++ ellipsex_0 = def->a0 == 0.0 ?: __builtin_cos (w4); ++ if (def->a1 == 5 && def->w == def->h) ++ ellipsex_1 = bound->max; ++ __builtin_sqrt (ellipsex_1 * innerXfromXY_y * innerXfromXY_y * w4); ++ computeBound_right_0 = ellipsex_0; ++} ++void ++drawArc () ++{ ++ struct arc_def foo; ++ for (;;) ++ drawQuadrant (&foo); ++} ++void ++drawQuadrant (struct arc_def *def) ++{ ++ int y, miny; ++ computeBound (def, &drawQuadrant_bound); ++ while (y >= 
miny) ++ ; ++} +-- +2.43.0 + diff --git a/0090-LoongArch-Replace-mexplicit-relocs-auto-simple-used-.patch b/0090-LoongArch-Replace-mexplicit-relocs-auto-simple-used-.patch new file mode 100644 index 0000000..ab8c3f6 --- /dev/null +++ b/0090-LoongArch-Replace-mexplicit-relocs-auto-simple-used-.patch @@ -0,0 +1,305 @@ +From 294893b352898328d804f2d07981f6bf1e54f8b6 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Tue, 12 Dec 2023 04:54:21 +0800 +Subject: [PATCH 090/188] LoongArch: Replace -mexplicit-relocs=auto simple-used + address peephole2 with combine + +The problem with peephole2 is it uses a naive sliding-window algorithm +and misses many cases. For example: + + float a[10000]; + float t() { return a[0] + a[8000]; } + +is compiled to: + + la.local $r13,a + la.local $r12,a+32768 + fld.s $f1,$r13,0 + fld.s $f0,$r12,-768 + fadd.s $f0,$f1,$f0 + +by trunk. But as we've explained in r14-4851, the following would be +better with -mexplicit-relocs=auto: + + pcalau12i $r13,%pc_hi20(a) + pcalau12i $r12,%pc_hi20(a+32000) + fld.s $f1,$r13,%pc_lo12(a) + fld.s $f0,$r12,%pc_lo12(a+32000) + fadd.s $f0,$f1,$f0 + +However the sliding-window algorithm just won't detect the pcalau12i/fld +pair to be optimized. Use a define_insn_and_rewrite in combine pass +will work around the issue. + +gcc/ChangeLog: + + * config/loongarch/predicates.md + (symbolic_pcrel_offset_operand): New define_predicate. + (mem_simple_ldst_operand): Likewise. + * config/loongarch/loongarch-protos.h + (loongarch_rewrite_mem_for_simple_ldst): Declare. + * config/loongarch/loongarch.cc + (loongarch_rewrite_mem_for_simple_ldst): Implement. + * config/loongarch/loongarch.md (simple_load): New + define_insn_and_rewrite. + (simple_load_ext): Likewise. + (simple_store): Likewise. + (define_peephole2): Remove la.local/[f]ld peepholes. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c: + New test. 
+ * gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c: + New test. +--- + gcc/config/loongarch/loongarch-protos.h | 1 + + gcc/config/loongarch/loongarch.cc | 16 +++ + gcc/config/loongarch/loongarch.md | 114 +++++------------- + gcc/config/loongarch/predicates.md | 13 ++ + ...explicit-relocs-auto-single-load-store-2.c | 11 ++ + ...explicit-relocs-auto-single-load-store-3.c | 18 +++ + 6 files changed, 86 insertions(+), 87 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 2067e50c3..5060efbb6 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -163,6 +163,7 @@ extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); + extern bool loongarch_check_zero_div_p (void); + extern bool loongarch_pre_reload_split (void); + extern int loongarch_use_bstrins_for_ior_with_mask (machine_mode, rtx *); ++extern rtx loongarch_rewrite_mem_for_simple_ldst (rtx); + + union loongarch_gen_fn_ptrs + { +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 2e305f940..c6318bee9 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5713,6 +5713,22 @@ loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op) + return 0; + } + ++/* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto ++ -mcmodel={normal/medium}. 
*/ ++rtx ++loongarch_rewrite_mem_for_simple_ldst (rtx mem) ++{ ++ rtx addr = XEXP (mem, 0); ++ rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ++ UNSPEC_PCALAU12I_GR); ++ rtx new_mem; ++ ++ addr = gen_rtx_LO_SUM (Pmode, force_reg (Pmode, hi), addr); ++ new_mem = gen_rtx_MEM (GET_MODE (mem), addr); ++ MEM_COPY_ATTRIBUTES (new_mem, mem); ++ return new_mem; ++} ++ + /* Print the text for PRINT_OPERAND punctation character CH to FILE. + The punctuation characters are: + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index ed4d4b906..3c61a0cf4 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -4135,101 +4135,41 @@ + ;; + ;; And if the pseudo op cannot be relaxed, we'll get a worse result (with + ;; 3 instructions). +-(define_peephole2 +- [(set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand") +- (mem:LD_AT_LEAST_32_BIT (match_dup 0)))] +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands[0]) \ +- || REGNO (operands[0]) == REGNO (operands[2]))" +- [(set (match_dup 2) +- (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))] +- { +- emit_insn (gen_pcalau12i_gr (operands[0], operands[1])); +- }) +- +-(define_peephole2 +- [(set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand") +- (mem:LD_AT_LEAST_32_BIT (plus (match_dup 0) +- (match_operand 3 "const_int_operand"))))] +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands[0]) \ +- || REGNO (operands[0]) == REGNO (operands[2]))" +- [(set (match_dup 2) +- (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))] +- { +- operands[1] = 
plus_constant (Pmode, operands[1], INTVAL (operands[3])); +- emit_insn (gen_pcalau12i_gr (operands[0], operands[1])); +- }) +- +-(define_peephole2 +- [(set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (match_operand:GPR 2 "register_operand") +- (any_extend:GPR (mem:SUBDI (match_dup 0))))] +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands[0]) \ +- || REGNO (operands[0]) == REGNO (operands[2]))" +- [(set (match_dup 2) +- (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0) +- (match_dup 1)))))] ++(define_insn_and_rewrite "simple_load" ++ [(set (match_operand:LD_AT_LEAST_32_BIT 0 "register_operand" "=r,f") ++ (match_operand:LD_AT_LEAST_32_BIT 1 "mem_simple_ldst_operand" ""))] ++ "loongarch_pre_reload_split () ++ && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" ++ "#" ++ "&& true" + { +- emit_insn (gen_pcalau12i_gr (operands[0], operands[1])); ++ operands[1] = loongarch_rewrite_mem_for_simple_ldst (operands[1]); + }) + +-(define_peephole2 +- [(set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (match_operand:GPR 2 "register_operand") ++(define_insn_and_rewrite "simple_load_ext" ++ [(set (match_operand:GPR 0 "register_operand" "=r") + (any_extend:GPR +- (mem:SUBDI (plus (match_dup 0) +- (match_operand 3 "const_int_operand")))))] +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands[0]) \ +- || REGNO (operands[0]) == REGNO (operands[2]))" +- [(set (match_dup 2) +- (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0) +- (match_dup 1)))))] +- { +- operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3])); +- emit_insn (gen_pcalau12i_gr (operands[0], operands[1])); +- }) +- +-(define_peephole2 +- [(set (match_operand:P 0 
"register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (mem:ST_ANY (match_dup 0)) +- (match_operand:ST_ANY 2 "register_operand"))] +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands[0])) \ +- && REGNO (operands[0]) != REGNO (operands[2])" +- [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))] ++ (match_operand:SUBDI 1 "mem_simple_ldst_operand" "")))] ++ "loongarch_pre_reload_split () ++ && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" ++ "#" ++ "&& true" + { +- emit_insn (gen_pcalau12i_gr (operands[0], operands[1])); ++ operands[1] = loongarch_rewrite_mem_for_simple_ldst (operands[1]); + }) + +-(define_peephole2 +- [(set (match_operand:P 0 "register_operand") +- (match_operand:P 1 "symbolic_pcrel_operand")) +- (set (mem:ST_ANY (plus (match_dup 0) +- (match_operand 3 "const_int_operand"))) +- (match_operand:ST_ANY 2 "register_operand"))] +- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \ +- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \ +- && (peep2_reg_dead_p (2, operands[0])) \ +- && REGNO (operands[0]) != REGNO (operands[2])" +- [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))] ++(define_insn_and_rewrite "simple_store" ++ [(set (match_operand:ST_ANY 0 "mem_simple_ldst_operand" "") ++ (match_operand:ST_ANY 1 "reg_or_0_operand" "r,f"))] ++ "loongarch_pre_reload_split () ++ && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO ++ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)" ++ "#" ++ "&& true" + { +- operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3])); +- emit_insn (gen_pcalau12i_gr (operands[0], operands[1])); ++ operands[0] = loongarch_rewrite_mem_for_simple_ldst (operands[0]); + }) + + ;; Synchronization instructions. 
+diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 58f9a7826..3698b9103 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -579,6 +579,19 @@ + return loongarch_symbolic_constant_p (op, &type) && type == SYMBOL_PCREL; + }) + ++(define_predicate "symbolic_pcrel_offset_operand" ++ (and (match_code "plus") ++ (match_operand 0 "symbolic_pcrel_operand") ++ (match_operand 1 "const_int_operand"))) ++ ++(define_predicate "mem_simple_ldst_operand" ++ (match_code "mem") ++{ ++ op = XEXP (op, 0); ++ return (symbolic_pcrel_operand (op, Pmode) ++ || symbolic_pcrel_offset_operand (op, Pmode)); ++}) ++ + (define_predicate "equality_operator" + (match_code "eq,ne")) + +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c +new file mode 100644 +index 000000000..42cb966d1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mexplicit-relocs=auto" } */ ++ ++float a[8001]; ++float ++t (void) ++{ ++ return a[0] + a[8000]; ++} ++ ++/* { dg-final { scan-assembler-not "la.local" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c +new file mode 100644 +index 000000000..32aa5383d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mexplicit-relocs=auto -fdump-rtl-final" } */ ++/* { dg-final { scan-rtl-dump-times "mem/v/c" 2 "final" } } */ ++/* { dg-final { scan-assembler-not "la\\.local" } } */ ++ ++volatile unsigned long counter; ++ ++unsigned long ++read (void) ++{ ++ return counter; ++} ++ ++void ++clear 
(void) ++{ ++ counter = 0; ++} +-- +2.43.0 + diff --git a/0091-LoongArch-Fix-the-format-of-bstrins_-mode-_for_ior_m.patch b/0091-LoongArch-Fix-the-format-of-bstrins_-mode-_for_ior_m.patch new file mode 100644 index 0000000..f913415 --- /dev/null +++ b/0091-LoongArch-Fix-the-format-of-bstrins_-mode-_for_ior_m.patch @@ -0,0 +1,33 @@ +From 4d569c5fde85ca426eecf57119048ec25f048758 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 29 Dec 2023 20:04:34 +0800 +Subject: [PATCH 091/188] LoongArch: Fix the format of + bstrins__for_ior_mask condition (NFC) + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (bstrins__for_ior_mask): + For the condition, remove unneeded trailing "\" and move "&&" to + follow GNU coding style. NFC. +--- + gcc/config/loongarch/loongarch.md | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 3c61a0cf4..996df66e8 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1486,8 +1486,8 @@ + (match_operand:GPR 2 "const_int_operand")) + (and:GPR (match_operand:GPR 3 "register_operand") + (match_operand:GPR 4 "const_int_operand"))))] +- "loongarch_pre_reload_split () && \ +- loongarch_use_bstrins_for_ior_with_mask (mode, operands)" ++ "loongarch_pre_reload_split () ++ && loongarch_use_bstrins_for_ior_with_mask (mode, operands)" + "#" + "&& true" + [(set (match_dup 0) (match_dup 1)) +-- +2.43.0 + diff --git a/0092-LoongArch-Added-TLS-Le-Relax-support.patch b/0092-LoongArch-Added-TLS-Le-Relax-support.patch new file mode 100644 index 0000000..ebb7466 --- /dev/null +++ b/0092-LoongArch-Added-TLS-Le-Relax-support.patch @@ -0,0 +1,280 @@ +From 58d41ffad306a359ecd2902ec19d582506f14b10 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Tue, 12 Dec 2023 16:32:31 +0800 +Subject: [PATCH 092/188] LoongArch: Added TLS Le Relax support. + +Check whether the assembler supports tls le relax. 
If it supports it, the assembly +instruction sequence of tls le relax will be generated by default. + +The original way to obtain the tls le symbol address: + lu12i.w $rd, %le_hi20(sym) + ori $rd, $rd, %le_lo12(sym) + add.{w/d} $rd, $rd, $tp + +If the assembler supports tls le relax, the following sequence is generated: + + lu12i.w $rd, %le_hi20_r(sym) + add.{w/d} $rd,$rd,$tp,%le_add_r(sym) + addi.{w/d} $rd,$rd,%le_lo12_r(sym) + +gcc/ChangeLog: + + * config.in: Regenerate. + * config/loongarch/loongarch-opts.h (HAVE_AS_TLS_LE_RELAXATION): Define. + * config/loongarch/loongarch.cc (loongarch_legitimize_tls_address): + Added TLS Le Relax support. + (loongarch_print_operand_reloc): Add the output string of TLS Le Relax. + * config/loongarch/loongarch.md (@add_tls_le_relax): New template. + * configure: Regenerate. + * configure.ac: Check if binutils supports TLS le relax. + +gcc/testsuite/ChangeLog: + + * lib/target-supports.exp: Add a function to check whether binutils supports + TLS Le Relax. + * gcc.target/loongarch/tls-le-relax.c: New test. +--- + gcc/config.in | 6 +++ + gcc/config/loongarch/loongarch-opts.h | 4 ++ + gcc/config/loongarch/loongarch.cc | 46 +++++++++++++++++-- + gcc/config/loongarch/loongarch.md | 12 +++++ + gcc/configure | 31 +++++++++++++ + gcc/configure.ac | 5 ++ + .../gcc.target/loongarch/tls-le-relax.c | 12 +++++ + gcc/testsuite/lib/target-supports.exp | 12 +++++ + 8 files changed, 125 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-le-relax.c + +diff --git a/gcc/config.in b/gcc/config.in +index 033cfb98b..7220b2b2b 100644 +--- a/gcc/config.in ++++ b/gcc/config.in +@@ -771,6 +771,12 @@ + #endif + + ++/* Define if your assembler supports tls le relocation. */ ++#ifndef USED_FOR_TARGET ++#undef HAVE_AS_TLS_LE_RELAXATION ++#endif ++ ++ + /* Define if your assembler supports vl/vst/vlm/vstm with an optional + alignment hint argument. 
*/ + #ifndef USED_FOR_TARGET +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 639ed50bd..8491bee0d 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -114,4 +114,8 @@ struct loongarch_flags { + #define HAVE_AS_TLS 0 + #endif + ++#ifndef HAVE_AS_TLS_LE_RELAXATION ++#define HAVE_AS_TLS_LE_RELAXATION 0 ++#endif ++ + #endif /* LOONGARCH_OPTS_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index c6318bee9..d1b1950dc 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2993,7 +2993,29 @@ loongarch_legitimize_tls_address (rtx loc) + + case TLS_MODEL_LOCAL_EXEC: + { +- /* la.tls.le; tp-relative add. */ ++ /* la.tls.le; tp-relative add. ++ ++ normal: ++ lu12i.w $rd, %le_hi20(sym) ++ ori $rd, $rd, %le_lo12(sym) ++ add.{w/d} $rd, $rd, $tp ++ (st.{w/d}/ld.{w/d} $rs, $rd, 0) ++ ++ tls le relax: ++ lu12i.w $rd, %le_hi20_r(sym) ++ add.{w/d} $rd,$rd,$tp ++ addi.{w/d} $rd,$rd,%le_lo12_r(sym) ++ (st.{w/d}/ld.{w/d} $rs, $rd, 0) ++ ++ extreme (When the code model is set to extreme, the TLS le Relax ++ instruction sequence is not generated): ++ lu12i.w $rd, %le_hi20(sym) ++ ori $rd, $rd, %le_lo12(sym) ++ lu32i.d $rd, %le64_lo20(sym) ++ lu52i.d $rd, $rd, %le64_hi12(sym) ++ add.d $rd, $rd, $tp ++ (st.{w/d}/ld.{w/d} $rs, $rd, 0) */ ++ + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp1 = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (Pmode); +@@ -3004,7 +3026,20 @@ loongarch_legitimize_tls_address (rtx loc) + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); + high = loongarch_force_temporary (tmp3, high); +- emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2)); ++ ++ /* The assembler does not implement tls le relax support when the ++ code model is extreme, so when the code model is extreme, the ++ old symbol address acquisition method is still used. 
*/ ++ if (HAVE_AS_TLS_LE_RELAXATION && !TARGET_CMODEL_EXTREME) ++ { ++ emit_insn (gen_add_tls_le_relax (Pmode, dest, high, ++ tp, loc)); ++ loongarch_emit_move (dest, ++ gen_rtx_LO_SUM (Pmode, dest, tmp2)); ++ return dest; ++ } ++ else ++ emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2)); + + if (TARGET_CMODEL_EXTREME) + { +@@ -5936,7 +5971,12 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, + gcc_unreachable (); + } + else +- reloc = hi_reloc ? "%le_hi20" : "%le_lo12"; ++ { ++ if (HAVE_AS_TLS_LE_RELAXATION && !TARGET_CMODEL_EXTREME) ++ reloc = hi_reloc ? "%le_hi20_r" : "%le_lo12_r"; ++ else ++ reloc = hi_reloc ? "%le_hi20" : "%le_lo12"; ++ } + break; + + case SYMBOL_TLSGD: +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 996df66e8..02c537d4c 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -73,6 +73,7 @@ + UNSPEC_LOAD_FROM_GOT + UNSPEC_PCALAU12I + UNSPEC_PCALAU12I_GR ++ UNSPEC_ADD_TLS_LE_RELAX + UNSPEC_ORI_L_LO12 + UNSPEC_LUI_L_HI20 + UNSPEC_LUI_H_LO20 +@@ -2503,6 +2504,17 @@ + "pcalau12i\t%0,%%pc_hi20(%1)" + [(set_attr "type" "move")]) + ++(define_insn "@add_tls_le_relax" ++ [(set (match_operand:P 0 "register_operand" "=r") ++ (unspec:P [(match_operand:P 1 "register_operand" "r") ++ (match_operand:P 2 "register_operand" "r") ++ (match_operand:P 3 "symbolic_operand")] ++ UNSPEC_ADD_TLS_LE_RELAX))] ++ "HAVE_AS_TLS_LE_RELAXATION" ++ "add.\t%0,%1,%2,%%le_add_r(%3)" ++ [(set_attr "type" "move")] ++) ++ + (define_insn "@ori_l_lo12" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P [(match_operand:P 1 "register_operand" "r") +diff --git a/gcc/configure b/gcc/configure +index 5842e7a18..eecfe60d6 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -28968,6 +28968,37 @@ if test $gcc_cv_as_loongarch_cond_branch_relax = yes; then + + $as_echo "#define HAVE_AS_COND_BRANCH_RELAXATION 1" >>confdefs.h + ++fi ++ ++ { $as_echo "$as_me:${as_lineno-$LINENO}: 
checking assembler for tls le relaxation support" >&5 ++$as_echo_n "checking assembler for tls le relaxation support... " >&6; } ++if ${gcc_cv_as_loongarch_tls_le_relaxation_support+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ gcc_cv_as_loongarch_tls_le_relaxation_support=no ++ if test x$gcc_cv_as != x; then ++ $as_echo 'lu12i.w $t0,%le_hi20_r(a)' > conftest.s ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; } ++ then ++ gcc_cv_as_loongarch_tls_le_relaxation_support=yes ++ else ++ echo "configure: failed program was" >&5 ++ cat conftest.s >&5 ++ fi ++ rm -f conftest.o conftest.s ++ fi ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_tls_le_relaxation_support" >&5 ++$as_echo "$gcc_cv_as_loongarch_tls_le_relaxation_support" >&6; } ++if test $gcc_cv_as_loongarch_tls_le_relaxation_support = yes; then ++ ++$as_echo "#define HAVE_AS_TLS_LE_RELAXATION 1" >>confdefs.h ++ + fi + + ;; +diff --git a/gcc/configure.ac b/gcc/configure.ac +index 9c3fd3ad6..d1032440d 100644 +--- a/gcc/configure.ac ++++ b/gcc/configure.ac +@@ -5357,6 +5357,11 @@ x: + beq $a0,$a1,a],, + [AC_DEFINE(HAVE_AS_COND_BRANCH_RELAXATION, 1, + [Define if your assembler supports conditional branch relaxation.])]) ++ gcc_GAS_CHECK_FEATURE([tls le relaxation support], ++ gcc_cv_as_loongarch_tls_le_relaxation_support,, ++ [lu12i.w $t0,%le_hi20_r(a)],, ++ [AC_DEFINE(HAVE_AS_TLS_LE_RELAXATION, 1, ++ [Define if your assembler supports tls le relocation.])]) + ;; + s390*-*-*) + gcc_GAS_CHECK_FEATURE([.gnu_attribute support], +diff --git a/gcc/testsuite/gcc.target/loongarch/tls-le-relax.c b/gcc/testsuite/gcc.target/loongarch/tls-le-relax.c +new file mode 100644 +index 000000000..a9a404fc7 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/loongarch/tls-le-relax.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mcmodel=normal -mexplicit-relocs" } */ ++/* { dg-final { scan-assembler "%le_add_r" { target tls_le_relax } } } */ ++ ++__attribute__ ((tls_model ("local-exec"))) __thread int a; ++ ++void ++test (void) ++{ ++ a = 10; ++} ++ +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index b8bff1a31..20fbd43ee 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -10582,6 +10582,18 @@ proc check_effective_target_loongarch_call36_support { } { + } ""] + } + ++# Returns 1 if binutils supports TLS le Relax, 0 otherwise. ++proc check_effective_target_tls_le_relax { } { ++ if [check_effective_target_tls_native] { ++ return [check_no_compiler_messages loongarch_tls_le_relax object { ++ /* Assembly code */ ++ lu12i.w $r12, %le_hi20_r(a) ++ }] ++ } ++ ++ return 0; ++} ++ + # Return 1 if the target does *not* require strict alignment. + + proc check_effective_target_non_strict_align {} { +-- +2.43.0 + diff --git a/0093-LoongArch-Provide-fmin-fmax-RTL-pattern-for-vectors.patch b/0093-LoongArch-Provide-fmin-fmax-RTL-pattern-for-vectors.patch new file mode 100644 index 0000000..db34f7a --- /dev/null +++ b/0093-LoongArch-Provide-fmin-fmax-RTL-pattern-for-vectors.patch @@ -0,0 +1,112 @@ +From 97081ba053424e35b1869a00d6ac0e84362d09ea Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sat, 30 Dec 2023 21:40:11 +0800 +Subject: [PATCH 093/188] LoongArch: Provide fmin/fmax RTL pattern for vectors + +We already had smin/smax RTL pattern using vfmin/vfmax instructions. +But for smin/smax, it's unspecified what will happen if either operand +contains any NaN operands. So we would not vectorize the loop with +-fno-finite-math-only (the default for all optimization levels except +-Ofast). 
+ +But, LoongArch vfmin/vfmax instruction is IEEE-754-2008 conformant so we +can also use them and vectorize the loop. + +gcc/ChangeLog: + + * config/loongarch/simd.md (fmax3): New define_insn. + (fmin3): Likewise. + (reduc_fmax_scal_3): New define_expand. + (reduc_fmin_scal_3): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vfmax-vfmin.c: New test. +--- + gcc/config/loongarch/simd.md | 31 +++++++++++++++++++ + .../gcc.target/loongarch/vfmax-vfmin.c | 31 +++++++++++++++++++ + 2 files changed, 62 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c + +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 93fb39abc..8ac1d75a8 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -426,6 +426,37 @@ + [(set_attr "type" "simd_fcmp") + (set_attr "mode" "")]) + ++; [x]vf{min/max} instructions are IEEE-754-2008 conforming, use them for ++; the corresponding IEEE-754-2008 operations. We must use UNSPEC instead ++; of smin/smax though, see PR105414 and PR107013. ++ ++(define_int_iterator UNSPEC_FMAXMIN [UNSPEC_FMAX UNSPEC_FMIN]) ++(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax") (UNSPEC_FMIN "fmin")]) ++ ++(define_insn "3" ++ [(set (match_operand:FVEC 0 "register_operand" "=f") ++ (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f") ++ (match_operand:FVEC 2 "register_operand" "f")] ++ UNSPEC_FMAXMIN))] ++ "" ++ "v.\t%0,%1,%2" ++ [(set_attr "type" "simd_fminmax") ++ (set_attr "mode" "")]) ++ ++;; ... and also reduc operations. ++(define_expand "reduc__scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:FVEC 1 "register_operand") ++ (const_int UNSPEC_FMAXMIN)] ++ "" ++{ ++ rtx tmp = gen_reg_rtx (mode); ++ loongarch_expand_vector_reduc (gen_3, tmp, operands[1]); ++ emit_insn (gen_vec_extract (operands[0], tmp, ++ const0_rtx)); ++ DONE; ++}) ++ + ; The LoongArch SX Instructions. 
+ (include "lsx.md") + +diff --git a/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c b/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c +new file mode 100644 +index 000000000..811fee361 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mtune=la464 -mlasx" } */ ++/* { dg-final { scan-assembler "\tvfmin\\.d" } } */ ++/* { dg-final { scan-assembler "\tvfmax\\.d" } } */ ++/* { dg-final { scan-assembler "\txvfmin\\.d" } } */ ++/* { dg-final { scan-assembler "\txvfmax\\.d" } } */ ++/* { dg-final { scan-assembler "\tvfmin\\.s" } } */ ++/* { dg-final { scan-assembler "\tvfmax\\.s" } } */ ++/* { dg-final { scan-assembler "\txvfmin\\.s" } } */ ++/* { dg-final { scan-assembler "\txvfmax\\.s" } } */ ++ ++#define T(OP) __typeof__ (__builtin_##OP (0, 0)) ++ ++#define TEST(OP, LEN) \ ++void \ ++test_##OP##LEN (T (OP) *restrict dest, \ ++ const T (OP) *restrict src1, \ ++ const T (OP) *restrict src2) \ ++{ \ ++ for (int i = 0; i < LEN / sizeof (T(OP)); i++) \ ++ dest[i] = __builtin_##OP (src1[i], src2[i]); \ ++} ++ ++TEST(fmin, 16) ++TEST(fmax, 16) ++TEST(fmin, 32) ++TEST(fmax, 32) ++TEST(fminf, 16) ++TEST(fmaxf, 16) ++TEST(fminf, 32) ++TEST(fmaxf, 32) +-- +2.43.0 + diff --git a/0094-LoongArch-Merge-constant-vector-permuatation-impleme.patch b/0094-LoongArch-Merge-constant-vector-permuatation-impleme.patch new file mode 100644 index 0000000..4c6ae00 --- /dev/null +++ b/0094-LoongArch-Merge-constant-vector-permuatation-impleme.patch @@ -0,0 +1,1484 @@ +From 06a6a571fd557b53f805d990dd1a40a2ab7c1e5c Mon Sep 17 00:00:00 2001 +From: Li Wei +Date: Thu, 28 Dec 2023 20:26:46 +0800 +Subject: [PATCH 094/188] LoongArch: Merge constant vector permuatation + implementations. + +There are currently two versions of the implementations of constant +vector permutation: loongarch_expand_vec_perm_const_1 and +loongarch_expand_vec_perm_const_2. The implementations of the two +versions are different. 
Currently, only the implementation of +loongarch_expand_vec_perm_const_1 is used for 256-bit vectors. We +hope to streamline the code as much as possible while retaining the +better-performing implementation of the two. By repeatedly testing +spec2006 and spec2017, we got the following Merged version. +Compared with the pre-merger version, the number of lines of code +in loongarch.cc has been reduced by 888 lines. At the same time, +the performance of SPECint2006 under Ofast has been improved by 0.97%, +and the performance of SPEC2017 fprate has been improved by 0.27%. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_is_odd_extraction): + Remove useless forward declaration. + (loongarch_is_even_extraction): Remove useless forward declaration. + (loongarch_try_expand_lsx_vshuf_const): Removed. + (loongarch_expand_vec_perm_const_1): Merged. + (loongarch_is_double_duplicate): Removed. + (loongarch_is_center_extraction): Ditto. + (loongarch_is_reversing_permutation): Ditto. + (loongarch_is_di_misalign_extract): Ditto. + (loongarch_is_si_misalign_extract): Ditto. + (loongarch_is_lasx_lowpart_extract): Ditto. + (loongarch_is_op_reverse_perm): Ditto. + (loongarch_is_single_op_perm): Ditto. + (loongarch_is_divisible_perm): Ditto. + (loongarch_is_triple_stride_extract): Ditto. + (loongarch_expand_vec_perm_const_2): Merged. + (loongarch_expand_vec_perm_const): New. + (loongarch_vectorize_vec_perm_const): Adjust. 
+--- + gcc/config/loongarch/loongarch.cc | 1308 +++++------------------------ + 1 file changed, 210 insertions(+), 1098 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index d1b1950dc..9d2374a46 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -8823,143 +8823,6 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) + } + } + +-static bool +-loongarch_is_odd_extraction (struct expand_vec_perm_d *); +- +-static bool +-loongarch_is_even_extraction (struct expand_vec_perm_d *); +- +-static bool +-loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) +-{ +- int i; +- rtx target, op0, op1, sel, tmp; +- rtx rperm[MAX_VECT_LEN]; +- +- if (d->vmode == E_V2DImode || d->vmode == E_V2DFmode +- || d->vmode == E_V4SImode || d->vmode == E_V4SFmode +- || d->vmode == E_V8HImode || d->vmode == E_V16QImode) +- { +- target = d->target; +- op0 = d->op0; +- op1 = d->one_vector_p ? d->op0 : d->op1; +- +- if (GET_MODE (op0) != GET_MODE (op1) +- || GET_MODE (op0) != GET_MODE (target)) +- return false; +- +- if (d->testing_p) +- return true; +- +- /* If match extract-even and extract-odd permutations pattern, use +- * vselect much better than vshuf. 
*/ +- if (loongarch_is_odd_extraction (d) +- || loongarch_is_even_extraction (d)) +- { +- if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, +- d->perm, d->nelt)) +- return true; +- +- unsigned char perm2[MAX_VECT_LEN]; +- for (i = 0; i < d->nelt; ++i) +- perm2[i] = (d->perm[i] + d->nelt) & (2 * d->nelt - 1); +- +- if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, +- perm2, d->nelt)) +- return true; +- } +- +- for (i = 0; i < d->nelt; i += 1) +- { +- rperm[i] = GEN_INT (d->perm[i]); +- } +- +- if (d->vmode == E_V2DFmode) +- { +- sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0); +- emit_move_insn (tmp, sel); +- } +- else if (d->vmode == E_V4SFmode) +- { +- sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0); +- emit_move_insn (tmp, sel); +- } +- else +- { +- sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm)); +- emit_move_insn (d->target, sel); +- } +- +- switch (d->vmode) +- { +- case E_V2DFmode: +- emit_insn (gen_lsx_vshuf_d_f (target, target, op1, op0)); +- break; +- case E_V2DImode: +- emit_insn (gen_lsx_vshuf_d (target, target, op1, op0)); +- break; +- case E_V4SFmode: +- emit_insn (gen_lsx_vshuf_w_f (target, target, op1, op0)); +- break; +- case E_V4SImode: +- emit_insn (gen_lsx_vshuf_w (target, target, op1, op0)); +- break; +- case E_V8HImode: +- emit_insn (gen_lsx_vshuf_h (target, target, op1, op0)); +- break; +- case E_V16QImode: +- emit_insn (gen_lsx_vshuf_b (target, op1, op0, target)); +- break; +- default: +- break; +- } +- +- return true; +- } +- return false; +-} +- +-static bool +-loongarch_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) +-{ +- unsigned int i, nelt = d->nelt; +- unsigned char perm2[MAX_VECT_LEN]; +- +- if (d->one_vector_p) +- { +- /* Try interleave with alternating operands. 
*/ +- memcpy (perm2, d->perm, sizeof (perm2)); +- for (i = 1; i < nelt; i += 2) +- perm2[i] += nelt; +- if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, +- nelt)) +- return true; +- } +- else +- { +- if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, +- d->perm, nelt)) +- return true; +- +- /* Try again with swapped operands. */ +- for (i = 0; i < nelt; ++i) +- perm2[i] = (d->perm[i] + nelt) & (2 * nelt - 1); +- if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, +- nelt)) +- return true; +- } +- +- if (loongarch_expand_lsx_shuffle (d)) +- return true; +- if (loongarch_expand_vec_perm_even_odd (d)) +- return true; +- if (loongarch_expand_vec_perm_interleave (d)) +- return true; +- return false; +-} +- + /* Following are the assist function for const vector permutation support. */ + static bool + loongarch_is_quad_duplicate (struct expand_vec_perm_d *d) +@@ -8991,36 +8854,6 @@ loongarch_is_quad_duplicate (struct expand_vec_perm_d *d) + return result; + } + +-static bool +-loongarch_is_double_duplicate (struct expand_vec_perm_d *d) +-{ +- if (!d->one_vector_p) +- return false; +- +- if (d->nelt < 8) +- return false; +- +- bool result = true; +- unsigned char buf = d->perm[0]; +- +- for (int i = 1; i < d->nelt; i += 2) +- { +- if (d->perm[i] != buf) +- { +- result = false; +- break; +- } +- if (d->perm[i - 1] != d->perm[i]) +- { +- result = false; +- break; +- } +- buf += d->nelt / 4; +- } +- +- return result; +-} +- + static bool + loongarch_is_odd_extraction (struct expand_vec_perm_d *d) + { +@@ -9081,110 +8914,6 @@ loongarch_is_extraction_permutation (struct expand_vec_perm_d *d) + return result; + } + +-static bool +-loongarch_is_center_extraction (struct expand_vec_perm_d *d) +-{ +- bool result = true; +- unsigned buf = d->nelt / 2; +- +- for (int i = 0; i < d->nelt; i += 1) +- { +- if (buf != d->perm[i]) +- { +- result = false; +- break; +- } +- buf += 1; +- } +- +- return result; +-} +- +-static bool 
+-loongarch_is_reversing_permutation (struct expand_vec_perm_d *d) +-{ +- if (!d->one_vector_p) +- return false; +- +- bool result = true; +- unsigned char buf = d->nelt - 1; +- +- for (int i = 0; i < d->nelt; i += 1) +- { +- if (d->perm[i] != buf) +- { +- result = false; +- break; +- } +- +- buf -= 1; +- } +- +- return result; +-} +- +-static bool +-loongarch_is_di_misalign_extract (struct expand_vec_perm_d *d) +-{ +- if (d->nelt != 4 && d->nelt != 8) +- return false; +- +- bool result = true; +- unsigned char buf; +- +- if (d->nelt == 4) +- { +- buf = 1; +- for (int i = 0; i < d->nelt; i += 1) +- { +- if (buf != d->perm[i]) +- { +- result = false; +- break; +- } +- +- buf += 1; +- } +- } +- else if (d->nelt == 8) +- { +- buf = 2; +- for (int i = 0; i < d->nelt; i += 1) +- { +- if (buf != d->perm[i]) +- { +- result = false; +- break; +- } +- +- buf += 1; +- } +- } +- +- return result; +-} +- +-static bool +-loongarch_is_si_misalign_extract (struct expand_vec_perm_d *d) +-{ +- if (d->vmode != E_V8SImode && d->vmode != E_V8SFmode) +- return false; +- bool result = true; +- unsigned char buf = 1; +- +- for (int i = 0; i < d->nelt; i += 1) +- { +- if (buf != d->perm[i]) +- { +- result = false; +- break; +- } +- buf += 1; +- } +- +- return result; +-} +- + static bool + loongarch_is_lasx_lowpart_interleave (struct expand_vec_perm_d *d) + { +@@ -9247,39 +8976,6 @@ loongarch_is_lasx_lowpart_interleave_2 (struct expand_vec_perm_d *d) + return result; + } + +-static bool +-loongarch_is_lasx_lowpart_extract (struct expand_vec_perm_d *d) +-{ +- bool result = true; +- unsigned char buf = 0; +- +- for (int i = 0; i < d->nelt / 2; i += 1) +- { +- if (buf != d->perm[i]) +- { +- result = false; +- break; +- } +- buf += 1; +- } +- +- if (result) +- { +- buf = d->nelt; +- for (int i = d->nelt / 2; i < d->nelt; i += 1) +- { +- if (buf != d->perm[i]) +- { +- result = false; +- break; +- } +- buf += 1; +- } +- } +- +- return result; +-} +- + static bool + 
loongarch_is_lasx_highpart_interleave (expand_vec_perm_d *d) + { +@@ -9361,538 +9057,195 @@ loongarch_is_elem_duplicate (struct expand_vec_perm_d *d) + return result; + } + +-inline bool +-loongarch_is_op_reverse_perm (struct expand_vec_perm_d *d) +-{ +- return (d->vmode == E_V4DFmode) +- && d->perm[0] == 2 && d->perm[1] == 3 +- && d->perm[2] == 0 && d->perm[3] == 1; +-} ++/* In LASX, some permutation insn does not have the behavior that gcc expects ++ when compiler wants to emit a vector permutation. ++ ++ 1. What GCC provides via vectorize_vec_perm_const ()'s paramater: ++ When GCC wants to performs a vector permutation, it provides two op ++ reigster, one target register, and a selector. ++ In const vector permutation case, GCC provides selector as a char array ++ that contains original value; in variable vector permuatation ++ (performs via vec_perm insn template), it provides a vector register. ++ We assume that nelt is the elements numbers inside single vector in current ++ 256bit vector mode. ++ ++ 2. What GCC expects to perform: ++ Two op registers (op0, op1) will "combine" into a 512bit temp vector storage ++ that has 2*nelt elements inside it; the low 256bit is op0, and high 256bit ++ is op1, then the elements are indexed as below: ++ 0 ~ nelt - 1 nelt ~ 2 * nelt - 1 ++ |-------------------------|-------------------------| ++ Low 256bit (op0) High 256bit (op1) ++ For example, the second element in op1 (V8SImode) will be indexed with 9. ++ Selector is a vector that has the same mode and number of elements with ++ op0,op1 and target, it's look like this: ++ 0 ~ nelt - 1 ++ |-------------------------| ++ 256bit (selector) ++ It describes which element from 512bit temp vector storage will fit into ++ target's every element slot. ++ GCC expects that every element in selector can be ANY indices of 512bit ++ vector storage (Selector can pick literally any element from op0 and op1, and ++ then fits into any place of target register). 
This is also what LSX 128bit ++ vshuf.* instruction do similarly, so we can handle 128bit vector permutation ++ by single instruction easily. ++ ++ 3. What LASX permutation instruction does: ++ In short, it just execute two independent 128bit vector permuatation, and ++ it's the reason that we need to do the jobs below. We will explain it. ++ op0, op1, target, and selector will be separate into high 128bit and low ++ 128bit, and do permutation as the description below: ++ ++ a) op0's low 128bit and op1's low 128bit "combines" into a 256bit temp ++ vector storage (TVS1), elements are indexed as below: ++ 0 ~ nelt / 2 - 1 nelt / 2 ~ nelt - 1 ++ |---------------------|---------------------| TVS1 ++ op0's low 128bit op1's low 128bit ++ op0's high 128bit and op1's high 128bit are "combined" into TVS2 in the ++ same way. ++ 0 ~ nelt / 2 - 1 nelt / 2 ~ nelt - 1 ++ |---------------------|---------------------| TVS2 ++ op0's high 128bit op1's high 128bit ++ b) Selector's low 128bit describes which elements from TVS1 will fit into ++ target vector's low 128bit. No TVS2 elements are allowed. ++ c) Selector's high 128bit describes which elements from TVS2 will fit into ++ target vector's high 128bit. No TVS1 elements are allowed. ++ ++ As we can see, if we want to handle vector permutation correctly, we can ++ achieve it in three ways: ++ a) Modify selector's elements, to make sure that every elements can inform ++ correct value that will put into target vector. ++ b) Generate extra instruction before/after permutation instruction, for ++ adjusting op vector or target vector, to make sure target vector's value is ++ what GCC expects. ++ c) Use other instructions to process op and put correct result into target. ++ */ ++ ++/* Implementation of constant vector permuatation. This function identifies ++ recognized pattern of permuation selector argument, and use one or more ++ instruction (s) to finish the permutation job correctly. 
For unsupported ++ patterns, it will return false. */ + + static bool +-loongarch_is_single_op_perm (struct expand_vec_perm_d *d) ++loongarch_expand_vec_perm_const (struct expand_vec_perm_d *d) + { +- bool result = true; ++ bool flag = false; ++ unsigned int i; ++ unsigned char idx; ++ rtx target, op0, op1, sel, tmp; ++ rtx rperm[MAX_VECT_LEN]; ++ unsigned int remapped[MAX_VECT_LEN]; ++ unsigned char perm2[MAX_VECT_LEN]; + +- for (int i = 0; i < d->nelt; i += 1) ++ if (GET_MODE_SIZE (d->vmode) == 16) ++ return loongarch_expand_lsx_shuffle (d); ++ else + { +- if (d->perm[i] >= d->nelt) ++ if (d->one_vector_p) + { +- result = false; +- break; ++ /* Try interleave with alternating operands. */ ++ memcpy (perm2, d->perm, sizeof (perm2)); ++ for (i = 1; i < d->nelt; i += 2) ++ perm2[i] += d->nelt; ++ if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, ++ perm2, d->nelt)) ++ return true; + } +- } +- +- return result; +-} +- +-static bool +-loongarch_is_divisible_perm (struct expand_vec_perm_d *d) +-{ +- bool result = true; +- +- for (int i = 0; i < d->nelt / 2; i += 1) +- { +- if (d->perm[i] >= d->nelt) ++ else + { +- result = false; +- break; +- } +- } +- +- if (result) +- { +- for (int i = d->nelt / 2; i < d->nelt; i += 1) +- { +- if (d->perm[i] < d->nelt) +- { +- result = false; +- break; +- } +- } +- } +- +- return result; +-} +- +-inline bool +-loongarch_is_triple_stride_extract (struct expand_vec_perm_d *d) +-{ +- return (d->vmode == E_V4DImode || d->vmode == E_V4DFmode) +- && d->perm[0] == 1 && d->perm[1] == 4 +- && d->perm[2] == 7 && d->perm[3] == 0; +-} +- +-/* In LASX, some permutation insn does not have the behavior that gcc expects +- * when compiler wants to emit a vector permutation. +- * +- * 1. What GCC provides via vectorize_vec_perm_const ()'s paramater: +- * When GCC wants to performs a vector permutation, it provides two op +- * reigster, one target register, and a selector. 
+- * In const vector permutation case, GCC provides selector as a char array +- * that contains original value; in variable vector permuatation +- * (performs via vec_perm insn template), it provides a vector register. +- * We assume that nelt is the elements numbers inside single vector in current +- * 256bit vector mode. +- * +- * 2. What GCC expects to perform: +- * Two op registers (op0, op1) will "combine" into a 512bit temp vector storage +- * that has 2*nelt elements inside it; the low 256bit is op0, and high 256bit +- * is op1, then the elements are indexed as below: +- * 0 ~ nelt - 1 nelt ~ 2 * nelt - 1 +- * |-------------------------|-------------------------| +- * Low 256bit (op0) High 256bit (op1) +- * For example, the second element in op1 (V8SImode) will be indexed with 9. +- * Selector is a vector that has the same mode and number of elements with +- * op0,op1 and target, it's look like this: +- * 0 ~ nelt - 1 +- * |-------------------------| +- * 256bit (selector) +- * It describes which element from 512bit temp vector storage will fit into +- * target's every element slot. +- * GCC expects that every element in selector can be ANY indices of 512bit +- * vector storage (Selector can pick literally any element from op0 and op1, and +- * then fits into any place of target register). This is also what LSX 128bit +- * vshuf.* instruction do similarly, so we can handle 128bit vector permutation +- * by single instruction easily. +- * +- * 3. What LASX permutation instruction does: +- * In short, it just execute two independent 128bit vector permuatation, and +- * it's the reason that we need to do the jobs below. We will explain it. 
+- * op0, op1, target, and selector will be separate into high 128bit and low +- * 128bit, and do permutation as the description below: +- * +- * a) op0's low 128bit and op1's low 128bit "combines" into a 256bit temp +- * vector storage (TVS1), elements are indexed as below: +- * 0 ~ nelt / 2 - 1 nelt / 2 ~ nelt - 1 +- * |---------------------|---------------------| TVS1 +- * op0's low 128bit op1's low 128bit +- * op0's high 128bit and op1's high 128bit are "combined" into TVS2 in the +- * same way. +- * 0 ~ nelt / 2 - 1 nelt / 2 ~ nelt - 1 +- * |---------------------|---------------------| TVS2 +- * op0's high 128bit op1's high 128bit +- * b) Selector's low 128bit describes which elements from TVS1 will fit into +- * target vector's low 128bit. No TVS2 elements are allowed. +- * c) Selector's high 128bit describes which elements from TVS2 will fit into +- * target vector's high 128bit. No TVS1 elements are allowed. +- * +- * As we can see, if we want to handle vector permutation correctly, we can +- * achieve it in three ways: +- * a) Modify selector's elements, to make sure that every elements can inform +- * correct value that will put into target vector. +- b) Generate extra instruction before/after permutation instruction, for +- adjusting op vector or target vector, to make sure target vector's value is +- what GCC expects. +- c) Use other instructions to process op and put correct result into target. +- */ +- +-/* Implementation of constant vector permuatation. This function identifies +- * recognized pattern of permuation selector argument, and use one or more +- * instruction(s) to finish the permutation job correctly. For unsupported +- * patterns, it will return false. */ +- +-static bool +-loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) +-{ +- /* Although we have the LSX vec_perm template, there's still some +- 128bit vector permuatation operations send to vectorize_vec_perm_const. 
+- In this case, we just simpliy wrap them by single vshuf.* instruction, +- because LSX vshuf.* instruction just have the same behavior that GCC +- expects. */ +- if (GET_MODE_SIZE (d->vmode) == 16) +- return loongarch_try_expand_lsx_vshuf_const (d); +- else +- return false; +- +- bool ok = false, reverse_hi_lo = false, extract_ev_od = false, +- use_alt_op = false; +- unsigned char idx; +- int i; +- rtx target, op0, op1, sel, tmp; +- rtx op0_alt = NULL_RTX, op1_alt = NULL_RTX; +- rtx rperm[MAX_VECT_LEN]; +- unsigned int remapped[MAX_VECT_LEN]; +- +- /* Try to figure out whether is a recognized permutation selector pattern, if +- yes, we will reassign some elements with new value in selector argument, +- and in some cases we will generate some assist insn to complete the +- permutation. (Even in some cases, we use other insn to impl permutation +- instead of xvshuf!) ++ if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, ++ d->perm, d->nelt)) ++ return true; + +- Make sure to check d->testing_p is false everytime if you want to emit new +- insn, unless you want to crash into ICE directly. */ +- if (loongarch_is_quad_duplicate (d)) +- { +- /* Selector example: E_V8SImode, { 0, 0, 0, 0, 4, 4, 4, 4 } +- copy first elem from original selector to all elem in new selector. */ +- idx = d->perm[0]; +- for (i = 0; i < d->nelt; i += 1) +- { +- remapped[i] = idx; +- } +- /* Selector after: { 0, 0, 0, 0, 0, 0, 0, 0 }. */ +- } +- else if (loongarch_is_double_duplicate (d)) +- { +- /* Selector example: E_V8SImode, { 1, 1, 3, 3, 5, 5, 7, 7 } +- one_vector_p == true. */ +- for (i = 0; i < d->nelt / 2; i += 1) +- { +- idx = d->perm[i]; +- remapped[i] = idx; +- remapped[i + d->nelt / 2] = idx; ++ /* Try again with swapped operands. 
*/ ++ for (i = 0; i < d->nelt; ++i) ++ perm2[i] = (d->perm[i] + d->nelt) & (2 * d->nelt - 1); ++ if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, ++ perm2, d->nelt)) ++ return true; + } +- /* Selector after: { 1, 1, 3, 3, 1, 1, 3, 3 }. */ +- } +- else if (loongarch_is_odd_extraction (d) +- || loongarch_is_even_extraction (d)) +- { +- /* Odd extraction selector sample: E_V4DImode, { 1, 3, 5, 7 } +- Selector after: { 1, 3, 1, 3 }. +- Even extraction selector sample: E_V4DImode, { 0, 2, 4, 6 } +- Selector after: { 0, 2, 0, 2 }. */ + +- /* Better implement of extract-even and extract-odd permutations. */ +- if (loongarch_expand_vec_perm_even_odd (d)) ++ if (loongarch_expand_lsx_shuffle (d)) + return true; + +- for (i = 0; i < d->nelt / 2; i += 1) +- { +- idx = d->perm[i]; +- remapped[i] = idx; +- remapped[i + d->nelt / 2] = idx; +- } +- /* Additional insn is required for correct result. See codes below. */ +- extract_ev_od = true; +- } +- else if (loongarch_is_extraction_permutation (d)) +- { +- /* Selector sample: E_V8SImode, { 0, 1, 2, 3, 4, 5, 6, 7 }. */ +- if (d->perm[0] == 0) ++ if (loongarch_is_odd_extraction (d) ++ || loongarch_is_even_extraction (d)) + { +- for (i = 0; i < d->nelt / 2; i += 1) +- { +- remapped[i] = i; +- remapped[i + d->nelt / 2] = i; +- } ++ if (loongarch_expand_vec_perm_even_odd (d)) ++ return true; + } +- else ++ ++ if (loongarch_is_lasx_lowpart_interleave (d) ++ || loongarch_is_lasx_lowpart_interleave_2 (d) ++ || loongarch_is_lasx_highpart_interleave (d) ++ || loongarch_is_lasx_highpart_interleave_2 (d)) + { +- /* { 8, 9, 10, 11, 12, 13, 14, 15 }. 
*/ +- for (i = 0; i < d->nelt / 2; i += 1) +- { +- idx = i + d->nelt / 2; +- remapped[i] = idx; +- remapped[i + d->nelt / 2] = idx; +- } ++ if (loongarch_expand_vec_perm_interleave (d)) ++ return true; + } +- /* Selector after: { 0, 1, 2, 3, 0, 1, 2, 3 } +- { 8, 9, 10, 11, 8, 9, 10, 11 } */ +- } +- else if (loongarch_is_center_extraction (d)) +- { +- /* sample: E_V4DImode, { 2, 3, 4, 5 } +- In this condition, we can just copy high 128bit of op0 and low 128bit +- of op1 to the target register by using xvpermi.q insn. */ +- if (!d->testing_p) ++ ++ if (loongarch_is_quad_duplicate (d)) + { +- emit_move_insn (d->target, d->op1); +- switch (d->vmode) ++ if (d->testing_p) ++ return true; ++ /* Selector example: E_V8SImode, { 0, 0, 0, 0, 4, 4, 4, 4 }. */ ++ for (i = 0; i < d->nelt; i += 1) + { +- case E_V4DImode: +- emit_insn (gen_lasx_xvpermi_q_v4di (d->target, d->target, +- d->op0, GEN_INT (0x21))); +- break; +- case E_V4DFmode: +- emit_insn (gen_lasx_xvpermi_q_v4df (d->target, d->target, +- d->op0, GEN_INT (0x21))); +- break; +- case E_V8SImode: +- emit_insn (gen_lasx_xvpermi_q_v8si (d->target, d->target, +- d->op0, GEN_INT (0x21))); +- break; +- case E_V8SFmode: +- emit_insn (gen_lasx_xvpermi_q_v8sf (d->target, d->target, +- d->op0, GEN_INT (0x21))); +- break; +- case E_V16HImode: +- emit_insn (gen_lasx_xvpermi_q_v16hi (d->target, d->target, +- d->op0, GEN_INT (0x21))); +- break; +- case E_V32QImode: +- emit_insn (gen_lasx_xvpermi_q_v32qi (d->target, d->target, +- d->op0, GEN_INT (0x21))); +- break; +- default: +- break; ++ rperm[i] = GEN_INT (d->perm[0]); + } ++ /* Selector after: { 0, 0, 0, 0, 0, 0, 0, 0 }. */ ++ flag = true; ++ goto expand_perm_const_end; + } +- ok = true; +- /* Finish the funtion directly. 
*/ +- goto expand_perm_const_2_end; +- } +- else if (loongarch_is_reversing_permutation (d)) +- { +- /* Selector sample: E_V8SImode, { 7, 6, 5, 4, 3, 2, 1, 0 } +- one_vector_p == true */ +- idx = d->nelt / 2 - 1; +- for (i = 0; i < d->nelt / 2; i += 1) +- { +- remapped[i] = idx; +- remapped[i + d->nelt / 2] = idx; +- idx -= 1; +- } +- /* Selector after: { 3, 2, 1, 0, 3, 2, 1, 0 } +- Additional insn will be generated to swap hi and lo 128bit of target +- register. */ +- reverse_hi_lo = true; +- } +- else if (loongarch_is_di_misalign_extract (d) +- || loongarch_is_si_misalign_extract (d)) +- { +- /* Selector Sample: +- DI misalign: E_V4DImode, { 1, 2, 3, 4 } +- SI misalign: E_V8SImode, { 1, 2, 3, 4, 5, 6, 7, 8 } */ +- if (!d->testing_p) +- { +- /* Copy original op0/op1 value to new temp register. +- In some cases, operand register may be used in multiple place, so +- we need new regiter instead modify original one, to avoid runtime +- crashing or wrong value after execution. */ +- use_alt_op = true; +- op1_alt = gen_reg_rtx (d->vmode); +- emit_move_insn (op1_alt, d->op1); +- +- /* Adjust op1 for selecting correct value in high 128bit of target +- register. +- op1: E_V4DImode, { 4, 5, 6, 7 } -> { 2, 3, 4, 5 }. */ +- rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); +- rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); +- emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, +- conv_op0, GEN_INT (0x21))); + +- for (i = 0; i < d->nelt / 2; i += 1) +- { +- remapped[i] = d->perm[i]; +- remapped[i + d->nelt / 2] = d->perm[i]; +- } +- /* Selector after: +- DI misalign: { 1, 2, 1, 2 } +- SI misalign: { 1, 2, 3, 4, 1, 2, 3, 4 } */ +- } +- } +- else if (loongarch_is_lasx_lowpart_interleave (d)) +- { +- /* Elements from op0's low 18bit and op1's 128bit are inserted into +- target register alternately. +- sample: E_V4DImode, { 0, 4, 1, 5 } */ +- if (!d->testing_p) +- { +- /* Prepare temp register instead of modify original op. 
*/ +- use_alt_op = true; +- op1_alt = gen_reg_rtx (d->vmode); +- op0_alt = gen_reg_rtx (d->vmode); +- emit_move_insn (op1_alt, d->op1); +- emit_move_insn (op0_alt, d->op0); +- +- /* Generate subreg for fitting into insn gen function. */ +- rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); +- rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); +- +- /* Adjust op value in temp register. +- op0 = {0,1,2,3}, op1 = {4,5,0,1} */ +- emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, +- conv_op0, GEN_INT (0x02))); +- /* op0 = {0,1,4,5}, op1 = {4,5,0,1} */ +- emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0, +- conv_op1, GEN_INT (0x01))); +- +- /* Remap indices in selector based on the location of index inside +- selector, and vector element numbers in current vector mode. */ +- +- /* Filling low 128bit of new selector. */ +- for (i = 0; i < d->nelt / 2; i += 1) +- { +- /* value in odd-indexed slot of low 128bit part of selector +- vector. */ +- remapped[i] = i % 2 != 0 ? d->perm[i] - d->nelt / 2 : d->perm[i]; +- } +- /* Then filling the high 128bit. */ +- for (i = d->nelt / 2; i < d->nelt; i += 1) ++ if (loongarch_is_extraction_permutation (d)) ++ { ++ if (d->testing_p) ++ return true; ++ /* Selector sample: E_V8SImode, { 0, 1, 2, 3, 4, 5, 6, 7 }. */ ++ if (d->perm[0] == 0) + { +- /* value in even-indexed slot of high 128bit part of +- selector vector. */ +- remapped[i] = i % 2 == 0 +- ? d->perm[i] + (d->nelt / 2) * 3 : d->perm[i]; ++ for (i = 0; i < d->nelt / 2; i += 1) ++ { ++ remapped[i] = i; ++ remapped[i + d->nelt / 2] = i; ++ } + } +- } +- } +- else if (loongarch_is_lasx_lowpart_interleave_2 (d)) +- { +- /* Special lowpart interleave case in V32QI vector mode. It does the same +- thing as we can see in if branch that above this line. 
+- Selector sample: E_V32QImode, +- {0, 1, 2, 3, 4, 5, 6, 7, 32, 33, 34, 35, 36, 37, 38, 39, 8, +- 9, 10, 11, 12, 13, 14, 15, 40, 41, 42, 43, 44, 45, 46, 47} */ +- if (!d->testing_p) +- { +- /* Solution for this case in very simple - covert op into V4DI mode, +- and do same thing as previous if branch. */ +- op1_alt = gen_reg_rtx (d->vmode); +- op0_alt = gen_reg_rtx (d->vmode); +- emit_move_insn (op1_alt, d->op1); +- emit_move_insn (op0_alt, d->op0); +- +- rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); +- rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); +- rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target, +- d->vmode, 0); +- +- emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, +- conv_op0, GEN_INT (0x02))); +- emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0, +- conv_op1, GEN_INT (0x01))); +- remapped[0] = 0; +- remapped[1] = 4; +- remapped[2] = 1; +- remapped[3] = 5; +- +- for (i = 0; i < d->nelt; i += 1) ++ else + { +- rperm[i] = GEN_INT (remapped[i]); ++ /* { 8, 9, 10, 11, 12, 13, 14, 15 }. */ ++ for (i = 0; i < d->nelt / 2; i += 1) ++ { ++ idx = i + d->nelt / 2; ++ remapped[i] = idx; ++ remapped[i + d->nelt / 2] = idx; ++ } + } ++ /* Selector after: { 0, 1, 2, 3, 0, 1, 2, 3 } ++ { 8, 9, 10, 11, 8, 9, 10, 11 } */ + +- sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (4, rperm)); +- sel = force_reg (E_V4DImode, sel); +- emit_insn (gen_lasx_xvshuf_d (conv_target, sel, +- conv_op1, conv_op0)); +- } +- +- ok = true; +- goto expand_perm_const_2_end; +- } +- else if (loongarch_is_lasx_lowpart_extract (d)) +- { +- /* Copy op0's low 128bit to target's low 128bit, and copy op1's low +- 128bit to target's high 128bit. 
+- Selector sample: E_V4DImode, { 0, 1, 4 ,5 } */ +- if (!d->testing_p) +- { +- rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0); +- rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); +- rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target, +- d->vmode, 0); +- +- /* We can achieve the expectation by using sinple xvpermi.q insn. */ +- emit_move_insn (conv_target, conv_op1); +- emit_insn (gen_lasx_xvpermi_q_v4di (conv_target, conv_target, +- conv_op0, GEN_INT (0x20))); +- } +- +- ok = true; +- goto expand_perm_const_2_end; +- } +- else if (loongarch_is_lasx_highpart_interleave (d)) +- { +- /* Similar to lowpart interleave, elements from op0's high 128bit and +- op1's high 128bit are inserted into target regiter alternately. +- Selector sample: E_V8SImode, { 4, 12, 5, 13, 6, 14, 7, 15 } */ +- if (!d->testing_p) +- { +- /* Prepare temp op register. */ +- use_alt_op = true; +- op1_alt = gen_reg_rtx (d->vmode); +- op0_alt = gen_reg_rtx (d->vmode); +- emit_move_insn (op1_alt, d->op1); +- emit_move_insn (op0_alt, d->op0); +- +- rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); +- rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); +- /* Adjust op value in temp regiter. +- op0 = { 0, 1, 2, 3 }, op1 = { 6, 7, 2, 3 } */ +- emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, +- conv_op0, GEN_INT (0x13))); +- /* op0 = { 2, 3, 6, 7 }, op1 = { 6, 7, 2, 3 } */ +- emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0, +- conv_op1, GEN_INT (0x01))); +- /* Remap indices in selector based on the location of index inside +- selector, and vector element numbers in current vector mode. */ +- +- /* Filling low 128bit of new selector. */ +- for (i = 0; i < d->nelt / 2; i += 1) +- { +- /* value in even-indexed slot of low 128bit part of selector +- vector. */ +- remapped[i] = i % 2 == 0 ? d->perm[i] - d->nelt / 2 : d->perm[i]; +- } +- /* Then filling the high 128bit. 
*/ +- for (i = d->nelt / 2; i < d->nelt; i += 1) +- { +- /* value in odd-indexed slot of high 128bit part of selector +- vector. */ +- remapped[i] = i % 2 != 0 +- ? d->perm[i] - (d->nelt / 2) * 3 : d->perm[i]; +- } +- } +- } +- else if (loongarch_is_lasx_highpart_interleave_2 (d)) +- { +- /* Special highpart interleave case in V32QI vector mode. It does the +- same thing as the normal version above. +- Selector sample: E_V32QImode, +- {16, 17, 18, 19, 20, 21, 22, 23, 48, 49, 50, 51, 52, 53, 54, 55, +- 24, 25, 26, 27, 28, 29, 30, 31, 56, 57, 58, 59, 60, 61, 62, 63} +- */ +- if (!d->testing_p) +- { +- /* Convert op into V4DImode and do the things. */ +- op1_alt = gen_reg_rtx (d->vmode); +- op0_alt = gen_reg_rtx (d->vmode); +- emit_move_insn (op1_alt, d->op1); +- emit_move_insn (op0_alt, d->op0); +- +- rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); +- rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); +- rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target, +- d->vmode, 0); +- +- emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, +- conv_op0, GEN_INT (0x13))); +- emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0, conv_op0, +- conv_op1, GEN_INT (0x01))); +- remapped[0] = 2; +- remapped[1] = 6; +- remapped[2] = 3; +- remapped[3] = 7; +- ++ /* Convert remapped selector array to RTL array. */ + for (i = 0; i < d->nelt; i += 1) + { + rperm[i] = GEN_INT (remapped[i]); + } + +- sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (4, rperm)); +- sel = force_reg (E_V4DImode, sel); +- emit_insn (gen_lasx_xvshuf_d (conv_target, sel, +- conv_op1, conv_op0)); ++ flag = true; ++ goto expand_perm_const_end; + } + +- ok = true; +- goto expand_perm_const_2_end; +- } +- else if (loongarch_is_elem_duplicate (d)) +- { +- /* Brocast single element (from op0 or op1) to all slot of target +- register. 
+- Selector sample:E_V8SImode, { 2, 2, 2, 2, 2, 2, 2, 2 } */ +- if (!d->testing_p) ++ if (loongarch_is_elem_duplicate (d)) + { ++ if (d->testing_p) ++ return true; ++ /* Brocast single element (from op0 or op1) to all slot of target ++ register. ++ Selector sample:E_V8SImode, { 2, 2, 2, 2, 2, 2, 2, 2 } */ + rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0); + rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); + rtx temp_reg = gen_reg_rtx (d->vmode); + rtx conv_temp = simplify_gen_subreg (E_V4DImode, temp_reg, + d->vmode, 0); +- + emit_move_insn (temp_reg, d->op0); + + idx = d->perm[0]; +@@ -9901,7 +9254,7 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + value that we need to broardcast, because xvrepl128vei does the + broardcast job from every 128bit of source register to + corresponded part of target register! (A deep sigh.) */ +- if (/*idx >= 0 &&*/ idx < d->nelt / 2) ++ if (idx < d->nelt / 2) + { + emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp, + conv_op0, GEN_INT (0x0))); +@@ -9956,310 +9309,75 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) + break; + } + +- /* finish func directly. */ +- ok = true; +- goto expand_perm_const_2_end; +- } +- } +- else if (loongarch_is_op_reverse_perm (d)) +- { +- /* reverse high 128bit and low 128bit in op0. +- Selector sample: E_V4DFmode, { 2, 3, 0, 1 } +- Use xvpermi.q for doing this job. */ +- if (!d->testing_p) +- { +- if (d->vmode == E_V4DImode) +- { +- emit_insn (gen_lasx_xvpermi_q_v4di (d->target, d->target, d->op0, +- GEN_INT (0x01))); +- } +- else if (d->vmode == E_V4DFmode) +- { +- emit_insn (gen_lasx_xvpermi_q_v4df (d->target, d->target, d->op0, +- GEN_INT (0x01))); +- } +- else +- { +- gcc_unreachable (); +- } +- } +- +- ok = true; +- goto expand_perm_const_2_end; +- } +- else if (loongarch_is_single_op_perm (d)) +- { +- /* Permutation that only select elements from op0. 
*/ +- if (!d->testing_p) +- { +- /* Prepare temp register instead of modify original op. */ +- use_alt_op = true; +- op0_alt = gen_reg_rtx (d->vmode); +- op1_alt = gen_reg_rtx (d->vmode); +- +- emit_move_insn (op0_alt, d->op0); +- emit_move_insn (op1_alt, d->op1); +- +- rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); +- rtx conv_op0a = simplify_gen_subreg (E_V4DImode, op0_alt, +- d->vmode, 0); +- rtx conv_op1a = simplify_gen_subreg (E_V4DImode, op1_alt, +- d->vmode, 0); +- +- /* Duplicate op0's low 128bit in op0, then duplicate high 128bit +- in op1. After this, xvshuf.* insn's selector argument can +- access all elements we need for correct permutation result. */ +- emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0a, conv_op0a, conv_op0, +- GEN_INT (0x00))); +- emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1a, conv_op1a, conv_op0, +- GEN_INT (0x11))); +- +- /* In this case, there's no need to remap selector's indices. */ +- for (i = 0; i < d->nelt; i += 1) +- { +- remapped[i] = d->perm[i]; +- } ++ return true; + } +- } +- else if (loongarch_is_divisible_perm (d)) +- { +- /* Divisible perm: +- Low 128bit of selector only selects elements of op0, +- and high 128bit of selector only selects elements of op1. */ + +- if (!d->testing_p) ++expand_perm_const_end: ++ if (flag) + { +- /* Prepare temp register instead of modify original op. 
*/ +- use_alt_op = true; +- op0_alt = gen_reg_rtx (d->vmode); +- op1_alt = gen_reg_rtx (d->vmode); +- +- emit_move_insn (op0_alt, d->op0); +- emit_move_insn (op1_alt, d->op1); +- +- rtx conv_op0a = simplify_gen_subreg (E_V4DImode, op0_alt, +- d->vmode, 0); +- rtx conv_op1a = simplify_gen_subreg (E_V4DImode, op1_alt, +- d->vmode, 0); +- rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); +- rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0); +- +- /* Reorganize op0's hi/lo 128bit and op1's hi/lo 128bit, to make sure +- that selector's low 128bit can access all op0's elements, and +- selector's high 128bit can access all op1's elements. */ +- emit_insn (gen_lasx_xvpermi_q_v4di (conv_op0a, conv_op0a, conv_op1, +- GEN_INT (0x02))); +- emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1a, conv_op1a, conv_op0, +- GEN_INT (0x31))); +- +- /* No need to modify indices. */ +- for (i = 0; i < d->nelt;i += 1) ++ /* Copy selector vector from memory to vector register for later insn ++ gen function. ++ If vector's element in floating point value, we cannot fit ++ selector argument into insn gen function directly, because of the ++ insn template definition. As a solution, generate a integral mode ++ subreg of target, then copy selector vector (that is in integral ++ mode) to this subreg. 
*/ ++ switch (d->vmode) + { +- remapped[i] = d->perm[i]; ++ case E_V4DFmode: ++ sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt, ++ rperm)); ++ tmp = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0); ++ emit_move_insn (tmp, sel); ++ break; ++ case E_V8SFmode: ++ sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt, ++ rperm)); ++ tmp = simplify_gen_subreg (E_V8SImode, d->target, d->vmode, 0); ++ emit_move_insn (tmp, sel); ++ break; ++ default: ++ sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, ++ rperm)); ++ emit_move_insn (d->target, sel); ++ break; + } +- } +- } +- else if (loongarch_is_triple_stride_extract (d)) +- { +- /* Selector sample: E_V4DFmode, { 1, 4, 7, 0 }. */ +- if (!d->testing_p) +- { +- /* Resolve it with brute force modification. */ +- remapped[0] = 1; +- remapped[1] = 2; +- remapped[2] = 3; +- remapped[3] = 0; +- } +- } +- else +- { +- /* When all of the detections above are failed, we will try last +- strategy. +- The for loop tries to detect following rules based on indices' value, +- its position inside of selector vector ,and strange behavior of +- xvshuf.* insn; Then we take corresponding action. (Replace with new +- value, or give up whole permutation expansion.) */ +- for (i = 0; i < d->nelt; i += 1) +- { +- /* % (2 * d->nelt) */ +- idx = d->perm[i]; + +- /* if index is located in low 128bit of selector vector. */ +- if (i < d->nelt / 2) +- { +- /* Fail case 1: index tries to reach element that located in op0's +- high 128bit. */ +- if (idx >= d->nelt / 2 && idx < d->nelt) +- { +- goto expand_perm_const_2_end; +- } +- /* Fail case 2: index tries to reach element that located in +- op1's high 128bit. */ +- if (idx >= (d->nelt + d->nelt / 2)) +- { +- goto expand_perm_const_2_end; +- } ++ target = d->target; ++ op0 = d->op0; ++ op1 = d->one_vector_p ? d->op0 : d->op1; + +- /* Success case: index tries to reach elements that located in +- op1's low 128bit. Apply - (nelt / 2) offset to original +- value. 
*/ +- if (idx >= d->nelt && idx < (d->nelt + d->nelt / 2)) +- { +- idx -= d->nelt / 2; +- } +- } +- /* if index is located in high 128bit of selector vector. */ +- else ++ /* We FINALLY can generate xvshuf.* insn. */ ++ switch (d->vmode) + { +- /* Fail case 1: index tries to reach element that located in +- op1's low 128bit. */ +- if (idx >= d->nelt && idx < (d->nelt + d->nelt / 2)) +- { +- goto expand_perm_const_2_end; +- } +- /* Fail case 2: index tries to reach element that located in +- op0's low 128bit. */ +- if (idx < (d->nelt / 2)) +- { +- goto expand_perm_const_2_end; +- } +- /* Success case: index tries to reach element that located in +- op0's high 128bit. */ +- if (idx >= d->nelt / 2 && idx < d->nelt) +- { +- idx -= d->nelt / 2; +- } ++ case E_V4DFmode: ++ emit_insn (gen_lasx_xvshuf_d_f (target, target, op1, op0)); ++ break; ++ case E_V4DImode: ++ emit_insn (gen_lasx_xvshuf_d (target, target, op1, op0)); ++ break; ++ case E_V8SFmode: ++ emit_insn (gen_lasx_xvshuf_w_f (target, target, op1, op0)); ++ break; ++ case E_V8SImode: ++ emit_insn (gen_lasx_xvshuf_w (target, target, op1, op0)); ++ break; ++ case E_V16HImode: ++ emit_insn (gen_lasx_xvshuf_h (target, target, op1, op0)); ++ break; ++ case E_V32QImode: ++ emit_insn (gen_lasx_xvshuf_b (target, op1, op0, target)); ++ break; ++ default: ++ gcc_unreachable (); ++ break; + } +- /* No need to process other case that we did not mentioned. */ +- +- /* Assign with original or processed value. */ +- remapped[i] = idx; +- } +- } +- +- ok = true; +- /* If testing_p is true, compiler is trying to figure out that backend can +- handle this permutation, but doesn't want to generate actual insn. So +- if true, exit directly. */ +- if (d->testing_p) +- { +- goto expand_perm_const_2_end; +- } +- +- /* Convert remapped selector array to RTL array. 
*/ +- for (i = 0; i < d->nelt; i += 1) +- { +- rperm[i] = GEN_INT (remapped[i]); +- } +- +- /* Copy selector vector from memory to vector regiter for later insn gen +- function. +- If vector's element in floating point value, we cannot fit selector +- argument into insn gen function directly, because of the insn template +- definition. As a solution, generate a integral mode subreg of target, +- then copy selector vector (that is in integral mode) to this subreg. */ +- switch (d->vmode) +- { +- case E_V4DFmode: +- sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0); +- emit_move_insn (tmp, sel); +- break; +- case E_V8SFmode: +- sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt, rperm)); +- tmp = simplify_gen_subreg (E_V8SImode, d->target, d->vmode, 0); +- emit_move_insn (tmp, sel); +- break; +- default: +- sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm)); +- emit_move_insn (d->target, sel); +- break; +- } +- +- target = d->target; +- /* If temp op registers are requested in previous if branch, then use temp +- register intead of original one. */ +- if (use_alt_op) +- { +- op0 = op0_alt != NULL_RTX ? op0_alt : d->op0; +- op1 = op1_alt != NULL_RTX ? op1_alt : d->op1; +- } +- else +- { +- op0 = d->op0; +- op1 = d->one_vector_p ? d->op0 : d->op1; +- } +- +- /* We FINALLY can generate xvshuf.* insn. 
*/ +- switch (d->vmode) +- { +- case E_V4DFmode: +- emit_insn (gen_lasx_xvshuf_d_f (target, target, op1, op0)); +- break; +- case E_V4DImode: +- emit_insn (gen_lasx_xvshuf_d (target, target, op1, op0)); +- break; +- case E_V8SFmode: +- emit_insn (gen_lasx_xvshuf_w_f (target, target, op1, op0)); +- break; +- case E_V8SImode: +- emit_insn (gen_lasx_xvshuf_w (target, target, op1, op0)); +- break; +- case E_V16HImode: +- emit_insn (gen_lasx_xvshuf_h (target, target, op1, op0)); +- break; +- case E_V32QImode: +- emit_insn (gen_lasx_xvshuf_b (target, op1, op0, target)); +- break; +- default: +- gcc_unreachable (); +- break; +- } + +- /* Extra insn for swapping the hi/lo 128bit of target vector register. */ +- if (reverse_hi_lo) +- { +- switch (d->vmode) +- { +- case E_V4DFmode: +- emit_insn (gen_lasx_xvpermi_q_v4df (d->target, d->target, +- d->target, GEN_INT (0x1))); +- break; +- case E_V4DImode: +- emit_insn (gen_lasx_xvpermi_q_v4di (d->target, d->target, +- d->target, GEN_INT (0x1))); +- break; +- case E_V8SFmode: +- emit_insn (gen_lasx_xvpermi_q_v8sf (d->target, d->target, +- d->target, GEN_INT (0x1))); +- break; +- case E_V8SImode: +- emit_insn (gen_lasx_xvpermi_q_v8si (d->target, d->target, +- d->target, GEN_INT (0x1))); +- break; +- case E_V16HImode: +- emit_insn (gen_lasx_xvpermi_q_v16hi (d->target, d->target, +- d->target, GEN_INT (0x1))); +- break; +- case E_V32QImode: +- emit_insn (gen_lasx_xvpermi_q_v32qi (d->target, d->target, +- d->target, GEN_INT (0x1))); +- break; +- default: +- break; ++ return true; + } + } +- /* Extra insn required by odd/even extraction. Swapping the second and third +- 64bit in target vector register. */ +- else if (extract_ev_od) +- { +- rtx converted = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0); +- emit_insn (gen_lasx_xvpermi_d_v4di (converted, converted, +- GEN_INT (0xD8))); +- } + +-expand_perm_const_2_end: +- return ok; ++ return false; + } + + /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. 
*/ +@@ -10340,25 +9458,19 @@ loongarch_vectorize_vec_perm_const (machine_mode vmode, + if (!d.one_vector_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + +- ok = loongarch_expand_vec_perm_const_2 (&d); +- if (ok) +- return ok; +- + start_sequence (); +- ok = loongarch_expand_vec_perm_const_1 (&d); ++ ok = loongarch_expand_vec_perm_const (&d); + end_sequence (); + return ok; + } + +- ok = loongarch_expand_vec_perm_const_2 (&d); +- if (!ok) +- ok = loongarch_expand_vec_perm_const_1 (&d); ++ ok = loongarch_expand_vec_perm_const (&d); + + /* If we were given a two-vector permutation which just happened to + have both input vectors equal, we folded this into a one-vector + permutation. There are several loongson patterns that are matched + via direct vec_select+vec_concat expansion, but we do not have +- support in loongarch_expand_vec_perm_const_1 to guess the adjustment ++ support in loongarch_expand_vec_perm_const to guess the adjustment + that should be made for a single operand. Just try again with + the original permutation. */ + if (!ok && which == 3) +@@ -10367,7 +9479,7 @@ loongarch_vectorize_vec_perm_const (machine_mode vmode, + d.op1 = op1; + d.one_vector_p = false; + memcpy (d.perm, orig_perm, MAX_VECT_LEN); +- ok = loongarch_expand_vec_perm_const_1 (&d); ++ ok = loongarch_expand_vec_perm_const (&d); + } + + return ok; +-- +2.43.0 + diff --git a/0095-LoongArch-testsuite-Fix-FAIL-in-lasx-xvstelm.c-file.patch b/0095-LoongArch-testsuite-Fix-FAIL-in-lasx-xvstelm.c-file.patch new file mode 100644 index 0000000..c606492 --- /dev/null +++ b/0095-LoongArch-testsuite-Fix-FAIL-in-lasx-xvstelm.c-file.patch @@ -0,0 +1,34 @@ +From 6263acd411b9685ebc7b16d19b91aad39cb7e184 Mon Sep 17 00:00:00 2001 +From: chenxiaolong +Date: Fri, 29 Dec 2023 09:45:15 +0800 +Subject: [PATCH 095/188] LoongArch: testsuite:Fix FAIL in lasx-xvstelm.c file. 
+ +After implementing the cost model on the LoongArch architecture, the GCC +compiler code has this feature turned on by default, which causes the +lasx-xvstelm.c file test to fail. Through analysis, this test case can +generate vectorization instructions required for detection only after +disabling the functionality of the cost model with the "-fno-vect-cost-model" +compilation option. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xvstelm.c:Add compile + option "-fno-vect-cost-model" to dg-options. +--- + gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c +index 1a7b0e86f..4b846204a 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O3 -mlasx" } */ ++/* { dg-options "-O3 -mlasx -fno-vect-cost-model" } */ + /* { dg-final { scan-assembler-times "xvstelm.w" 8} } */ + + #define LEN 256 +-- +2.43.0 + diff --git a/0096-LoongArch-testsuite-Modify-the-test-behavior-of-the-.patch b/0096-LoongArch-testsuite-Modify-the-test-behavior-of-the-.patch new file mode 100644 index 0000000..ba208db --- /dev/null +++ b/0096-LoongArch-testsuite-Modify-the-test-behavior-of-the-.patch @@ -0,0 +1,47 @@ +From c21f2c7e6c2385a3783977bbca79ebe178d0d141 Mon Sep 17 00:00:00 2001 +From: chenxiaolong +Date: Fri, 5 Jan 2024 11:43:24 +0800 +Subject: [PATCH 096/188] LoongArch: testsuite:Modify the test behavior of the + vect-bic-bitmask-{12, 23}.c file. + +Before modifying the test behavior of the program, dg-do is set to assemble in +vect-bic-bitmask-{12,23}.c. 
However, when the binutils library does not support +the vector instruction set, it will FAIL to recognize the vector instruction +and fail item will appear in the assembly stage. So set the program's dg-do to +compile. + +gcc/testsuite/ChangeLog: + + * gcc.dg/vect/vect-bic-bitmask-12.c: Change the default + setting of assembly to compile. + * gcc.dg/vect/vect-bic-bitmask-23.c: Dito. +--- + gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c | 2 +- + gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c +index 36ec5a8b1..213e4c2a4 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c +@@ -1,5 +1,5 @@ + /* { dg-skip-if "missing optab for vectorization" { sparc*-*-* } } */ +-/* { dg-do assemble } */ ++/* { dg-do compile } */ + /* { dg-additional-options "-O3 -fdump-tree-dce -w" } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c +index 5b4c3b6e1..5dceb4bbc 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c +@@ -1,5 +1,5 @@ + /* { dg-skip-if "missing optab for vectorization" { sparc*-*-* } } */ +-/* { dg-do assemble } */ ++/* { dg-do compile } */ + /* { dg-additional-options "-O1 -fdump-tree-dce -w" } */ + + #include +-- +2.43.0 + diff --git a/0097-LoongArch-testsuite-Delete-the-default-run-behavior-.patch b/0097-LoongArch-testsuite-Delete-the-default-run-behavior-.patch new file mode 100644 index 0000000..f87269f --- /dev/null +++ b/0097-LoongArch-testsuite-Delete-the-default-run-behavior-.patch @@ -0,0 +1,31 @@ +From cdee2d1e7391d95bf6fd471fddcb86ee81247929 Mon Sep 17 00:00:00 2001 +From: chenxiaolong +Date: Fri, 5 Jan 2024 11:43:27 +0800 +Subject: [PATCH 097/188] LoongArch: testsuite:Delete the default 
run behavior + in pr60510.f. + +When binutils does not support vector instruction sets, the test program fails +because it does not recognize vectorization at the assembly stage. Therefore, +the default run behavior of the program is deleted, so that the behavior of +the program depends on whether the software supports vectorization. + +gcc/testsuite/ChangeLog: + + * gfortran.dg/vect/pr60510.f: Delete the default behavior of the + program. +--- + gcc/testsuite/gfortran.dg/vect/pr60510.f | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/gcc/testsuite/gfortran.dg/vect/pr60510.f b/gcc/testsuite/gfortran.dg/vect/pr60510.f +index ecd50dd55..c1e11b27d 100644 +--- a/gcc/testsuite/gfortran.dg/vect/pr60510.f ++++ b/gcc/testsuite/gfortran.dg/vect/pr60510.f +@@ -1,4 +1,3 @@ +-! { dg-do run } + ! { dg-require-effective-target vect_double } + ! { dg-require-effective-target vect_intdouble_cvt } + ! { dg-additional-options "-fno-inline -ffast-math" } +-- +2.43.0 + diff --git a/0098-LoongArch-testsuite-Added-additional-vectorization-m.patch b/0098-LoongArch-testsuite-Added-additional-vectorization-m.patch new file mode 100644 index 0000000..ea25625 --- /dev/null +++ b/0098-LoongArch-testsuite-Added-additional-vectorization-m.patch @@ -0,0 +1,157 @@ +From c8fa8efa3297ebced55da8a69cf44f314573be7c Mon Sep 17 00:00:00 2001 +From: chenxiaolong +Date: Fri, 5 Jan 2024 11:43:28 +0800 +Subject: [PATCH 098/188] LoongArch: testsuite:Added additional vectorization + "-mlasx" compilation option. + +In the LoongArch architecture, the reason for not adding the 128-bit +vector-width-*hi* instruction template in the GCC back end is that it causes +program performance loss, so we can only add the "-mlasx" compilation option +to use 256-bit vectorization functions in test files. + +gcc/testsuite/ChangeLog: + + * gcc.dg/vect/bb-slp-pattern-1.c: If you are testing on the + LoongArch architecture, you need to add the "-mlasx" compilation + option to generate vectorized code. 
+ * gcc.dg/vect/slp-widen-mult-half.c: Dito. + * gcc.dg/vect/vect-widen-mult-const-s16.c: Dito. + * gcc.dg/vect/vect-widen-mult-const-u16.c: Dito. + * gcc.dg/vect/vect-widen-mult-half-u8.c: Dito. + * gcc.dg/vect/vect-widen-mult-half.c: Dito. + * gcc.dg/vect/vect-widen-mult-u16.c: Dito. + * gcc.dg/vect/vect-widen-mult-u8-s16-s32.c: Dito. + * gcc.dg/vect/vect-widen-mult-u8-u32.c: Dito. + * gcc.dg/vect/vect-widen-mult-u8.c: Dito. +--- + gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c | 1 + + gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c | 1 + + gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c | 1 + + 10 files changed, 10 insertions(+) + +diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c +index 47b1a4366..52ffca82a 100644 +--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c ++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-* } } */ + + #include + #include "tree-vect.h" +diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c +index e3bfee333..cd44e551f 100644 +--- a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c ++++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c +@@ -1,6 +1,7 @@ + /* Disabling epilogues until we find a better way to deal with scans. 
*/ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-* } } */ + + #include "tree-vect.h" + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c +index 4c95dd201..082c758cb 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c +@@ -2,6 +2,7 @@ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-additional-options "-fno-ipa-icf" } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include "tree-vect.h" + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c +index 4075f815c..a95e617ad 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c +@@ -2,6 +2,7 @@ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-additional-options "-fno-ipa-icf" } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include "tree-vect.h" + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c +index c4ac88e18..14d96645a 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c +@@ -2,6 +2,7 @@ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-additional-options "-fno-ipa-icf" } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include "tree-vect.h" + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c 
b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c +index ebbf4f5e8..7901dae85 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c +@@ -1,6 +1,7 @@ + /* Disabling epilogues until we find a better way to deal with scans. */ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include "tree-vect.h" + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c +index 2e28baae0..21b39953e 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c +@@ -1,6 +1,7 @@ + /* Disabling epilogues until we find a better way to deal with scans. */ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include + #include "tree-vect.h" +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c +index d277f0b2b..4827e11b2 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c +@@ -1,6 +1,7 @@ + /* Disabling epilogues until we find a better way to deal with scans. 
*/ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include + #include "tree-vect.h" +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c +index f50358802..87eb9e0cb 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c +@@ -1,5 +1,6 @@ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-* } } */ + + #include + #include "tree-vect.h" +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c +index 03d137941..507d30c35 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c +@@ -1,5 +1,6 @@ + /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ ++/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */ + + #include + #include "tree-vect.h" +-- +2.43.0 + diff --git a/0099-LoongArch-testsuite-Give-up-the-detection-of-the-gcc.patch b/0099-LoongArch-testsuite-Give-up-the-detection-of-the-gcc.patch new file mode 100644 index 0000000..6243bab --- /dev/null +++ b/0099-LoongArch-testsuite-Give-up-the-detection-of-the-gcc.patch @@ -0,0 +1,80 @@ +From df18d0c85049402b8f2f44c3c4e013a0b6d91cee Mon Sep 17 00:00:00 2001 +From: chenxiaolong +Date: Fri, 5 Jan 2024 11:43:29 +0800 +Subject: [PATCH 099/188] LoongArch: testsuite:Give up the detection of the + gcc.dg/fma-{3, 4, 6, 7}.c file. + +On the LoongArch architecture, the above four test cases need to be waived +during testing. There are two situations: + +1. 
The function of fma-{3,6}.c test is to find the value of c-a*b, but on +the LoongArch architecture, the function of the existing fnmsub instruction +is to find the value of -(a*b - c); + +2. The function of fma-{4,7}.c test is to find the value of -(a*b)-c, but on +the LoongArch architecture, the function of the existing fnmadd instruction +is to find the value of -(a*b + c); + +Through the analysis of the above two cases, there will be positive and +negative zero inequality. + +gcc/testsuite/ChangeLog + + * gcc.dg/fma-3.c: The intermediate file corresponding to the + function does not produce the corresponding FNMA symbol, so the test + rules should be skipped when testing. + * gcc.dg/fma-4.c: The intermediate file corresponding to the + function does not produce the corresponding FNMS symbol, so skip the + test rules when testing. + * gcc.dg/fma-6.c: The cause is the same as fma-3.c. + * gcc.dg/fma-7.c: The cause is the same as fma-4.c +--- + gcc/testsuite/gcc.dg/fma-3.c | 2 +- + gcc/testsuite/gcc.dg/fma-4.c | 2 +- + gcc/testsuite/gcc.dg/fma-6.c | 2 +- + gcc/testsuite/gcc.dg/fma-7.c | 2 +- + 4 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/gcc/testsuite/gcc.dg/fma-3.c b/gcc/testsuite/gcc.dg/fma-3.c +index 699aa2c95..6649b54b6 100644 +--- a/gcc/testsuite/gcc.dg/fma-3.c ++++ b/gcc/testsuite/gcc.dg/fma-3.c +@@ -12,4 +12,4 @@ f2 (double a, double b, double c) + return c - a * b; + } + +-/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 2 "widening_mul" { target scalar_all_fma } } } */ ++/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 2 "widening_mul" { target { scalar_all_fma && { ! 
loongarch*-*-* } } } } } */ +diff --git a/gcc/testsuite/gcc.dg/fma-4.c b/gcc/testsuite/gcc.dg/fma-4.c +index bff928f1f..f1701c196 100644 +--- a/gcc/testsuite/gcc.dg/fma-4.c ++++ b/gcc/testsuite/gcc.dg/fma-4.c +@@ -12,4 +12,4 @@ f2 (double a, double b, double c) + return -(a * b) - c; + } + +-/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 2 "widening_mul" { target scalar_all_fma } } } */ ++/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 2 "widening_mul" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */ +diff --git a/gcc/testsuite/gcc.dg/fma-6.c b/gcc/testsuite/gcc.dg/fma-6.c +index 87258cec4..9e49b62b6 100644 +--- a/gcc/testsuite/gcc.dg/fma-6.c ++++ b/gcc/testsuite/gcc.dg/fma-6.c +@@ -64,4 +64,4 @@ f10 (double a, double b, double c) + return -__builtin_fma (a, b, -c); + } + +-/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 14 "optimized" { target scalar_all_fma } } } */ ++/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 14 "optimized" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */ +diff --git a/gcc/testsuite/gcc.dg/fma-7.c b/gcc/testsuite/gcc.dg/fma-7.c +index f409cc8ee..86aacad7b 100644 +--- a/gcc/testsuite/gcc.dg/fma-7.c ++++ b/gcc/testsuite/gcc.dg/fma-7.c +@@ -64,4 +64,4 @@ f10 (double a, double b, double c) + return -__builtin_fma (a, b, c); + } + +-/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 14 "optimized" { target scalar_all_fma } } } */ ++/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 14 "optimized" { target { scalar_all_fma && { ! 
loongarch*-*-* } } } } } */ +-- +2.43.0 + diff --git a/0100-LoongArch-Fixed-the-problem-of-incorrect-judgment-of.patch b/0100-LoongArch-Fixed-the-problem-of-incorrect-judgment-of.patch new file mode 100644 index 0000000..af4278b --- /dev/null +++ b/0100-LoongArch-Fixed-the-problem-of-incorrect-judgment-of.patch @@ -0,0 +1,206 @@ +From 90db6906a92b685403d9220e94f779737d2dd100 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Thu, 4 Jan 2024 10:37:53 +0800 +Subject: [PATCH 100/188] LoongArch: Fixed the problem of incorrect judgment of + the immediate field of the [x]vld/[x]vst instruction. + +The [x]vld/[x]vst directive is defined as follows: + [x]vld/[x]vst {x/v}d, rj, si12 + +When not modified, the immediate field of [x]vld/[x]vst is between 10 and +14 bits depending on the type. However, in loongarch_valid_offset_p, the +immediate field is restricted first, so there is no error. However, in +some cases redundant instructions will be generated, see test cases. +Now modify it according to the description in the instruction manual. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (lasx_mxld_): + Modify the method of determining the memory offset of [x]vld/[x]vst. + (lasx_mxst_): Likewise. + * config/loongarch/loongarch.cc (loongarch_valid_offset_p): Delete. + (loongarch_address_insns): Likewise. + * config/loongarch/lsx.md (lsx_ld_): Likewise. + (lsx_st_): Likewise. + * config/loongarch/predicates.md (aq10b_operand): Likewise. + (aq10h_operand): Likewise. + (aq10w_operand): Likewise. + (aq10d_operand): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-ld-st-imm12.c: New test. 
+--- + gcc/config/loongarch/lasx.md | 26 ------------------- + gcc/config/loongarch/loongarch.cc | 19 +++----------- + gcc/config/loongarch/lsx.md | 26 ------------------- + gcc/config/loongarch/predicates.md | 16 ------------ + .../gcc.target/loongarch/vect-ld-st-imm12.c | 15 +++++++++++ + 5 files changed, 19 insertions(+), 83 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index dbbf5a136..95c6bae20 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -846,32 +846,6 @@ + DONE; + }) + +-;; Offset load +-(define_expand "lasx_mxld_" +- [(match_operand:LASX 0 "register_operand") +- (match_operand 1 "pmode_register_operand") +- (match_operand 2 "aq10_operand")] +- "ISA_HAS_LASX" +-{ +- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1], +- INTVAL (operands[2])); +- loongarch_emit_move (operands[0], gen_rtx_MEM (mode, addr)); +- DONE; +-}) +- +-;; Offset store +-(define_expand "lasx_mxst_" +- [(match_operand:LASX 0 "register_operand") +- (match_operand 1 "pmode_register_operand") +- (match_operand 2 "aq10_operand")] +- "ISA_HAS_LASX" +-{ +- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1], +- INTVAL (operands[2])); +- loongarch_emit_move (gen_rtx_MEM (mode, addr), operands[0]); +- DONE; +-}) +- + ;; LASX + (define_insn "add3" + [(set (match_operand:ILASX 0 "register_operand" "=f,f,f") +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 9d2374a46..ddb32cea2 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2123,21 +2123,11 @@ loongarch_valid_offset_p (rtx x, machine_mode mode) + + /* We may need to split multiword moves, so make sure that every word + is accessible. 
*/ +- if (GET_MODE_SIZE (mode) > UNITS_PER_WORD ++ if (!(LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode)) ++ && GET_MODE_SIZE (mode) > UNITS_PER_WORD + && !IMM12_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD)) + return false; + +- /* LSX LD.* and ST.* supports 10-bit signed offsets. */ +- if (LSX_SUPPORTED_MODE_P (mode) +- && !loongarch_signed_immediate_p (INTVAL (x), 10, +- loongarch_ldst_scaled_shift (mode))) +- return false; +- +- /* LASX XVLD.B and XVST.B supports 10-bit signed offsets without shift. */ +- if (LASX_SUPPORTED_MODE_P (mode) +- && !loongarch_signed_immediate_p (INTVAL (x), 10, 0)) +- return false; +- + return true; + } + +@@ -2372,9 +2362,8 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) + case ADDRESS_REG: + if (lsx_p) + { +- /* LSX LD.* and ST.* supports 10-bit signed offsets. */ +- if (loongarch_signed_immediate_p (INTVAL (addr.offset), 10, +- loongarch_ldst_scaled_shift (mode))) ++ /* LSX LD.* and ST.* supports 12-bit signed offsets. 
*/ ++ if (IMM12_OPERAND (INTVAL (addr.offset))) + return 1; + else + return 0; +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 3e3248ef4..02e89247b 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -812,32 +812,6 @@ + DONE; + }) + +-;; Offset load +-(define_expand "lsx_ld_" +- [(match_operand:LSX 0 "register_operand") +- (match_operand 1 "pmode_register_operand") +- (match_operand 2 "aq10_operand")] +- "ISA_HAS_LSX" +-{ +- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1], +- INTVAL (operands[2])); +- loongarch_emit_move (operands[0], gen_rtx_MEM (mode, addr)); +- DONE; +-}) +- +-;; Offset store +-(define_expand "lsx_st_" +- [(match_operand:LSX 0 "register_operand") +- (match_operand 1 "pmode_register_operand") +- (match_operand 2 "aq10_operand")] +- "ISA_HAS_LSX" +-{ +- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1], +- INTVAL (operands[2])); +- loongarch_emit_move (gen_rtx_MEM (mode, addr), operands[0]); +- DONE; +-}) +- + ;; Integer operations + (define_insn "add3" + [(set (match_operand:ILSX 0 "register_operand" "=f,f,f") +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 3698b9103..824a85b36 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -167,22 +167,6 @@ + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 3)"))) + +-(define_predicate "aq10b_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 0)"))) +- +-(define_predicate "aq10h_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 1)"))) +- +-(define_predicate "aq10w_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 2)"))) +- +-(define_predicate "aq10d_operand" +- (and (match_code "const_int") +- (match_test "loongarch_signed_immediate_p 
(INTVAL (op), 10, 3)"))) +- + (define_predicate "aq12b_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 12, 0)"))) +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c b/gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c +new file mode 100644 +index 000000000..bfc208e4f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-march=loongarch64 -mabi=lp64d -mlasx -O2" } */ ++/* { dg-final { scan-assembler-not "addi.d" } } */ ++ ++extern short a[1000]; ++extern short b[1000]; ++extern short c[1000]; ++ ++void ++test (void) ++{ ++ for (int i = 501; i < 517; i++) ++ ((int *)(c + 1))[i] = ((int *)(a + 1))[i] + ((int *)(b + 1))[i]; ++} ++ +-- +2.43.0 + diff --git a/0101-LoongArch-Improve-lasx_xvpermi_q_-LASX-mode-insn-pat.patch b/0101-LoongArch-Improve-lasx_xvpermi_q_-LASX-mode-insn-pat.patch new file mode 100644 index 0000000..94733f6 --- /dev/null +++ b/0101-LoongArch-Improve-lasx_xvpermi_q_-LASX-mode-insn-pat.patch @@ -0,0 +1,150 @@ +From f5355c67104cb5d150e1fd3b58807b2ad4e67b7c Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Fri, 5 Jan 2024 15:37:13 +0800 +Subject: [PATCH 101/188] LoongArch: Improve lasx_xvpermi_q_ insn + pattern + +For instruction xvpermi.q, unused bits in operands[3] need be set to 0 to avoid +causing undefined behavior on LA464. + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Set the unused bits in operand[3] to 0. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xvpremi.c: Removed. + * gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c: New test. 
+--- + gcc/config/loongarch/lasx.md | 9 ++- + .../loongarch/vector/lasx/lasx-xvpermi_q.c | 64 +++++++++++++++++++ + .../loongarch/vector/lasx/lasx-xvpremi.c | 19 ------ + 3 files changed, 72 insertions(+), 20 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c + delete mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 95c6bae20..b4aa8e261 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -635,6 +635,8 @@ + (set_attr "mode" "")]) + + ;; xvpermi.q ++;; Unused bits in operands[3] need be set to 0 to avoid ++;; causing undefined behavior on LA464. + (define_insn "lasx_xvpermi_q_" + [(set (match_operand:LASX 0 "register_operand" "=f") + (unspec:LASX +@@ -643,7 +645,12 @@ + (match_operand 3 "const_uimm8_operand")] + UNSPEC_LASX_XVPERMI_Q))] + "ISA_HAS_LASX" +- "xvpermi.q\t%u0,%u2,%3" ++{ ++ int mask = 0x33; ++ mask &= INTVAL (operands[3]); ++ operands[3] = GEN_INT (mask); ++ return "xvpermi.q\t%u0,%u2,%3"; ++} + [(set_attr "type" "simd_splat") + (set_attr "mode" "")]) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c +new file mode 100644 +index 000000000..dbc29d2fb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c +@@ -0,0 +1,64 @@ ++/* { dg-options "-mlasx -w -fno-strict-aliasing" } */ ++#include "../simd_correctness_check.h" ++#include ++ ++int ++main () ++{ ++ __m256i __m256i_op0, __m256i_op1, __m256i_op2, __m256i_out, __m256i_result; ++ __m256 __m256_op0, __m256_op1, __m256_op2, __m256_out, __m256_result; ++ __m256d __m256d_op0, __m256d_op1, __m256d_op2, __m256d_out, __m256d_result; ++ ++ int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail; ++ long int long_op0, long_op1, long_op2, lont_out, lont_result; ++ long int long_int_out, 
long_int_result; ++ unsigned int unsigned_int_out, unsigned_int_result; ++ unsigned long int unsigned_long_int_out, unsigned_long_int_result; ++ ++ *((unsigned long*)& __m256i_op0[3]) = 0x7fe37fe3001d001d; ++ *((unsigned long*)& __m256i_op0[2]) = 0x7fff7fff7fff0000; ++ *((unsigned long*)& __m256i_op0[1]) = 0x7fe37fe3001d001d; ++ *((unsigned long*)& __m256i_op0[0]) = 0x7fff7fff7fff0000; ++ *((unsigned long*)& __m256i_op1[3]) = 0x7575757575757575; ++ *((unsigned long*)& __m256i_op1[2]) = 0x7575757575757575; ++ *((unsigned long*)& __m256i_op1[1]) = 0x7575757575757575; ++ *((unsigned long*)& __m256i_op1[0]) = 0x7575757575757575; ++ *((unsigned long*)& __m256i_result[3]) = 0x7fe37fe3001d001d; ++ *((unsigned long*)& __m256i_result[2]) = 0x7fff7fff7fff0000; ++ *((unsigned long*)& __m256i_result[1]) = 0x7fe37fe3001d001d; ++ *((unsigned long*)& __m256i_result[0]) = 0x7fff7fff7fff0000; ++ __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0x2a); ++ ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); ++ ++ *((unsigned long*)& __m256i_op0[3]) = 0x0000000000000000; ++ *((unsigned long*)& __m256i_op0[2]) = 0x000000000019001c; ++ *((unsigned long*)& __m256i_op0[1]) = 0x0000000000000000; ++ *((unsigned long*)& __m256i_op0[0]) = 0x000000000019001c; ++ *((unsigned long*)& __m256i_op1[3]) = 0x0000000000000000; ++ *((unsigned long*)& __m256i_op1[2]) = 0x00000000000001fe; ++ *((unsigned long*)& __m256i_op1[1]) = 0x0000000000000000; ++ *((unsigned long*)& __m256i_op1[0]) = 0x00000000000001fe; ++ *((unsigned long*)& __m256i_result[3]) = 0x0000000000000000; ++ *((unsigned long*)& __m256i_result[2]) = 0x000000000019001c; ++ *((unsigned long*)& __m256i_result[1]) = 0x0000000000000000; ++ *((unsigned long*)& __m256i_result[0]) = 0x00000000000001fe; ++ __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0xb9); ++ ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); ++ ++ *((unsigned long*)& __m256i_op0[3]) = 0x00ff00ff00ff00ff; ++ *((unsigned long*)& __m256i_op0[2]) = 
0x00ff00ff00ff00ff; ++ *((unsigned long*)& __m256i_op0[1]) = 0x00ff00ff00ff00ff; ++ *((unsigned long*)& __m256i_op0[0]) = 0x00ff00ff00ff00ff; ++ *((unsigned long*)& __m256i_op1[3]) = 0xffffffffffffffff; ++ *((unsigned long*)& __m256i_op1[2]) = 0xffff0000ffff0000; ++ *((unsigned long*)& __m256i_op1[1]) = 0xffffffffffffffff; ++ *((unsigned long*)& __m256i_op1[0]) = 0xffff0000ffff0000; ++ *((unsigned long*)& __m256i_result[3]) = 0xffffffffffffffff; ++ *((unsigned long*)& __m256i_result[2]) = 0xffff0000ffff0000; ++ *((unsigned long*)& __m256i_result[1]) = 0x00ff00ff00ff00ff; ++ *((unsigned long*)& __m256i_result[0]) = 0x00ff00ff00ff00ff; ++ __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0xca); ++ ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c +deleted file mode 100644 +index e9fc1d7d3..000000000 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpremi.c ++++ /dev/null +@@ -1,19 +0,0 @@ +-/* { dg-options "-mlasx -w -fno-strict-aliasing" } */ +-#include "../simd_correctness_check.h" +-#include +- +-int +-main () +-{ +- __m256i __m256i_op0, __m256i_op1, __m256i_op2, __m256i_out, __m256i_result; +- __m256 __m256_op0, __m256_op1, __m256_op2, __m256_out, __m256_result; +- __m256d __m256d_op0, __m256d_op1, __m256d_op2, __m256d_out, __m256d_result; +- +- int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail; +- long int long_op0, long_op1, long_op2, lont_out, lont_result; +- long int long_int_out, long_int_result; +- unsigned int unsigned_int_out, unsigned_int_result; +- unsigned long int unsigned_long_int_out, unsigned_long_int_result; +- +- return 0; +-} +-- +2.43.0 + diff --git a/0102-LoongArch-Implement-vec_init-M-N-where-N-is-a-LSX-ve.patch b/0102-LoongArch-Implement-vec_init-M-N-where-N-is-a-LSX-ve.patch new file mode 100644 index 0000000..b493452 --- /dev/null +++ 
b/0102-LoongArch-Implement-vec_init-M-N-where-N-is-a-LSX-ve.patch @@ -0,0 +1,253 @@ +From a321a294407781b2694fe9a3be0099fe38ccf13a Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Fri, 5 Jan 2024 15:38:25 +0800 +Subject: [PATCH 102/188] LoongArch: Implement vec_init where N is a LSX + vector mode + +This patch implements more vec_init optabs that can handle two LSX vectors producing a LASX +vector by concatenating them. When an lsx vector is concatenated with an LSX const_vector of +zeroes, the vec_concatz pattern can be used effectively. For example as below + +typedef short v8hi __attribute__ ((vector_size (16))); +typedef short v16hi __attribute__ ((vector_size (32))); +v8hi a, b; + +v16hi vec_initv16hiv8hi () +{ + return __builtin_shufflevector (a, b, 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15); +} + +Before this patch: + +vec_initv16hiv8hi: + addi.d $r3,$r3,-64 + .cfi_def_cfa_offset 64 + xvrepli.h $xr0,0 + la.local $r12,.LANCHOR0 + xvst $xr0,$r3,0 + xvst $xr0,$r3,32 + vld $vr0,$r12,0 + vst $vr0,$r3,0 + vld $vr0,$r12,16 + vst $vr0,$r3,32 + xvld $xr1,$r3,32 + xvld $xr2,$r3,32 + xvld $xr0,$r3,0 + xvilvh.h $xr0,$xr1,$xr0 + xvld $xr1,$r3,0 + xvilvl.h $xr1,$xr2,$xr1 + addi.d $r3,$r3,64 + .cfi_def_cfa_offset 0 + xvpermi.q $xr0,$xr1,32 + jr $r1 + +After this patch: + +vec_initv16hiv8hi: + la.local $r12,.LANCHOR0 + vld $vr0,$r12,32 + vld $vr2,$r12,48 + xvilvh.h $xr1,$xr2,$xr0 + xvilvl.h $xr0,$xr2,$xr0 + xvpermi.q $xr1,$xr0,32 + xvst $xr1,$r4,0 + jr $r1 + +gcc/ChangeLog: + + * config/loongarch/lasx.md (vec_initv32qiv16qi): Rename to .. + (vec_init): .. this, and extend to mode. + (@vec_concatz): New insn pattern. + * config/loongarch/loongarch.cc (loongarch_expand_vector_group_init): + Handle VALS containing two vectors. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c: New test. 
+--- + gcc/config/loongarch/lasx.md | 26 +++++++- + gcc/config/loongarch/loongarch.cc | 44 +++++++++++-- + .../loongarch/vector/lasx/lasx-vec-init-2.c | 65 +++++++++++++++++++ + 3 files changed, 128 insertions(+), 7 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index b4aa8e261..803c5dd93 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -465,6 +465,11 @@ + (V16HI "w") + (V32QI "w")]) + ++;; Half modes of all LASX vector modes, in lower-case. ++(define_mode_attr lasxhalf [(V32QI "v16qi") (V16HI "v8hi") ++ (V8SI "v4si") (V4DI "v2di") ++ (V8SF "v4sf") (V4DF "v2df")]) ++ + (define_expand "vec_init" + [(match_operand:LASX 0 "register_operand") + (match_operand:LASX 1 "")] +@@ -474,9 +479,9 @@ + DONE; + }) + +-(define_expand "vec_initv32qiv16qi" +- [(match_operand:V32QI 0 "register_operand") +- (match_operand:V16QI 1 "")] ++(define_expand "vec_init" ++ [(match_operand:LASX 0 "register_operand") ++ (match_operand: 1 "")] + "ISA_HAS_LASX" + { + loongarch_expand_vector_group_init (operands[0], operands[1]); +@@ -577,6 +582,21 @@ + [(set_attr "type" "simd_insert") + (set_attr "mode" "")]) + ++(define_insn "@vec_concatz" ++ [(set (match_operand:LASX 0 "register_operand" "=f") ++ (vec_concat:LASX ++ (match_operand: 1 "nonimmediate_operand") ++ (match_operand: 2 "const_0_operand")))] ++ "ISA_HAS_LASX" ++{ ++ if (MEM_P (operands[1])) ++ return "vld\t%w0,%1"; ++ else ++ return "vori.b\t%w0,%w1,0"; ++} ++ [(set_attr "type" "simd_splat") ++ (set_attr "mode" "")]) ++ + (define_insn "vec_concat" + [(set (match_operand:LASX 0 "register_operand" "=f") + (vec_concat:LASX +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index ddb32cea2..fccdc21a8 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -9842,10 +9842,46 @@ 
loongarch_gen_const_int_vector_shuffle (machine_mode mode, int val) + void + loongarch_expand_vector_group_init (rtx target, rtx vals) + { +- rtx ops[2] = { force_reg (E_V16QImode, XVECEXP (vals, 0, 0)), +- force_reg (E_V16QImode, XVECEXP (vals, 0, 1)) }; +- emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (E_V32QImode, ops[0], +- ops[1]))); ++ machine_mode vmode = GET_MODE (target); ++ machine_mode half_mode = VOIDmode; ++ rtx low = XVECEXP (vals, 0, 0); ++ rtx high = XVECEXP (vals, 0, 1); ++ ++ switch (vmode) ++ { ++ case E_V32QImode: ++ half_mode = V16QImode; ++ break; ++ case E_V16HImode: ++ half_mode = V8HImode; ++ break; ++ case E_V8SImode: ++ half_mode = V4SImode; ++ break; ++ case E_V4DImode: ++ half_mode = V2DImode; ++ break; ++ case E_V8SFmode: ++ half_mode = V4SFmode; ++ break; ++ case E_V4DFmode: ++ half_mode = V2DFmode; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (high == CONST0_RTX (half_mode)) ++ emit_insn (gen_vec_concatz (vmode, target, low, high)); ++ else ++ { ++ if (!register_operand (low, half_mode)) ++ low = force_reg (half_mode, low); ++ if (!register_operand (high, half_mode)) ++ high = force_reg (half_mode, high); ++ emit_insn (gen_rtx_SET (target, ++ gen_rtx_VEC_CONCAT (vmode, low, high))); ++ } + } + + /* Expand initialization of a vector which has all same elements. 
*/ +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c +new file mode 100644 +index 000000000..7592198c4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c +@@ -0,0 +1,65 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -fno-vect-cost-model -mlasx" } */ ++/* { dg-final { scan-assembler-times "vld" 12 } } */ ++ ++ ++typedef char v16qi __attribute__ ((vector_size (16))); ++typedef char v32qi __attribute__ ((vector_size (32))); ++ ++typedef short v8hi __attribute__ ((vector_size (16))); ++typedef short v16hi __attribute__ ((vector_size (32))); ++ ++typedef int v4si __attribute__ ((vector_size (16))); ++typedef int v8si __attribute__ ((vector_size (32))); ++ ++typedef long v2di __attribute__ ((vector_size (16))); ++typedef long v4di __attribute__ ((vector_size (32))); ++ ++typedef float v4sf __attribute__ ((vector_size (16))); ++typedef float v8sf __attribute__ ((vector_size (32))); ++ ++typedef double v2df __attribute__ ((vector_size (16))); ++typedef double v4df __attribute__ ((vector_size (32))); ++ ++v16qi a_qi, b_qi; ++v8hi a_hi, b_hi; ++v4si a_si, b_si; ++v2di a_di, b_di; ++v4sf a_sf, b_sf; ++v2df a_df, b_df; ++ ++v32qi ++foo_v32qi () ++{ ++ return __builtin_shufflevector (a_qi, b_qi, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); ++} ++ ++v16hi ++foo_v16qi () ++{ ++ return __builtin_shufflevector (a_hi, b_hi, 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15); ++} ++ ++v8si ++foo_v8si () ++{ ++ return __builtin_shufflevector (a_si, b_si, 0, 4, 1, 5, 2, 6, 3, 7); ++} ++ ++v4di ++foo_v4di () ++{ ++ return __builtin_shufflevector (a_di, b_di, 0, 2, 1, 3); ++} ++ ++v8sf ++foo_v8sf () ++{ ++ return __builtin_shufflevector (a_sf, b_sf, 0, 4, 1, 5, 2, 6, 3, 7); ++} ++ ++v4df ++foo_v4df () ++{ ++ return __builtin_shufflevector (a_df, b_df, 0, 2, 1, 3); ++} +-- 
+2.43.0 + diff --git a/0103-LoongArch-Handle-ISA-evolution-switches-along-with-o.patch b/0103-LoongArch-Handle-ISA-evolution-switches-along-with-o.patch new file mode 100644 index 0000000..354c3b0 --- /dev/null +++ b/0103-LoongArch-Handle-ISA-evolution-switches-along-with-o.patch @@ -0,0 +1,533 @@ +From 901663758281d4ce87a75e4d6e45de621b65f0cb Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Mon, 8 Jan 2024 09:14:07 +0800 +Subject: [PATCH 103/188] LoongArch: Handle ISA evolution switches along with + other options + +gcc/ChangeLog: + + * config/loongarch/genopts/genstr.sh: Prepend the isa_evolution + variable with the common la_ prefix. + * config/loongarch/genopts/loongarch.opt.in: Mark ISA evolution + flags as saved using TargetVariable. + * config/loongarch/loongarch.opt: Same. + * config/loongarch/loongarch-def.h: Define evolution_set to + mark changes to the -march default. + * config/loongarch/loongarch-driver.cc: Same. + * config/loongarch/loongarch-opts.cc: Same. + * config/loongarch/loongarch-opts.h: Define and use ISA evolution + conditions around the la_target structure. + * config/loongarch/loongarch.cc: Same. + * config/loongarch/loongarch.md: Same. + * config/loongarch/loongarch-builtins.cc: Same. + * config/loongarch/loongarch-c.cc: Same. + * config/loongarch/lasx.md: Same. + * config/loongarch/lsx.md: Same. + * config/loongarch/sync.md: Same. 
+--- + gcc/config/loongarch/genopts/genstr.sh | 2 +- + gcc/config/loongarch/genopts/loongarch.opt.in | 6 ++--- + gcc/config/loongarch/lasx.md | 4 ++-- + gcc/config/loongarch/loongarch-builtins.cc | 6 ++--- + gcc/config/loongarch/loongarch-c.cc | 2 +- + gcc/config/loongarch/loongarch-def.h | 5 +++- + gcc/config/loongarch/loongarch-driver.cc | 5 ++-- + gcc/config/loongarch/loongarch-opts.cc | 17 ++++++++++++- + gcc/config/loongarch/loongarch-opts.h | 24 +++++++++++++++---- + gcc/config/loongarch/loongarch.cc | 24 ++++++++----------- + gcc/config/loongarch/loongarch.md | 12 +++++----- + gcc/config/loongarch/loongarch.opt | 16 ++++++------- + gcc/config/loongarch/lsx.md | 4 ++-- + gcc/config/loongarch/sync.md | 22 ++++++++--------- + 14 files changed, 90 insertions(+), 59 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/genstr.sh b/gcc/config/loongarch/genopts/genstr.sh +index bcc616e98..391eca121 100755 +--- a/gcc/config/loongarch/genopts/genstr.sh ++++ b/gcc/config/loongarch/genopts/genstr.sh +@@ -107,7 +107,7 @@ EOF + print("") + print("m"$3) + gsub(/-/, "_", $3) +- print("Target Mask(ISA_"toupper($3)") Var(isa_evolution)") ++ print("Target Mask(ISA_"toupper($3)") Var(la_isa_evolution)") + $1=""; $2=""; $3="" + sub(/^ */, "", $0) + print($0) +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 102202b03..a866dab84 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -259,6 +259,6 @@ default value is 4. + ; Features added during ISA evolution. This concept is different from ISA + ; extension, read Section 1.5 of LoongArch v1.10 Volume 1 for the + ; explanation. These features may be implemented and enumerated with +-; CPUCFG independantly, so we use bit flags to specify them. +-Variable +-HOST_WIDE_INT isa_evolution = 0 ++; CPUCFG independently, so we use bit flags to specify them. 
++TargetVariable ++HOST_WIDE_INT la_isa_evolution = 0 +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 803c5dd93..fdfd65e4a 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1540,7 +1540,7 @@ + [(set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] + UNSPEC_LASX_XVFRECIPE))] +- "ISA_HAS_LASX && TARGET_FRECIPE" ++ "ISA_HAS_LASX && ISA_HAS_FRECIPE" + "xvfrecipe.\t%u0,%u1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) +@@ -1573,7 +1573,7 @@ + [(set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] + UNSPEC_LASX_XVFRSQRTE))] +- "ISA_HAS_LASX && TARGET_FRECIPE" ++ "ISA_HAS_LASX && ISA_HAS_FRECIPE" + "xvfrsqrte.\t%u0,%u1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index 85849ed29..e3b4dbc52 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -120,9 +120,9 @@ struct loongarch_builtin_description + AVAIL_ALL (hard_float, TARGET_HARD_FLOAT_ABI) + AVAIL_ALL (lsx, ISA_HAS_LSX) + AVAIL_ALL (lasx, ISA_HAS_LASX) +-AVAIL_ALL (frecipe, TARGET_FRECIPE && TARGET_HARD_FLOAT_ABI) +-AVAIL_ALL (lsx_frecipe, ISA_HAS_LSX && TARGET_FRECIPE) +-AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) ++AVAIL_ALL (frecipe, ISA_HAS_FRECIPE && TARGET_HARD_FLOAT_ABI) ++AVAIL_ALL (lsx_frecipe, ISA_HAS_LSX && ISA_HAS_FRECIPE) ++AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) + + /* Construct a loongarch_builtin_description from the given arguments. 
+ +diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc +index a89477a74..df2a482ad 100644 +--- a/gcc/config/loongarch/loongarch-c.cc ++++ b/gcc/config/loongarch/loongarch-c.cc +@@ -102,7 +102,7 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + else + builtin_define ("__loongarch_frlen=0"); + +- if (TARGET_HARD_FLOAT && TARGET_FRECIPE) ++ if (TARGET_HARD_FLOAT && ISA_HAS_FRECIPE) + builtin_define ("__loongarch_frecipe"); + + if (ISA_HAS_LSX) +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index f8f36f0e2..9e5eee0e2 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -132,8 +132,11 @@ struct loongarch_isa + + Using int64_t instead of HOST_WIDE_INT for C compatibility. */ + int64_t evolution; ++ int64_t evolution_set; + +- loongarch_isa () : base (0), fpu (0), simd (0), evolution (0) {} ++ loongarch_isa () : ++ base (0), fpu (0), simd (0), evolution (0), evolution_set (0) ++ {} + loongarch_isa base_ (int _base) { base = _base; return *this; } + loongarch_isa fpu_ (int _fpu) { fpu = _fpu; return *this; } + loongarch_isa simd_ (int _simd) { simd = _simd; return *this; } +diff --git a/gcc/config/loongarch/loongarch-driver.cc b/gcc/config/loongarch/loongarch-driver.cc +index b3626984d..b84a6eaf7 100644 +--- a/gcc/config/loongarch/loongarch-driver.cc ++++ b/gcc/config/loongarch/loongarch-driver.cc +@@ -42,9 +42,10 @@ extern struct obstack opts_obstack; + const char* + la_driver_init (int argc ATTRIBUTE_UNUSED, const char **argv ATTRIBUTE_UNUSED) + { +- /* Initialize all fields of la_target to -1 */ ++ /* Initialize all fields of la_target. 
*/ + loongarch_init_target (&la_target, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, +- M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET); ++ M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, ++ 0, 0); + return ""; + } + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index d31becc67..935d09f45 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -140,7 +140,9 @@ static int with_default_simd = 0; + void + loongarch_init_target (struct loongarch_target *target, + int cpu_arch, int cpu_tune, int fpu, int simd, +- int abi_base, int abi_ext, int cmodel) ++ int abi_base, int abi_ext, int cmodel, ++ HOST_WIDE_INT isa_evolution, ++ HOST_WIDE_INT isa_evolution_set) + { + if (!target) + return; +@@ -148,6 +150,8 @@ loongarch_init_target (struct loongarch_target *target, + target->cpu_tune = cpu_tune; + target->isa.fpu = fpu; + target->isa.simd = simd; ++ target->isa.evolution = isa_evolution; ++ target->isa.evolution_set = isa_evolution_set; + target->abi.base = abi_base; + target->abi.ext = abi_ext; + target->cmodel = cmodel; +@@ -184,6 +188,9 @@ loongarch_config_target (struct loongarch_target *target, + M_OPT_ABSENT (target->abi.base) ? 0 : 1, + }; + ++ int64_t isa_evolution = target->isa.evolution; ++ int64_t isa_evolution_set = target->isa.evolution_set; ++ + /* 1. Target ABI */ + if (constrained.abi_base) + t.abi.base = target->abi.base; +@@ -394,6 +401,13 @@ config_target_isa: + } + } + ++ /* Apply the ISA evolution feature switches from the user. */ ++ HOST_WIDE_INT isa_evolution_orig = t.isa.evolution; ++ t.isa.evolution &= ~(~isa_evolution & isa_evolution_set); ++ t.isa.evolution |= isa_evolution & isa_evolution_set; ++ ++ /* evolution_set means "what's different from the -march default". */ ++ t.isa.evolution_set = isa_evolution_orig ^ t.isa.evolution; + + /* 4. 
ABI-ISA compatibility */ + /* Note: +@@ -774,4 +788,5 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + /* status of -mfpu */ + opts->x_la_opt_fpu = target->isa.fpu; + opts->x_la_opt_simd = target->isa.simd; ++ opts->x_la_isa_evolution = target->isa.evolution; + } +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 8491bee0d..204338553 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -34,7 +34,9 @@ extern struct loongarch_target la_target; + void + loongarch_init_target (struct loongarch_target *target, + int cpu_arch, int cpu_tune, int fpu, int simd, +- int abi_base, int abi_ext, int cmodel); ++ int abi_base, int abi_ext, int cmodel, ++ HOST_WIDE_INT isa_evolutions, ++ HOST_WIDE_INT isa_evolutions_set); + + + /* Handler for "-m" option combinations, +@@ -82,9 +84,23 @@ struct loongarch_flags { + || la_target.abi.base == ABI_BASE_LP64F \ + || la_target.abi.base == ABI_BASE_LP64S) + +-#define ISA_HAS_LSX (la_target.isa.simd == ISA_EXT_SIMD_LSX \ +- || la_target.isa.simd == ISA_EXT_SIMD_LASX) +-#define ISA_HAS_LASX (la_target.isa.simd == ISA_EXT_SIMD_LASX) ++#define ISA_HAS_LSX \ ++ (la_target.isa.simd == ISA_EXT_SIMD_LSX \ ++ || la_target.isa.simd == ISA_EXT_SIMD_LASX) ++ ++#define ISA_HAS_LASX \ ++ (la_target.isa.simd == ISA_EXT_SIMD_LASX) ++ ++#define ISA_HAS_FRECIPE \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_FRECIPE) ++#define ISA_HAS_DIV32 \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_DIV32) ++#define ISA_HAS_LAM_BH \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_LAM_BH) ++#define ISA_HAS_LAMCAS \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_LAMCAS) ++#define ISA_HAS_LD_SEQ_SA \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_LD_SEQ_SA) + + /* TARGET_ macros for use in *.md template conditionals */ + #define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464) +diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc +index fccdc21a8..b0bb67d60 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3859,7 +3859,7 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + else + { + *total = loongarch_cost->int_div_si; +- if (TARGET_64BIT && !TARGET_DIV32) ++ if (TARGET_64BIT && !ISA_HAS_DIV32) + *total += COSTS_N_INSNS (2); + } + +@@ -6107,7 +6107,7 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + if (loongarch_cas_failure_memorder_needs_acquire ( + memmodel_from_int (INTVAL (op)))) + fputs ("dbar\t0b10100", file); +- else if (!TARGET_LD_SEQ_SA) ++ else if (!ISA_HAS_LD_SEQ_SA) + fputs ("dbar\t0x700", file); + break; + +@@ -7509,7 +7509,8 @@ loongarch_option_override_internal (struct gcc_options *opts, + loongarch_init_target (&la_target, + la_opt_cpu_arch, la_opt_cpu_tune, la_opt_fpu, + la_opt_simd, la_opt_abi_base, la_opt_abi_ext, +- la_opt_cmodel); ++ la_opt_cmodel, opts->x_la_isa_evolution, ++ opts_set->x_la_isa_evolution); + + /* Handle target-specific options: compute defaults/conflicts etc. */ + loongarch_config_target (&la_target, NULL, 0); +@@ -7550,11 +7551,6 @@ loongarch_option_override_internal (struct gcc_options *opts, + if (loongarch_branch_cost == 0) + loongarch_branch_cost = loongarch_cost->branch_cost; + +- /* If the user hasn't disabled a feature added during ISA evolution, +- use the processor's default. */ +- isa_evolution |= (la_target.isa.evolution & +- ~global_options_set.x_isa_evolution); +- + /* Enable sw prefetching at -O3 and higher. 
*/ + if (opts->x_flag_prefetch_loop_arrays < 0 + && (opts->x_optimize >= 3 || opts->x_flag_profile_use) +@@ -7685,7 +7681,7 @@ loongarch_option_override_internal (struct gcc_options *opts, + } + if (loongarch_recip) + recip_mask |= RECIP_MASK_ALL; +- if (!TARGET_FRECIPE) ++ if (!ISA_HAS_FRECIPE) + recip_mask = RECIP_MASK_NONE; + } + +@@ -10875,11 +10871,11 @@ loongarch_asm_code_end (void) + loongarch_cpu_strings [la_target.cpu_tune]); + fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START, + loongarch_isa_base_strings [la_target.isa.base]); +- DUMP_FEATURE (TARGET_FRECIPE); +- DUMP_FEATURE (TARGET_DIV32); +- DUMP_FEATURE (TARGET_LAM_BH); +- DUMP_FEATURE (TARGET_LAMCAS); +- DUMP_FEATURE (TARGET_LD_SEQ_SA); ++ DUMP_FEATURE (ISA_HAS_FRECIPE); ++ DUMP_FEATURE (ISA_HAS_DIV32); ++ DUMP_FEATURE (ISA_HAS_LAM_BH); ++ DUMP_FEATURE (ISA_HAS_LAMCAS); ++ DUMP_FEATURE (ISA_HAS_LD_SEQ_SA); + } + + fputs ("\n\n", asm_out_file); +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 02c537d4c..23653a2b0 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -425,7 +425,7 @@ + + ;; A mode for anything legal as a input of a div or mod instruction. + (define_mode_iterator DIV [(DI "TARGET_64BIT") +- (SI "!TARGET_64BIT || TARGET_DIV32")]) ++ (SI "!TARGET_64BIT || ISA_HAS_DIV32")]) + + ;; In GPR templates, a string like "mul." will expand to "mul.w" in the + ;; 32-bit version and "mul.d" in the 64-bit version. 
+@@ -941,7 +941,7 @@ + [(set (match_operand:ANYF 0 "register_operand" "=f") + (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] + UNSPEC_RECIPE))] +- "TARGET_FRECIPE" ++ "ISA_HAS_FRECIPE" + "frecipe.\t%0,%1" + [(set_attr "type" "frecipe") + (set_attr "mode" "") +@@ -954,7 +954,7 @@ + (match_operand:GPR 2 "register_operand")))] + "" + { +- if (GET_MODE (operands[0]) == SImode && TARGET_64BIT && !TARGET_DIV32) ++ if (GET_MODE (operands[0]) == SImode && TARGET_64BIT && !ISA_HAS_DIV32) + { + rtx reg1 = gen_reg_rtx (DImode); + rtx reg2 = gen_reg_rtx (DImode); +@@ -994,7 +994,7 @@ + (sign_extend + (any_div:SI (match_operand:SI 1 "register_operand" "r,r,0") + (match_operand:SI 2 "register_operand" "r,r,r"))))] +- "TARGET_64BIT && TARGET_DIV32" ++ "TARGET_64BIT && ISA_HAS_DIV32" + { + return loongarch_output_division (".w\t%0,%1,%2", operands); + } +@@ -1014,7 +1014,7 @@ + (any_div:DI (match_operand:DI 1 "register_operand" "r,r,0") + (match_operand:DI 2 "register_operand" "r,r,r")) 0)] + UNSPEC_FAKE_ANY_DIV)))] +- "TARGET_64BIT && !TARGET_DIV32" ++ "TARGET_64BIT && !ISA_HAS_DIV32" + { + return loongarch_output_division (".w\t%0,%1,%2", operands); + } +@@ -1197,7 +1197,7 @@ + [(set (match_operand:ANYF 0 "register_operand" "=f") + (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] + UNSPEC_RSQRTE))] +- "TARGET_FRECIPE" ++ "ISA_HAS_FRECIPE" + "frsqrte.\t%0,%1" + [(set_attr "type" "frsqrte") + (set_attr "mode" "")]) +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 56f6a9564..b5a46df4e 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -267,26 +267,26 @@ default value is 4. + ; Features added during ISA evolution. This concept is different from ISA + ; extension, read Section 1.5 of LoongArch v1.10 Volume 1 for the + ; explanation. These features may be implemented and enumerated with +-; CPUCFG independantly, so we use bit flags to specify them. 
+-Variable +-HOST_WIDE_INT isa_evolution = 0 ++; CPUCFG independently, so we use bit flags to specify them. ++TargetVariable ++HOST_WIDE_INT la_isa_evolution = 0 + + mfrecipe +-Target Mask(ISA_FRECIPE) Var(isa_evolution) ++Target Mask(ISA_FRECIPE) Var(la_isa_evolution) + Support frecipe.{s/d} and frsqrte.{s/d} instructions. + + mdiv32 +-Target Mask(ISA_DIV32) Var(isa_evolution) ++Target Mask(ISA_DIV32) Var(la_isa_evolution) + Support div.w[u] and mod.w[u] instructions with inputs not sign-extended. + + mlam-bh +-Target Mask(ISA_LAM_BH) Var(isa_evolution) ++Target Mask(ISA_LAM_BH) Var(la_isa_evolution) + Support am{swap/add}[_db].{b/h} instructions. + + mlamcas +-Target Mask(ISA_LAMCAS) Var(isa_evolution) ++Target Mask(ISA_LAMCAS) Var(la_isa_evolution) + Support amcas[_db].{b/h/w/d} instructions. + + mld-seq-sa +-Target Mask(ISA_LD_SEQ_SA) Var(isa_evolution) ++Target Mask(ISA_LD_SEQ_SA) Var(la_isa_evolution) + Do not need load-load barriers (dbar 0x700). +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 02e89247b..612377436 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -1479,7 +1479,7 @@ + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] + UNSPEC_LSX_VFRECIPE))] +- "ISA_HAS_LSX && TARGET_FRECIPE" ++ "ISA_HAS_LSX && ISA_HAS_FRECIPE" + "vfrecipe.\t%w0,%w1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) +@@ -1512,7 +1512,7 @@ + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] + UNSPEC_LSX_VFRSQRTE))] +- "ISA_HAS_LSX && TARGET_FRECIPE" ++ "ISA_HAS_LSX && ISA_HAS_FRECIPE" + "vfrsqrte.\t%w0,%w1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index a678e7131..5da5c2780 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -124,9 +124,9 @@ + return 
"ld.\t%0,%1\\n\\t" + "dbar\t0x14"; + case MEMMODEL_RELAXED: +- return TARGET_LD_SEQ_SA ? "ld.\t%0,%1" +- : "ld.\t%0,%1\\n\\t" +- "dbar\t0x700"; ++ return ISA_HAS_LD_SEQ_SA ? "ld.\t%0,%1" ++ : "ld.\t%0,%1\\n\\t" ++ "dbar\t0x700"; + + default: + /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, +@@ -193,7 +193,7 @@ + (match_operand:SHORT 1 "reg_or_0_operand" "rJ")) + (match_operand:SI 2 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] +- "TARGET_LAM_BH" ++ "ISA_HAS_LAM_BH" + "amadd%A2.\t$zero,%z1,%0" + [(set (attr "length") (const_int 4))]) + +@@ -230,7 +230,7 @@ + UNSPEC_SYNC_EXCHANGE)) + (set (match_dup 1) + (match_operand:SHORT 2 "register_operand" "r"))] +- "TARGET_LAM_BH" ++ "ISA_HAS_LAM_BH" + "amswap%A3.\t%0,%z2,%1" + [(set (attr "length") (const_int 4))]) + +@@ -266,7 +266,7 @@ + (match_operand:QHWD 3 "reg_or_0_operand" "rJ") + (match_operand:SI 4 "const_int_operand")] ;; mod_s + UNSPEC_COMPARE_AND_SWAP))] +- "TARGET_LAMCAS" ++ "ISA_HAS_LAMCAS" + "ori\t%0,%z2,0\n\tamcas%A4.\t%0,%z3,%1" + [(set (attr "length") (const_int 8))]) + +@@ -296,7 +296,7 @@ + + operands[6] = mod_s; + +- if (TARGET_LAMCAS) ++ if (ISA_HAS_LAMCAS) + emit_insn (gen_atomic_cas_value_strong_amcas (operands[1], operands[2], + operands[3], operands[4], + operands[6])); +@@ -422,7 +422,7 @@ + + operands[6] = mod_s; + +- if (TARGET_LAMCAS) ++ if (ISA_HAS_LAMCAS) + emit_insn (gen_atomic_cas_value_strong_amcas (operands[1], operands[2], + operands[3], operands[4], + operands[6])); +@@ -642,7 +642,7 @@ + (match_operand:SHORT 2 "register_operand"))] + "" + { +- if (TARGET_LAM_BH) ++ if (ISA_HAS_LAM_BH) + emit_insn (gen_atomic_exchange_short (operands[0], operands[1], operands[2], operands[3])); + else + { +@@ -663,7 +663,7 @@ + (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] +- "TARGET_LAM_BH" ++ "ISA_HAS_LAM_BH" + "amadd%A3.\t%0,%z2,%1" + [(set (attr "length") (const_int 4))]) + +@@ -678,7 
+678,7 @@ + UNSPEC_SYNC_OLD_OP))] + "" + { +- if (TARGET_LAM_BH) ++ if (ISA_HAS_LAM_BH) + emit_insn (gen_atomic_fetch_add_short (operands[0], operands[1], + operands[2], operands[3])); + else +-- +2.43.0 + diff --git a/0104-LoongArch-Rename-ISA_BASE_LA64V100-to-ISA_BASE_LA64.patch b/0104-LoongArch-Rename-ISA_BASE_LA64V100-to-ISA_BASE_LA64.patch new file mode 100644 index 0000000..d739b01 --- /dev/null +++ b/0104-LoongArch-Rename-ISA_BASE_LA64V100-to-ISA_BASE_LA64.patch @@ -0,0 +1,220 @@ +From 282b0847a86fab49fb3582371647fa4cb2d941ed Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Mon, 8 Jan 2024 09:14:08 +0800 +Subject: [PATCH 104/188] LoongArch: Rename ISA_BASE_LA64V100 to ISA_BASE_LA64 + +LoongArch ISA manual v1.10 suggests that software should not depend on +the ISA version number for marking processor features. The ISA version +number is now defined as a collective name of individual ISA evolutions. +Since there is a independent ISA evolution mask now, we can drop the +version information from the base ISA. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch-strings: Rename. + * config/loongarch/genopts/loongarch.opt.in: Same. + * config/loongarch/loongarch-cpu.cc: Same. + * config/loongarch/loongarch-def.cc: Same. + * config/loongarch/loongarch-def.h: Same. + * config/loongarch/loongarch-opts.cc: Same. + * config/loongarch/loongarch-opts.h: Same. + * config/loongarch/loongarch-str.h: Same. + * config/loongarch/loongarch.opt: Same. 
+--- + gcc/config/loongarch/genopts/loongarch-strings | 2 +- + gcc/config/loongarch/genopts/loongarch.opt.in | 2 +- + gcc/config/loongarch/loongarch-cpu.cc | 2 +- + gcc/config/loongarch/loongarch-def.cc | 14 +++++++------- + gcc/config/loongarch/loongarch-def.h | 6 +++--- + gcc/config/loongarch/loongarch-opts.cc | 10 +++++----- + gcc/config/loongarch/loongarch-opts.h | 2 +- + gcc/config/loongarch/loongarch-str.h | 2 +- + gcc/config/loongarch/loongarch.opt | 2 +- + 9 files changed, 21 insertions(+), 21 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index 411ad5696..ce70b8b9c 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -29,7 +29,7 @@ STR_CPU_LA464 la464 + STR_CPU_LA664 la664 + + # Base architecture +-STR_ISA_BASE_LA64V100 la64 ++STR_ISA_BASE_LA64 la64 + + # -mfpu + OPTSTR_ISA_EXT_FPU fpu +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index a866dab84..851d8d1f3 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -33,7 +33,7 @@ Name(isa_base) Type(int) + Basic ISAs of LoongArch: + + EnumValue +-Enum(isa_base) String(@@STR_ISA_BASE_LA64V100@@) Value(ISA_BASE_LA64V100) ++Enum(isa_base) String(@@STR_ISA_BASE_LA64@@) Value(ISA_BASE_LA64) + + ;; ISA extensions / adjustments + Enum +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index 7e0625835..551d4f72c 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -133,7 +133,7 @@ fill_native_cpu_config (struct loongarch_target *tgt) + switch (cpucfg_cache[1] & 0x3) + { + case 0x02: +- tmp = ISA_BASE_LA64V100; ++ tmp = ISA_BASE_LA64; + break; + + default: +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 
843be78e4..533dd0af2 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -48,16 +48,16 @@ array_arch loongarch_cpu_default_isa = + array_arch () + .set (CPU_LOONGARCH64, + loongarch_isa () +- .base_ (ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64)) + .set (CPU_LA464, + loongarch_isa () +- .base_ (ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64) + .simd_ (ISA_EXT_SIMD_LASX)) + .set (CPU_LA664, + loongarch_isa () +- .base_ (ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64) + .simd_ (ISA_EXT_SIMD_LASX) + .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA +@@ -153,7 +153,7 @@ array_tune loongarch_cpu_multipass_dfa_lookahead = array_tune () + + array loongarch_isa_base_strings = + array () +- .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100); ++ .set (ISA_BASE_LA64, STR_ISA_BASE_LA64); + + array loongarch_isa_ext_strings = + array () +@@ -189,15 +189,15 @@ array, N_ABI_BASE_TYPES> + array () + .set (ABI_EXT_BASE, + loongarch_isa () +- .base_ (ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64))) + .set (ABI_BASE_LP64F, + array () + .set (ABI_EXT_BASE, + loongarch_isa () +- .base_ (ISA_BASE_LA64V100) ++ .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU32))) + .set (ABI_BASE_LP64S, + array () + .set (ABI_EXT_BASE, +- loongarch_isa ().base_ (ISA_BASE_LA64V100))); ++ loongarch_isa ().base_ (ISA_BASE_LA64))); +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 9e5eee0e2..a133ea265 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -55,9 +55,9 @@ along with GCC; see the file COPYING3. If not see + + /* enum isa_base */ + +-/* LoongArch V1.00. 
*/ +-#define ISA_BASE_LA64V100 0 +-#define N_ISA_BASE_TYPES 1 ++/* LoongArch64 */ ++#define ISA_BASE_LA64 0 ++#define N_ISA_BASE_TYPES 1 + extern loongarch_def_array + loongarch_isa_base_strings; + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 935d09f45..cf4c7bc93 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -567,17 +567,17 @@ isa_default_abi (const struct loongarch_isa *isa) + switch (isa->fpu) + { + case ISA_EXT_FPU64: +- if (isa->base >= ISA_BASE_LA64V100) ++ if (isa->base >= ISA_BASE_LA64) + abi.base = ABI_BASE_LP64D; + break; + + case ISA_EXT_FPU32: +- if (isa->base >= ISA_BASE_LA64V100) ++ if (isa->base >= ISA_BASE_LA64) + abi.base = ABI_BASE_LP64F; + break; + + case ISA_EXT_NONE: +- if (isa->base >= ISA_BASE_LA64V100) ++ if (isa->base >= ISA_BASE_LA64) + abi.base = ABI_BASE_LP64S; + break; + +@@ -596,8 +596,8 @@ isa_base_compat_p (const struct loongarch_isa *set1, + { + switch (set2->base) + { +- case ISA_BASE_LA64V100: +- return (set1->base >= ISA_BASE_LA64V100); ++ case ISA_BASE_LA64: ++ return (set1->base >= ISA_BASE_LA64); + + default: + gcc_unreachable (); +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 204338553..463812136 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -79,7 +79,7 @@ struct loongarch_flags { + #define TARGET_DOUBLE_FLOAT (la_target.isa.fpu == ISA_EXT_FPU64) + #define TARGET_DOUBLE_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64D) + +-#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100) ++#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64) + #define TARGET_ABI_LP64 (la_target.abi.base == ABI_BASE_LP64D \ + || la_target.abi.base == ABI_BASE_LP64F \ + || la_target.abi.base == ABI_BASE_LP64S) +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index a8821acb0..2251df38b 
100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -32,7 +32,7 @@ along with GCC; see the file COPYING3. If not see + #define STR_CPU_LA464 "la464" + #define STR_CPU_LA664 "la664" + +-#define STR_ISA_BASE_LA64V100 "la64" ++#define STR_ISA_BASE_LA64 "la64" + + #define OPTSTR_ISA_EXT_FPU "fpu" + #define STR_NONE "none" +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index b5a46df4e..df7314973 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -41,7 +41,7 @@ Name(isa_base) Type(int) + Basic ISAs of LoongArch: + + EnumValue +-Enum(isa_base) String(la64) Value(ISA_BASE_LA64V100) ++Enum(isa_base) String(la64) Value(ISA_BASE_LA64) + + ;; ISA extensions / adjustments + Enum +-- +2.43.0 + diff --git a/0105-LoongArch-Use-enums-for-constants.patch b/0105-LoongArch-Use-enums-for-constants.patch new file mode 100644 index 0000000..9ce3b97 --- /dev/null +++ b/0105-LoongArch-Use-enums-for-constants.patch @@ -0,0 +1,181 @@ +From 907b35525c8abcdfe22152ebce6640dbe3905cce Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Mon, 8 Jan 2024 09:14:09 +0800 +Subject: [PATCH 105/188] LoongArch: Use enums for constants + +Target features constants from loongarch-def.h are currently defined as macros. +Switch to enums for better look in the debugger. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.h: Define constants with + enums instead of Macros. +--- + gcc/config/loongarch/loongarch-def.h | 115 ++++++++++++++++----------- + 1 file changed, 67 insertions(+), 48 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index a133ea265..28da3ae5f 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -23,12 +23,10 @@ along with GCC; see the file COPYING3. If not see + - ISA extensions (isa_ext), + - base ABI types (abi_base), + - ABI extension types (abi_ext). 
+- +- - code models (cmodel) +- - other command-line switches (switch) ++ - code models (cmodel) + + These values are primarily used for implementing option handling +- logic in "loongarch.opt", "loongarch-driver.c" and "loongarch-opt.c". ++ logic in "loongarch.opt", "loongarch-driver.cc" and "loongarch-opt.cc". + + As for the result of this option handling process, the following + scheme is adopted to represent the final configuration: +@@ -53,30 +51,40 @@ along with GCC; see the file COPYING3. If not see + #include "loongarch-def-array.h" + #include "loongarch-tune.h" + +-/* enum isa_base */ + +-/* LoongArch64 */ +-#define ISA_BASE_LA64 0 +-#define N_ISA_BASE_TYPES 1 ++/* ISA base */ ++enum { ++ ISA_BASE_LA64 = 0, /* LoongArch64 */ ++ N_ISA_BASE_TYPES = 1 ++}; ++ + extern loongarch_def_array + loongarch_isa_base_strings; + +-/* enum isa_ext_* */ +-#define ISA_EXT_NONE 0 +-#define ISA_EXT_FPU32 1 +-#define ISA_EXT_FPU64 2 +-#define N_ISA_EXT_FPU_TYPES 3 +-#define ISA_EXT_SIMD_LSX 3 +-#define ISA_EXT_SIMD_LASX 4 +-#define N_ISA_EXT_TYPES 5 ++ ++/* ISA extensions */ ++enum { ++ ISA_EXT_NONE = 0, ++ ISA_EXT_FPU32 = 1, ++ ISA_EXT_FPU64 = 2, ++ N_ISA_EXT_FPU_TYPES = 3, ++ ISA_EXT_SIMD_LSX = 3, ++ ISA_EXT_SIMD_LASX = 4, ++ N_ISA_EXT_TYPES = 5 ++}; ++ + extern loongarch_def_array + loongarch_isa_ext_strings; + +-/* enum abi_base */ +-#define ABI_BASE_LP64D 0 +-#define ABI_BASE_LP64F 1 +-#define ABI_BASE_LP64S 2 +-#define N_ABI_BASE_TYPES 3 ++ ++/* Base ABI */ ++enum { ++ ABI_BASE_LP64D = 0, ++ ABI_BASE_LP64F = 1, ++ ABI_BASE_LP64S = 2, ++ N_ABI_BASE_TYPES = 3 ++}; ++ + extern loongarch_def_array + loongarch_abi_base_strings; + +@@ -90,28 +98,38 @@ extern loongarch_def_array + (abi_base == ABI_BASE_LP64S) + + +-/* enum abi_ext */ +-#define ABI_EXT_BASE 0 +-#define N_ABI_EXT_TYPES 1 ++/* ABI Extension */ ++enum { ++ ABI_EXT_BASE = 0, ++ N_ABI_EXT_TYPES = 1 ++}; ++ + extern loongarch_def_array + loongarch_abi_ext_strings; + +-/* enum cmodel */ +-#define CMODEL_NORMAL 0 
+-#define CMODEL_TINY 1 +-#define CMODEL_TINY_STATIC 2 +-#define CMODEL_MEDIUM 3 +-#define CMODEL_LARGE 4 +-#define CMODEL_EXTREME 5 +-#define N_CMODEL_TYPES 6 ++ ++/* Code Model */ ++enum { ++ CMODEL_NORMAL = 0, ++ CMODEL_TINY = 1, ++ CMODEL_TINY_STATIC = 2, ++ CMODEL_MEDIUM = 3, ++ CMODEL_LARGE = 4, ++ CMODEL_EXTREME = 5, ++ N_CMODEL_TYPES = 6 ++}; ++ + extern loongarch_def_array + loongarch_cmodel_strings; + +-/* enum explicit_relocs */ +-#define EXPLICIT_RELOCS_AUTO 0 +-#define EXPLICIT_RELOCS_NONE 1 +-#define EXPLICIT_RELOCS_ALWAYS 2 +-#define N_EXPLICIT_RELOCS_TYPES 3 ++ ++/* Explicit Reloc Type */ ++enum { ++ EXPLICIT_RELOCS_AUTO = 0, ++ EXPLICIT_RELOCS_NONE = 1, ++ EXPLICIT_RELOCS_ALWAYS = 2, ++ N_EXPLICIT_RELOCS_TYPES = 3 ++}; + + /* The common default value for variables whose assignments + are triggered by command-line options. */ +@@ -159,17 +177,18 @@ struct loongarch_target + int cmodel; /* CMODEL_ */ + }; + +-/* CPU properties. */ +-/* index */ +-#define CPU_NATIVE 0 +-#define CPU_ABI_DEFAULT 1 +-#define CPU_LOONGARCH64 2 +-#define CPU_LA464 3 +-#define CPU_LA664 4 +-#define N_ARCH_TYPES 5 +-#define N_TUNE_TYPES 5 +- +-/* parallel tables. 
*/ ++/* CPU model */ ++enum { ++ CPU_NATIVE = 0, ++ CPU_ABI_DEFAULT = 1, ++ CPU_LOONGARCH64 = 2, ++ CPU_LA464 = 3, ++ CPU_LA664 = 4, ++ N_ARCH_TYPES = 5, ++ N_TUNE_TYPES = 5 ++}; ++ ++/* CPU model properties */ + extern loongarch_def_array + loongarch_cpu_strings; + extern loongarch_def_array +-- +2.43.0 + diff --git a/0106-LoongArch-Simplify-mexplicit-reloc-definitions.patch b/0106-LoongArch-Simplify-mexplicit-reloc-definitions.patch new file mode 100644 index 0000000..e0ac56f --- /dev/null +++ b/0106-LoongArch-Simplify-mexplicit-reloc-definitions.patch @@ -0,0 +1,124 @@ +From dc572aebb3a2c9062014ec50764bbc702dbb8a20 Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Mon, 8 Jan 2024 09:14:10 +0800 +Subject: [PATCH 106/188] LoongArch: Simplify -mexplicit-reloc definitions + +Since we do not need printing or manual parsing of this option, +(whether in the driver or for target attributes to be supported later) +it can be handled in the .opt file framework. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch-strings: Remove explicit-reloc + argument string definitions. + * config/loongarch/loongarch-str.h: Same. + * config/loongarch/genopts/loongarch.opt.in: Mark -m[no-]explicit-relocs + as aliases to -mexplicit-relocs={always,none} + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch.cc: Same. 
+--- + gcc/config/loongarch/genopts/loongarch-strings | 6 ------ + gcc/config/loongarch/genopts/loongarch.opt.in | 8 ++++---- + gcc/config/loongarch/loongarch-str.h | 5 ----- + gcc/config/loongarch/loongarch.cc | 12 ------------ + gcc/config/loongarch/loongarch.opt | 2 +- + 5 files changed, 5 insertions(+), 28 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index ce70b8b9c..99fd4e7cd 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -64,9 +64,3 @@ STR_CMODEL_TS tiny-static + STR_CMODEL_MEDIUM medium + STR_CMODEL_LARGE large + STR_CMODEL_EXTREME extreme +- +-# -mexplicit-relocs +-OPTSTR_EXPLICIT_RELOCS explicit-relocs +-STR_EXPLICIT_RELOCS_AUTO auto +-STR_EXPLICIT_RELOCS_NONE none +-STR_EXPLICIT_RELOCS_ALWAYS always +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 851d8d1f3..f2055b55e 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -181,20 +181,20 @@ Name(explicit_relocs) Type(int) + The code model option names for -mexplicit-relocs: + + EnumValue +-Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_AUTO@@) Value(EXPLICIT_RELOCS_AUTO) ++Enum(explicit_relocs) String(auto) Value(EXPLICIT_RELOCS_AUTO) + + EnumValue +-Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_NONE@@) Value(EXPLICIT_RELOCS_NONE) ++Enum(explicit_relocs) String(none) Value(EXPLICIT_RELOCS_NONE) + + EnumValue +-Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_ALWAYS@@) Value(EXPLICIT_RELOCS_ALWAYS) ++Enum(explicit_relocs) String(always) Value(EXPLICIT_RELOCS_ALWAYS) + + mexplicit-relocs= + Target RejectNegative Joined Enum(explicit_relocs) Var(la_opt_explicit_relocs) Init(M_OPT_UNSET) + Use %reloc() assembly operators. 
+ + mexplicit-relocs +-Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) ++Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). + + mrecip +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index 2251df38b..cacae38c0 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -63,11 +63,6 @@ along with GCC; see the file COPYING3. If not see + #define STR_CMODEL_LARGE "large" + #define STR_CMODEL_EXTREME "extreme" + +-#define OPTSTR_EXPLICIT_RELOCS "explicit-relocs" +-#define STR_EXPLICIT_RELOCS_AUTO "auto" +-#define STR_EXPLICIT_RELOCS_NONE "none" +-#define STR_EXPLICIT_RELOCS_ALWAYS "always" +- + #define OPTSTR_FRECIPE "frecipe" + #define OPTSTR_DIV32 "div32" + #define OPTSTR_LAM_BH "lam-bh" +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index b0bb67d60..8cd703caa 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -7518,18 +7518,6 @@ loongarch_option_override_internal (struct gcc_options *opts, + loongarch_update_gcc_opt_status (&la_target, opts, opts_set); + loongarch_cpu_option_override (&la_target, opts, opts_set); + +- if (la_opt_explicit_relocs != M_OPT_UNSET +- && la_opt_explicit_relocs_backward != M_OPT_UNSET) +- error ("do not use %qs (with %qs) and %qs (without %qs) together", +- "-mexplicit-relocs=", "=", +- la_opt_explicit_relocs_backward ? "-mexplicit-relocs" +- : "-mno-explicit-relocs", "="); +- +- if (la_opt_explicit_relocs_backward != M_OPT_UNSET) +- la_opt_explicit_relocs = (la_opt_explicit_relocs_backward +- ? EXPLICIT_RELOCS_ALWAYS +- : EXPLICIT_RELOCS_NONE); +- + if (la_opt_explicit_relocs == M_OPT_UNSET) + la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS + ? 
(loongarch_mrelax +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index df7314973..d6e337ac2 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -202,7 +202,7 @@ Target RejectNegative Joined Enum(explicit_relocs) Var(la_opt_explicit_relocs) I + Use %reloc() assembly operators. + + mexplicit-relocs +-Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) ++Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). + + mrecip +-- +2.43.0 + diff --git a/0107-LoongArch-testsuite-Add-loongarch-support-to-slp-21..patch b/0107-LoongArch-testsuite-Add-loongarch-support-to-slp-21..patch new file mode 100644 index 0000000..a5f2b12 --- /dev/null +++ b/0107-LoongArch-testsuite-Add-loongarch-support-to-slp-21..patch @@ -0,0 +1,35 @@ +From f90e31b6dc8c99f6670dee9a120c5dd9fa9a18d9 Mon Sep 17 00:00:00 2001 +From: chenxiaolong +Date: Wed, 10 Jan 2024 15:25:21 +0800 +Subject: [PATCH 107/188] LoongArch: testsuite: Add loongarch support to + slp-21.c. + +The function of this test is to check that the compiler supports vectorization +using SLP and vec_{load/store/*}_lanes. However, vec_{load/store/*}_lanes are +not supported on LoongArch, such as the corresponding "st4/ld4" directives on +aarch64. + +gcc/testsuite/ChangeLog: + + * gcc.dg/vect/slp-21.c: Add loongarch. +--- + gcc/testsuite/gcc.dg/vect/slp-21.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.dg/vect/slp-21.c b/gcc/testsuite/gcc.dg/vect/slp-21.c +index 4b83adb98..3b7e92fe8 100644 +--- a/gcc/testsuite/gcc.dg/vect/slp-21.c ++++ b/gcc/testsuite/gcc.dg/vect/slp-21.c +@@ -210,7 +210,7 @@ int main (void) + + Not all vect_perm targets support that, and it's a bit too specific to have + its own effective-target selector, so we just test targets directly. 
*/ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target { powerpc64*-*-* s390*-*-* } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_strided4 && { ! { powerpc64*-*-* s390*-*-* } } } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target { powerpc64*-*-* s390*-*-* loongarch*-*-* } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_strided4 && { ! { powerpc64*-*-* s390*-*-* loongarch*-*-* } } } } } } */ + /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided4 } } } } } */ + +-- +2.43.0 + diff --git a/0108-LoongArch-Optimized-some-of-the-symbolic-expansion-i.patch b/0108-LoongArch-Optimized-some-of-the-symbolic-expansion-i.patch new file mode 100644 index 0000000..858ca17 --- /dev/null +++ b/0108-LoongArch-Optimized-some-of-the-symbolic-expansion-i.patch @@ -0,0 +1,228 @@ +From 9b19eb071fe3826aa61567b927fc95a37f6560f7 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Fri, 8 Dec 2023 10:16:48 +0800 +Subject: [PATCH 108/188] LoongArch: Optimized some of the symbolic expansion + instructions generated during bitwise operations. + +There are two mode iterators defined in the loongarch.md: + (define_mode_iterator GPR [SI (DI "TARGET_64BIT")]) + and + (define_mode_iterator X [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")]) +Replace the mode in the bit arithmetic from GPR to X. + +Since the bitwise operation instruction does not distinguish between 64-bit, +32-bit, etc., it is necessary to perform symbolic expansion if the bitwise +operation is less than 64 bits. +The original definition would have generated a lot of redundant symbolic +extension instructions. This problem is optimized with reference to the +implementation of RISCV. 
+ +Add this patch spec2017 500.perlbench performance improvement by 1.8% + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (one_cmpl2): Replace GPR with X. + (*nor3): Likewise. + (nor3): Likewise. + (*negsi2_extended): New template. + (*si3_internal): Likewise. + (*one_cmplsi2_internal): Likewise. + (*norsi3_internal): Likewise. + (*nsi_internal): Likewise. + (bytepick_w__extend): Modify this template according to the + modified bit operation to make the optimization work. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/sign-extend-bitwise.c: New test. +--- + gcc/config/loongarch/loongarch.md | 93 ++++++++++++++----- + .../loongarch/sign-extend-bitwise.c | 21 +++++ + 2 files changed, 90 insertions(+), 24 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend-bitwise.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 23653a2b0..6ebf33cbe 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -736,7 +736,7 @@ + + (define_insn "sub3" + [(set (match_operand:GPR 0 "register_operand" "=r") +- (minus:GPR (match_operand:GPR 1 "register_operand" "rJ") ++ (minus:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "register_operand" "r")))] + "" + "sub.\t%0,%z1,%2" +@@ -1412,13 +1412,13 @@ + [(set_attr "alu_type" "sub") + (set_attr "mode" "")]) + +-(define_insn "one_cmpl2" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (not:GPR (match_operand:GPR 1 "register_operand" "r")))] +- "" +- "nor\t%0,%.,%1" +- [(set_attr "alu_type" "not") +- (set_attr "mode" "")]) ++(define_insn "*negsi2_extended" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI (neg:SI (match_operand:SI 1 "register_operand" "r"))))] ++ "TARGET_64BIT" ++ "sub.w\t%0,%.,%1" ++ [(set_attr "alu_type" "sub") ++ (set_attr "mode" "SI")]) + + (define_insn "neg2" + [(set (match_operand:ANYF 0 "register_operand" "=f") +@@ -1438,14 +1438,39 @@ + ;; + + 
(define_insn "3" +- [(set (match_operand:GPR 0 "register_operand" "=r,r") +- (any_bitwise:GPR (match_operand:GPR 1 "register_operand" "%r,r") +- (match_operand:GPR 2 "uns_arith_operand" "r,K")))] ++ [(set (match_operand:X 0 "register_operand" "=r,r") ++ (any_bitwise:X (match_operand:X 1 "register_operand" "%r,r") ++ (match_operand:X 2 "uns_arith_operand" "r,K")))] + "" + "%i2\t%0,%1,%2" + [(set_attr "type" "logical") + (set_attr "mode" "")]) + ++(define_insn "*si3_internal" ++ [(set (match_operand:SI 0 "register_operand" "=r,r") ++ (any_bitwise:SI (match_operand:SI 1 "register_operand" "%r,r") ++ (match_operand:SI 2 "uns_arith_operand" " r,K")))] ++ "TARGET_64BIT" ++ "%i2\t%0,%1,%2" ++ [(set_attr "type" "logical") ++ (set_attr "mode" "SI")]) ++ ++(define_insn "one_cmpl2" ++ [(set (match_operand:X 0 "register_operand" "=r") ++ (not:X (match_operand:X 1 "register_operand" "r")))] ++ "" ++ "nor\t%0,%.,%1" ++ [(set_attr "alu_type" "not") ++ (set_attr "mode" "")]) ++ ++(define_insn "*one_cmplsi2_internal" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (not:SI (match_operand:SI 1 "register_operand" " r")))] ++ "TARGET_64BIT" ++ "nor\t%0,%.,%1" ++ [(set_attr "type" "logical") ++ (set_attr "mode" "SI")]) ++ + (define_insn "and3_extended" + [(set (match_operand:GPR 0 "register_operand" "=r") + (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "r") +@@ -1561,25 +1586,43 @@ + [(set_attr "type" "logical") + (set_attr "mode" "HI")]) + +-(define_insn "*nor3" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (and:GPR (not:GPR (match_operand:GPR 1 "register_operand" "%r")) +- (not:GPR (match_operand:GPR 2 "register_operand" "r"))))] ++(define_insn "nor3" ++ [(set (match_operand:X 0 "register_operand" "=r") ++ (and:X (not:X (match_operand:X 1 "register_operand" "%r")) ++ (not:X (match_operand:X 2 "register_operand" "r"))))] + "" + "nor\t%0,%1,%2" + [(set_attr "type" "logical") + (set_attr "mode" "")]) + ++(define_insn "*norsi3_internal" ++ [(set 
(match_operand:SI 0 "register_operand" "=r") ++ (and:SI (not:SI (match_operand:SI 1 "register_operand" "%r")) ++ (not:SI (match_operand:SI 2 "register_operand" "r"))))] ++ "TARGET_64BIT" ++ "nor\t%0,%1,%2" ++ [(set_attr "type" "logical") ++ (set_attr "mode" "SI")]) ++ + (define_insn "n" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (neg_bitwise:GPR +- (not:GPR (match_operand:GPR 1 "register_operand" "r")) +- (match_operand:GPR 2 "register_operand" "r")))] ++ [(set (match_operand:X 0 "register_operand" "=r") ++ (neg_bitwise:X ++ (not:X (match_operand:X 1 "register_operand" "r")) ++ (match_operand:X 2 "register_operand" "r")))] + "" + "n\t%0,%2,%1" + [(set_attr "type" "logical") + (set_attr "mode" "")]) + ++(define_insn "*nsi_internal" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (neg_bitwise:SI ++ (not:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "r")))] ++ "TARGET_64BIT" ++ "n\t%0,%2,%1" ++ [(set_attr "type" "logical") ++ (set_attr "mode" "SI")]) + + ;; + ;; .................... +@@ -3167,7 +3210,6 @@ + (label_ref (match_operand 1)) + (pc)))]) + +- + + ;; + ;; .................... 
+@@ -3967,10 +4009,13 @@ + (define_insn "bytepick_w__extend" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI +- (ior:SI (lshiftrt (match_operand:SI 1 "register_operand" "r") +- (const_int )) +- (ashift (match_operand:SI 2 "register_operand" "r") +- (const_int bytepick_w_ashift_amount)))))] ++ (subreg:SI ++ (ior:DI (subreg:DI (lshiftrt ++ (match_operand:SI 1 "register_operand" "r") ++ (const_int )) 0) ++ (subreg:DI (ashift ++ (match_operand:SI 2 "register_operand" "r") ++ (const_int bytepick_w_ashift_amount)) 0)) 0)))] + "TARGET_64BIT" + "bytepick.w\t%0,%1,%2," + [(set_attr "mode" "SI")]) +diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend-bitwise.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-bitwise.c +new file mode 100644 +index 000000000..5753ef69d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/sign-extend-bitwise.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mabi=lp64d -O2" } */ ++/* { dg-final { scan-assembler-not "slli.w\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,0" } } */ ++ ++struct pmop ++{ ++ unsigned int op_pmflags; ++ unsigned int op_pmpermflags; ++}; ++unsigned int PL_hints; ++ ++struct pmop *pmop; ++void ++Perl_newPMOP (int type, int flags) ++{ ++ if (PL_hints & 0x00100000) ++ pmop->op_pmpermflags |= 0x0001; ++ if (PL_hints & 0x00000004) ++ pmop->op_pmpermflags |= 0x0800; ++ pmop->op_pmflags = pmop->op_pmpermflags; ++} +-- +2.43.0 + diff --git a/0109-LoongArch-Implement-option-save-restore.patch b/0109-LoongArch-Implement-option-save-restore.patch new file mode 100644 index 0000000..5f670a0 --- /dev/null +++ b/0109-LoongArch-Implement-option-save-restore.patch @@ -0,0 +1,467 @@ +From 146c85fa8b32d88acacf8645096d004e0c6f2f9c Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Thu, 11 Jan 2024 09:07:10 +0800 +Subject: [PATCH 109/188] LoongArch: Implement option save/restore + +LTO option streaming and target attributes both require per-function +target configuration, which is achieved via option 
save/restore. + +We implement TARGET_OPTION_{SAVE,RESTORE} to switch the la_target +context in addition to other automatically maintained option states +(via the "Save" option property in the .opt files). + +Tested on loongarch64-linux-gnu without regression. + + PR target/113233 + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch.opt.in: Mark options with + the "Save" property. + * config/loongarch/loongarch.opt: Same. + * config/loongarch/loongarch-opts.cc: Refresh -mcmodel= state + according to la_target. + * config/loongarch/loongarch.cc: Implement TARGET_OPTION_{SAVE, + RESTORE} for the la_target structure; Rename option conditions + to have the same "la_" prefix. + * config/loongarch/loongarch.h: Same. +--- + gcc/config/loongarch/genopts/loongarch.opt.in | 38 ++++----- + gcc/config/loongarch/loongarch-opts.cc | 7 ++ + gcc/config/loongarch/loongarch.cc | 80 +++++++++++++++---- + gcc/config/loongarch/loongarch.h | 2 +- + gcc/config/loongarch/loongarch.opt | 38 ++++----- + 5 files changed, 111 insertions(+), 54 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index f2055b55e..4d6b1902d 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -50,7 +50,7 @@ EnumValue + Enum(isa_ext_fpu) String(@@STR_ISA_EXT_FPU64@@) Value(ISA_EXT_FPU64) + + m@@OPTSTR_ISA_EXT_FPU@@= +-Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) Init(M_OPT_UNSET) ++Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) Init(M_OPT_UNSET) Save + -m@@OPTSTR_ISA_EXT_FPU@@=FPU Generate code for the given FPU. 
+ + m@@OPTSTR_ISA_EXT_FPU@@=@@STR_ISA_EXT_FPU0@@ +@@ -82,7 +82,7 @@ EnumValue + Enum(isa_ext_simd) String(@@STR_ISA_EXT_LASX@@) Value(ISA_EXT_SIMD_LASX) + + m@@OPTSTR_ISA_EXT_SIMD@@= +-Target RejectNegative Joined ToLower Enum(isa_ext_simd) Var(la_opt_simd) Init(M_OPT_UNSET) ++Target RejectNegative Joined ToLower Enum(isa_ext_simd) Var(la_opt_simd) Init(M_OPT_UNSET) Save + -m@@OPTSTR_ISA_EXT_SIMD@@=SIMD Generate code for the given SIMD extension. + + m@@STR_ISA_EXT_LSX@@ +@@ -114,11 +114,11 @@ EnumValue + Enum(cpu_type) String(@@STR_CPU_LA664@@) Value(CPU_LA664) + + m@@OPTSTR_ARCH@@= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) Save + -m@@OPTSTR_ARCH@@=PROCESSOR Generate code for the given PROCESSOR ISA. + + m@@OPTSTR_TUNE@@= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) Save + -m@@OPTSTR_TUNE@@=PROCESSOR Generate optimized code for PROCESSOR. + + +@@ -149,31 +149,31 @@ Variable + int la_opt_abi_ext = M_OPT_UNSET + + mbranch-cost= +-Target RejectNegative Joined UInteger Var(loongarch_branch_cost) ++Target RejectNegative Joined UInteger Var(la_branch_cost) Save + -mbranch-cost=COST Set the cost of branches to roughly COST instructions. + + mcheck-zero-division +-Target Mask(CHECK_ZERO_DIV) ++Target Mask(CHECK_ZERO_DIV) Save + Trap on integer divide by zero. + + mcond-move-int +-Target Var(TARGET_COND_MOVE_INT) Init(1) ++Target Mask(COND_MOVE_INT) Save + Conditional moves for integral are enabled. + + mcond-move-float +-Target Var(TARGET_COND_MOVE_FLOAT) Init(1) ++Target Mask(COND_MOVE_FLOAT) Save + Conditional moves for float are enabled. + + mmemcpy +-Target Mask(MEMCPY) ++Target Mask(MEMCPY) Save + Prevent optimizing block moves, which is also the default behavior of -Os. 
+ + mstrict-align +-Target Var(TARGET_STRICT_ALIGN) Init(0) ++Target Mask(STRICT_ALIGN) Save + Do not generate unaligned memory accesses. + + mmax-inline-memcpy-size= +-Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) ++Target Joined RejectNegative UInteger Var(la_max_inline_memcpy_size) Init(1024) Save + -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. + + Enum +@@ -198,11 +198,11 @@ Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). + + mrecip +-Target RejectNegative Var(loongarch_recip) ++Target RejectNegative Var(la_recip) Save + Generate approximate reciprocal divide and square root for better throughput. + + mrecip= +-Target RejectNegative Joined Var(loongarch_recip_name) ++Target RejectNegative Joined Var(la_recip_name) Save + Control generation of reciprocal estimates. + + ; The code model option names for -mcmodel. +@@ -229,29 +229,29 @@ EnumValue + Enum(cmodel) String(@@STR_CMODEL_EXTREME@@) Value(CMODEL_EXTREME) + + mcmodel= +-Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) Init(M_OPT_UNSET) ++Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) Init(M_OPT_UNSET) Save + Specify the code model. + + mdirect-extern-access +-Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0) ++Target Mask(DIRECT_EXTERN_ACCESS) Save + Avoid using the GOT to access external symbols. + + mrelax +-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION) ++Target Mask(LINKER_RELAXATION) + Take advantage of linker relaxations to reduce the number of instructions + required to materialize symbol addresses. + + mpass-mrelax-to-as +-Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) ++Driver Var(la_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) + Pass -mrelax or -mno-relax option to the assembler. 
+ + -param=loongarch-vect-unroll-limit= +-Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param ++Target Joined UInteger Var(la_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param + Used to limit unroll factor which indicates how much the autovectorizer may + unroll a loop. The default value is 6. + + -param=loongarch-vect-issue-info= +-Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) IntegerRange(1, 64) Param ++Target Undocumented Joined UInteger Var(la_vect_issue_info) Init(4) IntegerRange(1, 64) Param + Indicate how many non memory access vector instructions can be issued per + cycle, it's used in unroll factor determination for autovectorizer. The + default value is 4. +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index cf4c7bc93..a2b069d83 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -785,8 +785,15 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + opts->x_la_opt_cpu_arch = target->cpu_arch; + opts->x_la_opt_cpu_tune = target->cpu_tune; + ++ /* status of -mcmodel */ ++ opts->x_la_opt_cmodel = target->cmodel; ++ + /* status of -mfpu */ + opts->x_la_opt_fpu = target->isa.fpu; ++ ++ /* status of -msimd */ + opts->x_la_opt_simd = target->isa.simd; ++ ++ /* ISA evolution features */ + opts->x_la_isa_evolution = target->isa.evolution; + } +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 8cd703caa..533bae5b2 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4079,10 +4079,10 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi + + /* Use this simple hardware resource model that how many non vld/vst + vector instructions can be issued per cycle. 
*/ +- unsigned int issue_info = loongarch_vect_issue_info; ++ unsigned int issue_info = la_vect_issue_info; + unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1; + unsigned int uf = CEIL (reduc_factor * issue_info, nstmts_nonldst); +- uf = MIN ((unsigned int) loongarch_vect_unroll_limit, uf); ++ uf = MIN ((unsigned int) la_vect_unroll_limit, uf); + + return 1 << ceil_log2 (uf); + } +@@ -5540,7 +5540,7 @@ loongarch_expand_block_move (rtx dest, rtx src, rtx r_length, rtx r_align) + return false; + + HOST_WIDE_INT length = INTVAL (r_length); +- if (length > loongarch_max_inline_memcpy_size) ++ if (length > la_max_inline_memcpy_size) + return false; + + HOST_WIDE_INT align = INTVAL (r_align); +@@ -7518,13 +7518,6 @@ loongarch_option_override_internal (struct gcc_options *opts, + loongarch_update_gcc_opt_status (&la_target, opts, opts_set); + loongarch_cpu_option_override (&la_target, opts, opts_set); + +- if (la_opt_explicit_relocs == M_OPT_UNSET) +- la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS +- ? (loongarch_mrelax +- ? EXPLICIT_RELOCS_AUTO +- : EXPLICIT_RELOCS_ALWAYS) +- : EXPLICIT_RELOCS_NONE); +- + if (TARGET_ABI_LP64) + flag_pcc_struct_return = 0; + +@@ -7536,8 +7529,8 @@ loongarch_option_override_internal (struct gcc_options *opts, + + /* If the user hasn't specified a branch cost, use the processor's + default. */ +- if (loongarch_branch_cost == 0) +- loongarch_branch_cost = loongarch_cost->branch_cost; ++ if (la_branch_cost == 0) ++ la_branch_cost = loongarch_cost->branch_cost; + + /* Enable sw prefetching at -O3 and higher. 
*/ + if (opts->x_flag_prefetch_loop_arrays < 0 +@@ -7624,9 +7617,9 @@ loongarch_option_override_internal (struct gcc_options *opts, + { "vec-rsqrt", RECIP_MASK_VEC_RSQRT }, + }; + +- if (loongarch_recip_name) ++ if (la_recip_name) + { +- char *p = ASTRDUP (loongarch_recip_name); ++ char *p = ASTRDUP (la_recip_name); + char *q; + unsigned int mask, i; + bool invert; +@@ -7667,10 +7660,38 @@ loongarch_option_override_internal (struct gcc_options *opts, + recip_mask |= mask; + } + } +- if (loongarch_recip) ++ if (la_recip) + recip_mask |= RECIP_MASK_ALL; + if (!ISA_HAS_FRECIPE) + recip_mask = RECIP_MASK_NONE; ++ ++#define INIT_TARGET_FLAG(NAME, INIT) \ ++ { \ ++ if (!(target_flags_explicit & MASK_##NAME)) \ ++ { \ ++ if (INIT) \ ++ target_flags |= MASK_##NAME; \ ++ else \ ++ target_flags &= ~MASK_##NAME; \ ++ } \ ++ } ++ ++ /* Enable conditional moves for int and float by default. */ ++ INIT_TARGET_FLAG (COND_MOVE_INT, 1) ++ INIT_TARGET_FLAG (COND_MOVE_FLOAT, 1) ++ ++ /* Set mrelax default. */ ++ INIT_TARGET_FLAG (LINKER_RELAXATION, ++ HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION) ++ ++#undef INIT_TARGET_FLAG ++ ++ if (la_opt_explicit_relocs == M_OPT_UNSET) ++ la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS ++ ? (TARGET_LINKER_RELAXATION ++ ? EXPLICIT_RELOCS_AUTO ++ : EXPLICIT_RELOCS_ALWAYS) ++ : EXPLICIT_RELOCS_NONE); + } + + +@@ -7682,6 +7703,31 @@ loongarch_option_override (void) + loongarch_option_override_internal (&global_options, &global_options_set); + } + ++/* Implement TARGET_OPTION_SAVE. */ ++static void ++loongarch_option_save (struct cl_target_option *, ++ struct gcc_options *opts, ++ struct gcc_options *opts_set) ++{ ++ loongarch_update_gcc_opt_status (&la_target, opts, opts_set); ++} ++ ++/* Implement TARGET_OPTION_RESTORE. 
*/ ++static void ++loongarch_option_restore (struct gcc_options *, ++ struct gcc_options *, ++ struct cl_target_option *ptr) ++{ ++ la_target.cpu_arch = ptr->x_la_opt_cpu_arch; ++ la_target.cpu_tune = ptr->x_la_opt_cpu_tune; ++ ++ la_target.isa.fpu = ptr->x_la_opt_fpu; ++ la_target.isa.simd = ptr->x_la_opt_simd; ++ la_target.isa.evolution = ptr->x_la_isa_evolution; ++ ++ la_target.cmodel = ptr->x_la_opt_cmodel; ++} ++ + /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */ + + static void +@@ -10880,6 +10926,10 @@ loongarch_asm_code_end (void) + + #undef TARGET_OPTION_OVERRIDE + #define TARGET_OPTION_OVERRIDE loongarch_option_override ++#undef TARGET_OPTION_SAVE ++#define TARGET_OPTION_SAVE loongarch_option_save ++#undef TARGET_OPTION_RESTORE ++#define TARGET_OPTION_RESTORE loongarch_option_restore + + #undef TARGET_LEGITIMIZE_ADDRESS + #define TARGET_LEGITIMIZE_ADDRESS loongarch_legitimize_address +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index fbc0f53e4..f54b078b1 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -868,7 +868,7 @@ typedef struct { + /* A C expression for the cost of a branch instruction. A value of + 1 is the default; other values are interpreted relative to that. */ + +-#define BRANCH_COST(speed_p, predictable_p) loongarch_branch_cost ++#define BRANCH_COST(speed_p, predictable_p) la_branch_cost + + /* Return the asm template for a conditional branch instruction. 
+ OPCODE is the opcode's mnemonic and OPERANDS is the asm template for +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index d6e337ac2..75d230067 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -58,7 +58,7 @@ EnumValue + Enum(isa_ext_fpu) String(64) Value(ISA_EXT_FPU64) + + mfpu= +-Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) Init(M_OPT_UNSET) ++Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) Init(M_OPT_UNSET) Save + -mfpu=FPU Generate code for the given FPU. + + mfpu=0 +@@ -90,7 +90,7 @@ EnumValue + Enum(isa_ext_simd) String(lasx) Value(ISA_EXT_SIMD_LASX) + + msimd= +-Target RejectNegative Joined ToLower Enum(isa_ext_simd) Var(la_opt_simd) Init(M_OPT_UNSET) ++Target RejectNegative Joined ToLower Enum(isa_ext_simd) Var(la_opt_simd) Init(M_OPT_UNSET) Save + -msimd=SIMD Generate code for the given SIMD extension. + + mlsx +@@ -122,11 +122,11 @@ EnumValue + Enum(cpu_type) String(la664) Value(CPU_LA664) + + march= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) Save + -march=PROCESSOR Generate code for the given PROCESSOR ISA. + + mtune= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) ++Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) Save + -mtune=PROCESSOR Generate optimized code for PROCESSOR. + + +@@ -157,31 +157,31 @@ Variable + int la_opt_abi_ext = M_OPT_UNSET + + mbranch-cost= +-Target RejectNegative Joined UInteger Var(loongarch_branch_cost) ++Target RejectNegative Joined UInteger Var(la_branch_cost) Save + -mbranch-cost=COST Set the cost of branches to roughly COST instructions. + + mcheck-zero-division +-Target Mask(CHECK_ZERO_DIV) ++Target Mask(CHECK_ZERO_DIV) Save + Trap on integer divide by zero. 
+ + mcond-move-int +-Target Var(TARGET_COND_MOVE_INT) Init(1) ++Target Mask(COND_MOVE_INT) Save + Conditional moves for integral are enabled. + + mcond-move-float +-Target Var(TARGET_COND_MOVE_FLOAT) Init(1) ++Target Mask(COND_MOVE_FLOAT) Save + Conditional moves for float are enabled. + + mmemcpy +-Target Mask(MEMCPY) ++Target Mask(MEMCPY) Save + Prevent optimizing block moves, which is also the default behavior of -Os. + + mstrict-align +-Target Var(TARGET_STRICT_ALIGN) Init(0) ++Target Mask(STRICT_ALIGN) Save + Do not generate unaligned memory accesses. + + mmax-inline-memcpy-size= +-Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) ++Target Joined RejectNegative UInteger Var(la_max_inline_memcpy_size) Init(1024) Save + -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. + + Enum +@@ -206,11 +206,11 @@ Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). + + mrecip +-Target RejectNegative Var(loongarch_recip) ++Target RejectNegative Var(la_recip) Save + Generate approximate reciprocal divide and square root for better throughput. + + mrecip= +-Target RejectNegative Joined Var(loongarch_recip_name) ++Target RejectNegative Joined Var(la_recip_name) Save + Control generation of reciprocal estimates. + + ; The code model option names for -mcmodel. +@@ -237,29 +237,29 @@ EnumValue + Enum(cmodel) String(extreme) Value(CMODEL_EXTREME) + + mcmodel= +-Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) Init(M_OPT_UNSET) ++Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) Init(M_OPT_UNSET) Save + Specify the code model. + + mdirect-extern-access +-Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0) ++Target Mask(DIRECT_EXTERN_ACCESS) Save + Avoid using the GOT to access external symbols. 
+ + mrelax +-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION) ++Target Mask(LINKER_RELAXATION) + Take advantage of linker relaxations to reduce the number of instructions + required to materialize symbol addresses. + + mpass-mrelax-to-as +-Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) ++Driver Var(la_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) + Pass -mrelax or -mno-relax option to the assembler. + + -param=loongarch-vect-unroll-limit= +-Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param ++Target Joined UInteger Var(la_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param + Used to limit unroll factor which indicates how much the autovectorizer may + unroll a loop. The default value is 6. + + -param=loongarch-vect-issue-info= +-Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) IntegerRange(1, 64) Param ++Target Undocumented Joined UInteger Var(la_vect_issue_info) Init(4) IntegerRange(1, 64) Param + Indicate how many non memory access vector instructions can be issued per + cycle, it's used in unroll factor determination for autovectorizer. The + default value is 4. +-- +2.43.0 + diff --git a/0110-LoongArch-Redundant-sign-extension-elimination-optim.patch b/0110-LoongArch-Redundant-sign-extension-elimination-optim.patch new file mode 100644 index 0000000..6671733 --- /dev/null +++ b/0110-LoongArch-Redundant-sign-extension-elimination-optim.patch @@ -0,0 +1,234 @@ +From 54786cec1f52854a70369a3060ed22b1e070f000 Mon Sep 17 00:00:00 2001 +From: Li Wei +Date: Thu, 11 Jan 2024 19:36:19 +0800 +Subject: [PATCH 110/188] LoongArch: Redundant sign extension elimination + optimization. 
+ +We found that the current combine optimization pass in gcc cannot handle +the following redundant sign extension situations: + +(insn 77 76 78 5 (set (reg:SI 143) + (plus:SI (subreg/s/u:SI (reg/v:DI 104 [ len ]) 0) + (const_int 1 [0x1]))) {addsi3} + (expr_list:REG_DEAD (reg/v:DI 104 [ len ]) + (nil))) +(insn 78 77 82 5 (set (reg/v:DI 104 [ len ]) + (sign_extend:DI (reg:SI 143))) {extendsidi2} + (nil)) + +Because reg:SI 143 neither dies nor is set in insn 78, no replacement merge will +be performed for the insn sequence. We adjusted the add template to eliminate +redundant sign extensions during the expand pass. +Adjusted based on upstream comments: +https://gcc.gnu.org/pipermail/gcc-patches/2024-January/641988.html + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (add3): Removed. + (*addsi3): New. + (addsi3): Ditto. + (adddi3): Ditto. + (*addsi3_extended): Removed. + (addsi3_extended): New. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/sign-extend.c: Moved to... + * gcc.target/loongarch/sign-extend-1.c: ...here. + * gcc.target/loongarch/sign-extend-2.c: New test.
+--- + gcc/config/loongarch/loongarch.md | 93 ++++++++++++++----- + .../{sign-extend.c => sign-extend-1.c} | 0 + .../gcc.target/loongarch/sign-extend-2.c | 59 ++++++++++++ + 3 files changed, 128 insertions(+), 24 deletions(-) + rename gcc/testsuite/gcc.target/loongarch/{sign-extend.c => sign-extend-1.c} (100%) + create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend-2.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 6ebf33cbe..4c7e28ace 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -657,42 +657,87 @@ + [(set_attr "type" "fadd") + (set_attr "mode" "")]) + +-(define_insn_and_split "add3" +- [(set (match_operand:GPR 0 "register_operand" "=r,r,r,r,r,r,r") +- (plus:GPR (match_operand:GPR 1 "register_operand" "r,r,r,r,r,r,r") +- (match_operand:GPR 2 "plus__operand" +- "r,I,La,Lb,Lc,Ld,Le")))] ++(define_insn_and_split "*addsi3" ++ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") ++ (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r") ++ (match_operand:SI 2 "plus_si_operand" ++ "r,I,La,Lb,Le")))] + "" + "@ +- add.\t%0,%1,%2 +- addi.\t%0,%1,%2 ++ add.w\t%0,%1,%2 ++ addi.w\t%0,%1,%2 + # + * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \ + return \"addu16i.d\t%0,%1,%2\"; ++ #" ++ "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \ ++ && !ADDU16I_OPERAND (INTVAL (operands[2]))" ++ [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3))) ++ (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 4)))] ++ { ++ loongarch_split_plus_constant (&operands[2], SImode); ++ } ++ [(set_attr "alu_type" "add") ++ (set_attr "mode" "SI") ++ (set_attr "insn_count" "1,1,2,1,2")]) ++ ++(define_expand "addsi3" ++ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") ++ (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r") ++ (match_operand:SI 2 "plus_si_operand" "r,I,La,Le,Lb")))] ++ "TARGET_64BIT" ++{ ++ if (CONST_INT_P (operands[2]) && !IMM12_INT 
(operands[2]) ++ && ADDU16I_OPERAND (INTVAL (operands[2]))) ++ { ++ rtx t1 = gen_reg_rtx (DImode); ++ rtx t2 = gen_reg_rtx (DImode); ++ rtx t3 = gen_reg_rtx (DImode); ++ emit_insn (gen_extend_insn (t1, operands[1], DImode, SImode, 0)); ++ t2 = operands[2]; ++ emit_insn (gen_adddi3 (t3, t1, t2)); ++ t3 = gen_lowpart (SImode, t3); ++ emit_move_insn (operands[0], t3); ++ DONE; ++ } ++ else ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_addsi3_extended (t, operands[1], operands[2])); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands[0], t); ++ DONE; ++ } ++}) ++ ++(define_insn_and_split "adddi3" ++ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r") ++ (plus:DI (match_operand:DI 1 "register_operand" "r,r,r,r,r,r") ++ (match_operand:DI 2 "plus_di_operand" ++ "r,I,La,Lb,Lc,Ld")))] ++ "TARGET_64BIT" ++ "@ ++ add.d\t%0,%1,%2 ++ addi.d\t%0,%1,%2 + # ++ * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \ ++ return \"addu16i.d\t%0,%1,%2\"; + # + #" +- "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \ ++ "&& CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \ + && !ADDU16I_OPERAND (INTVAL (operands[2]))" +- [(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3))) +- (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))] ++ [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 3))) ++ (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))] + { +- loongarch_split_plus_constant (&operands[2], mode); ++ loongarch_split_plus_constant (&operands[2], DImode); + } + [(set_attr "alu_type" "add") +- (set_attr "mode" "") +- (set_attr "insn_count" "1,1,2,1,2,2,2") +- (set (attr "enabled") +- (cond +- [(match_test "mode != DImode && which_alternative == 4") +- (const_string "no") +- (match_test "mode != DImode && which_alternative == 5") +- (const_string "no") +- (match_test "mode != SImode && which_alternative == 6") +- (const_string "no")] +- (const_string 
"yes")))]) +- +-(define_insn_and_split "*addsi3_extended" ++ (set_attr "mode" "DI") ++ (set_attr "insn_count" "1,1,2,1,2,2")]) ++ ++(define_insn_and_split "addsi3_extended" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") + (sign_extend:DI + (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r") +diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-1.c +similarity index 100% +rename from gcc/testsuite/gcc.target/loongarch/sign-extend.c +rename to gcc/testsuite/gcc.target/loongarch/sign-extend-1.c +diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c +new file mode 100644 +index 000000000..a45dde4f7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c +@@ -0,0 +1,59 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mabi=lp64d -O2" } */ ++/* { dg-final { scan-assembler-times "slli.w\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,0" 1 } } */ ++ ++#include ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++void ++bt_skip_func (const uint32_t len_limit, const uint32_t pos, ++ const uint8_t *const cur, uint32_t cur_match, ++ uint32_t *const son, const uint32_t cyclic_pos, ++ const uint32_t cyclic_size) ++{ ++ uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; ++ uint32_t *ptr1 = son + (cyclic_pos << 1); ++ ++ uint32_t len0 = 0; ++ uint32_t len1 = 0; ++ ++ while (1) ++ { ++ const uint32_t delta = pos - cur_match; ++ uint32_t *pair ++ = son ++ + ((cyclic_pos - delta + (delta > cyclic_pos ? 
cyclic_size : 0)) ++ << 1); ++ const uint8_t *pb = cur - delta; ++ uint32_t len = my_min (len0, len1); ++ ++ if (pb[len] == cur[len]) ++ { ++ while (++len != len_limit) ++ if (pb[len] != cur[len]) ++ break; ++ ++ if (len == len_limit) ++ { ++ *ptr1 = pair[0]; ++ *ptr0 = pair[1]; ++ return; ++ } ++ } ++ ++ if (pb[len] < cur[len]) ++ { ++ *ptr1 = cur_match; ++ ptr1 = pair + 1; ++ cur_match = *ptr1; ++ len1 = len; ++ } ++ else ++ { ++ *ptr0 = cur_match; ++ ptr0 = pair; ++ cur_match = *ptr0; ++ len0 = len; ++ } ++ } ++} +-- +2.43.0 + diff --git a/0111-LoongArch-Redundant-sign-extension-elimination-optim.patch b/0111-LoongArch-Redundant-sign-extension-elimination-optim.patch new file mode 100644 index 0000000..2015969 --- /dev/null +++ b/0111-LoongArch-Redundant-sign-extension-elimination-optim.patch @@ -0,0 +1,56 @@ +From 7bb1a356ca9eefab48d64bd3deeaac081c1ae7ea Mon Sep 17 00:00:00 2001 +From: Li Wei +Date: Thu, 11 Jan 2024 19:36:33 +0800 +Subject: [PATCH 111/188] LoongArch: Redundant sign extension elimination + optimization 2. + +Eliminate the redundant sign extension that exists after the conditional +move when the target register is SImode. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_expand_conditional_move): + Adjust. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/sign-extend-2.c: Adjust. 
+--- + gcc/config/loongarch/loongarch.cc | 6 ++++++ + gcc/testsuite/gcc.target/loongarch/sign-extend-2.c | 5 +++-- + 2 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 533bae5b2..13481130b 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5367,6 +5367,12 @@ loongarch_expand_conditional_move (rtx *operands) + rtx temp3 = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2))); + temp3 = gen_lowpart (GET_MODE (operands[0]), temp3); ++ /* Nonzero in a subreg if it was made when accessing an object that ++ was promoted to a wider mode in accord with the PROMOTED_MODE ++ machine description macro. */ ++ SUBREG_PROMOTED_VAR_P (temp3) = 1; ++ /* Sets promoted mode for SUBREG_PROMOTED_VAR_P. */ ++ SUBREG_PROMOTED_SET (temp3, SRP_SIGNED); + loongarch_emit_move (operands[0], temp3); + } + else +diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c +index a45dde4f7..e57a2727d 100644 +--- a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c +@@ -1,6 +1,7 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O2" } */ +-/* { dg-final { scan-assembler-times "slli.w\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,0" 1 } } */ ++/* { dg-options "-mabi=lp64d -O2 -fdump-rtl-expand" } */ ++/* { dg-final { scan-rtl-dump "subreg/s" "expand" } } */ ++/* { dg-final { scan-assembler-not "slli.w\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,0" } } */ + + #include + #define my_min(x, y) ((x) < (y) ? 
(x) : (y)) +-- +2.43.0 + diff --git a/0112-LoongArch-Assign-the-u-attribute-to-the-mem-to-which.patch b/0112-LoongArch-Assign-the-u-attribute-to-the-mem-to-which.patch new file mode 100644 index 0000000..9df4bcc --- /dev/null +++ b/0112-LoongArch-Assign-the-u-attribute-to-the-mem-to-which.patch @@ -0,0 +1,64 @@ +From 191675bdfd4cef0fbcf642f53da82a49bd23a3bf Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Fri, 12 Jan 2024 17:06:30 +0800 +Subject: [PATCH 112/188] LoongArch: Assign the '/u' attribute to the mem to + which the global offset table belongs. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_split_symbol): + Assign the '/u' attribute to the mem. + +gcc/testsuite/ChangeLog: + + * g++.target/loongarch/got-load.C: New test. +--- + gcc/config/loongarch/loongarch.cc | 5 +++++ + gcc/testsuite/g++.target/loongarch/got-load.C | 19 +++++++++++++++++++ + 2 files changed, 24 insertions(+) + create mode 100644 gcc/testsuite/g++.target/loongarch/got-load.C + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 13481130b..7da00c132 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3198,6 +3198,11 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + rtx mem = gen_rtx_MEM (Pmode, low); + *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem), + UNSPEC_LOAD_FROM_GOT); ++ ++ /* Nonzero in a mem, if the memory is statically allocated and ++ read-only. A common example of the later is a shared library’s ++ global offset table. 
*/ ++ MEM_READONLY_P (mem) = 1; + } + + break; +diff --git a/gcc/testsuite/g++.target/loongarch/got-load.C b/gcc/testsuite/g++.target/loongarch/got-load.C +new file mode 100644 +index 000000000..20924c739 +--- /dev/null ++++ b/gcc/testsuite/g++.target/loongarch/got-load.C +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mabi=lp64d -O2 -mexplicit-relocs -mcmodel=normal -fdump-rtl-expand" } */ ++/* { dg-final { scan-rtl-dump-times "mem/u" 2 "expand" } } */ ++ ++#include ++ ++using namespace std; ++ ++int lr[100005][2]; ++ ++void ++test(void) ++{ ++ int n; ++ ++ cin >> n; ++ for (int i = 0; i < n; ++i) ++ cin >> lr[i][0] >> lr[i][1]; ++} +-- +2.43.0 + diff --git a/0113-LoongArch-testsuite-Fix-fail-in-gen-vect-2-25-.c-fil.patch b/0113-LoongArch-testsuite-Fix-fail-in-gen-vect-2-25-.c-fil.patch new file mode 100644 index 0000000..574df99 --- /dev/null +++ b/0113-LoongArch-testsuite-Fix-fail-in-gen-vect-2-25-.c-fil.patch @@ -0,0 +1,51 @@ +From 1576f83f8cae0ead9de533566ec5f21e7a01f842 Mon Sep 17 00:00:00 2001 +From: chenxiaolong +Date: Sat, 13 Jan 2024 15:28:34 +0800 +Subject: [PATCH 113/188] LoongArch: testsuite:Fix fail in gen-vect-{2,25}.c + file. + +1.Added dg-do compile on LoongArch. + When binutils does not support vector instruction sets, an error occurs +because the assembler does not recognize vector instructions. + +2.Added "-mlsx" option for vectorization on LoongArch. + +gcc/testsuite/ChangeLog: + + * gcc.dg/tree-ssa/gen-vect-2.c: Added detection of compilation + behavior and "-mlsx" option on LoongArch. + * gcc.dg/tree-ssa/gen-vect-25.c: Dito. 
+--- + gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c | 2 ++ + gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c +index 42171a2fb..395d6f7ee 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c +@@ -1,6 +1,8 @@ + /* { dg-do run { target vect_cmdline_needed } } */ ++/* { dg-do compile { target { loongarch_sx && {! loongarch_sx_hw } } } } */ + /* { dg-options "-O2 -fno-tree-loop-distribute-patterns -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */ + /* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */ ++/* { dg-additional-options "-mlsx" { target { loongarch*-*-* } } } */ + + #include + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c +index 60ec27054..cea7f246a 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c +@@ -1,6 +1,8 @@ + /* { dg-do run { target vect_cmdline_needed } } */ ++/* { dg-do compile { target { loongarch_sx && {! 
loongarch_sx_hw } } } } */ + /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */ + /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic -mno-sse" { target { i?86-*-* x86_64-*-* } } } */ ++/* { dg-additional-options "-mlsx" { target { loongarch*-*-* } } } */ + + #include + +-- +2.43.0 + diff --git a/0114-LoongArch-Remove-constraint-z-from-movsi_internal.patch b/0114-LoongArch-Remove-constraint-z-from-movsi_internal.patch new file mode 100644 index 0000000..17ab11a --- /dev/null +++ b/0114-LoongArch-Remove-constraint-z-from-movsi_internal.patch @@ -0,0 +1,43 @@ +From 167a3f34b308d3d56e816559701c3fb1c4f88c7b Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 25 Oct 2024 03:30:35 +0000 +Subject: [PATCH 114/188] LoongArch: Remove constraint z from movsi_internal + +We don't allow SImode in FCC, so constraint z is never really used +here. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (movsi_internal): Remove + constraint z. 
+--- + gcc/config/loongarch/loongarch.md | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 4c7e28ace..23d8dc126 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2197,8 +2197,8 @@ + }) + + (define_insn_and_split "*movsi_internal" +- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m,*r,*z") +- (match_operand:SI 1 "move_operand" "r,Yd,w,rJ,*r*J,*m,*f,*f,*z,*r"))] ++ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,w,*f,f,*r,*m") ++ (match_operand:SI 1 "move_operand" "r,Yd,w,rJ,*r*J,m,*f,*f"))] + "(register_operand (operands[0], SImode) + || reg_or_0_operand (operands[1], SImode))" + { return loongarch_output_move (operands[0], operands[1]); } +@@ -2211,7 +2211,7 @@ + DONE; + } + " +- [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore,mftg,mgtf") ++ [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore") + (set_attr "mode" "SI")]) + + ;; 16-bit Integer moves +-- +2.43.0 + diff --git a/0115-LoongArch-doc-Add-attribute-descriptions-defined-in-.patch b/0115-LoongArch-doc-Add-attribute-descriptions-defined-in-.patch new file mode 100644 index 0000000..f46094a --- /dev/null +++ b/0115-LoongArch-doc-Add-attribute-descriptions-defined-in-.patch @@ -0,0 +1,47 @@ +From 0929961b9dd57e0dd18e4cccc6ba760706e74f77 Mon Sep 17 00:00:00 2001 +From: chenxiaolong +Date: Wed, 17 Jan 2024 09:24:06 +0800 +Subject: [PATCH 115/188] LoongArch: doc: Add attribute descriptions defined in + the target-supports.exp. + +gcc/ChangeLog: + + * doc/sourcebuild.texi: Add attributes for keywords. 
+--- + gcc/doc/sourcebuild.texi | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi +index 71c04841d..a1ab0a1cb 100644 +--- a/gcc/doc/sourcebuild.texi ++++ b/gcc/doc/sourcebuild.texi +@@ -2292,6 +2292,26 @@ AArch64 target that is able to generate and execute armv8.3-a FJCVTZS + instruction. + @end table + ++@subsubsection LoongArch specific attributes ++ ++@table @code ++@item loongarch_sx ++LoongArch target that generates instructions for SX. ++ ++@item loongarch_asx ++LoongArch target that generates instructions for ASX. ++ ++@item loongarch_sx_hw ++LoongArch target that is able to generate and execute SX code. ++ ++@item loongarch_asx_hw ++LoongArch target that is able to generate and execute ASX code. ++ ++@item loongarch_call36_support ++LoongArch binutils supports call36 relocation. ++ ++@end table ++ + @subsubsection MIPS-specific attributes + + @table @code +-- +2.43.0 + diff --git a/0116-LoongArch-Disable-explicit-reloc-for-TLS-LD-GD-with-.patch b/0116-LoongArch-Disable-explicit-reloc-for-TLS-LD-GD-with-.patch new file mode 100644 index 0000000..9720e9d --- /dev/null +++ b/0116-LoongArch-Disable-explicit-reloc-for-TLS-LD-GD-with-.patch @@ -0,0 +1,70 @@ +From c0b63b89a03c11bf6383f0175b60614d73295463 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Mon, 22 Jan 2024 18:07:42 +0800 +Subject: [PATCH 116/188] LoongArch: Disable explicit reloc for TLS LD/GD with + -mexplicit-relocs=auto + +Binutils 2.42 supports TLS LD/GD relaxation which requires the assembler +macro. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + If la_opt_explicit_relocs is EXPLICIT_RELOCS_AUTO, return false + for SYMBOL_TLS_LDM and SYMBOL_TLS_GD. + (loongarch_call_tls_get_addr): Do not split symbols of + SYMBOL_TLS_LDM or SYMBOL_TLS_GD if la_opt_explicit_relocs is + EXPLICIT_RELOCS_AUTO. 
+ +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c: Check + for la.tls.ld and la.tls.gd. +--- + gcc/config/loongarch/loongarch.cc | 10 +++++----- + .../loongarch/explicit-relocs-auto-tls-ld-gd.c | 3 ++- + 2 files changed, 7 insertions(+), 6 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 7da00c132..5f22b9dd8 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1967,11 +1967,11 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + { + case SYMBOL_TLS_IE: + case SYMBOL_TLS_LE: +- case SYMBOL_TLSGD: +- case SYMBOL_TLSLDM: + case SYMBOL_PCREL64: +- /* The linker don't know how to relax TLS accesses or 64-bit +- pc-relative accesses. */ ++ /* TLS IE cannot be relaxed. TLS LE relaxation is different from ++ the normal R_LARCH_RELAX-based relaxation and it **requires** ++ using the explicit %le_{lo12,hi20,add}_r relocs. The linker ++ does not relax 64-bit pc-relative accesses as at now. */ + return true; + case SYMBOL_GOT_DISP: + /* The linker don't know how to relax GOT accesses in extreme +@@ -2785,7 +2785,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + start_sequence (); + +- if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) ++ if (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS) + { + /* Split tls symbol to high and low. 
*/ + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +index 957ff98df..ca55fcfc5 100644 +--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +@@ -6,4 +6,5 @@ extern __thread int b __attribute__((visibility("default"))); + + int test() { return a + b; } + +-/* { dg-final { scan-assembler-not "la.tls" { target tls_native } } } */ ++/* { dg-final { scan-assembler "la\\.tls\\.ld" { target tls_native } } } */ ++/* { dg-final { scan-assembler "la\\.tls\\.gd" { target tls_native } } } */ +-- +2.43.0 + diff --git a/0117-LoongArch-testsuite-Disable-stack-protector-for-got-.patch b/0117-LoongArch-testsuite-Disable-stack-protector-for-got-.patch new file mode 100644 index 0000000..85f81c7 --- /dev/null +++ b/0117-LoongArch-testsuite-Disable-stack-protector-for-got-.patch @@ -0,0 +1,35 @@ +From 7e10f7b95a598e9471bd1bc77454af43a69eb506 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Tue, 23 Jan 2024 19:32:38 +0800 +Subject: [PATCH 117/188] LoongArch: testsuite: Disable stack protector for + got-load.C + +When building GCC with --enable-default-ssp, the stack protector is +enabled for got-load.C, causing additional GOT loads for +__stack_chk_guard. So mem/u will be matched more than 2 times and the +test will fail. + +Disable stack protector to fix this issue. + +gcc/testsuite: + + * g++.target/loongarch/got-load.C (dg-options): Add + -fno-stack-protector. 
+--- + gcc/testsuite/g++.target/loongarch/got-load.C | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/testsuite/g++.target/loongarch/got-load.C b/gcc/testsuite/g++.target/loongarch/got-load.C +index 20924c739..17870176a 100644 +--- a/gcc/testsuite/g++.target/loongarch/got-load.C ++++ b/gcc/testsuite/g++.target/loongarch/got-load.C +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O2 -mexplicit-relocs -mcmodel=normal -fdump-rtl-expand" } */ ++/* { dg-options "-mabi=lp64d -O2 -mexplicit-relocs -mcmodel=normal -fdump-rtl-expand -fno-stack-protector" } */ + /* { dg-final { scan-rtl-dump-times "mem/u" 2 "expand" } } */ + + #include +-- +2.43.0 + diff --git a/0118-LoongArch-Disable-TLS-type-symbols-from-generating-n.patch b/0118-LoongArch-Disable-TLS-type-symbols-from-generating-n.patch new file mode 100644 index 0000000..b45cfff --- /dev/null +++ b/0118-LoongArch-Disable-TLS-type-symbols-from-generating-n.patch @@ -0,0 +1,65 @@ +From 8cab312a44efc4711fb7adad65f1314349295591 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Tue, 23 Jan 2024 11:28:09 +0800 +Subject: [PATCH 118/188] LoongArch: Disable TLS type symbols from generating + non-zero offsets. + +TLS gd ld and ie type symbols will generate corresponding GOT entries, +so non-zero offsets cannot be generated. +The address of TLS le type symbol+addend is not implemented in binutils, +so non-zero offset is not generated here for the time being. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_symbolic_constant_p): + For symbols of type tls, non-zero Offset is not generated. 
+--- + gcc/config/loongarch/loongarch.cc | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 5f22b9dd8..9cdd4ed15 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1921,11 +1921,7 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type) + x = UNSPEC_ADDRESS (x); + } + else if (SYMBOL_REF_P (x) || LABEL_REF_P (x)) +- { +- *symbol_type = loongarch_classify_symbol (x); +- if (*symbol_type == SYMBOL_TLS) +- return true; +- } ++ *symbol_type = loongarch_classify_symbol (x); + else + return false; + +@@ -1936,17 +1932,21 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type) + relocations. */ + switch (*symbol_type) + { +- case SYMBOL_TLS_IE: +- case SYMBOL_TLS_LE: +- case SYMBOL_TLSGD: +- case SYMBOL_TLSLDM: + case SYMBOL_PCREL: + case SYMBOL_PCREL64: + /* GAS rejects offsets outside the range [-2^31, 2^31-1]. */ + return sext_hwi (INTVAL (offset), 32) == INTVAL (offset); + ++ /* The following symbol types do not allow non-zero offsets. */ + case SYMBOL_GOT_DISP: ++ case SYMBOL_TLS_IE: ++ case SYMBOL_TLSGD: ++ case SYMBOL_TLSLDM: + case SYMBOL_TLS: ++ /* From an implementation perspective, tls_le symbols are allowed to ++ have non-zero offsets, but currently binutils has not added support, ++ so the generation of non-zero offsets is prohibited here. 
*/ ++ case SYMBOL_TLS_LE: + return false; + } + gcc_unreachable (); +-- +2.43.0 + diff --git a/0119-LoongArch-Remove-vec_concatz-mode-pattern.patch b/0119-LoongArch-Remove-vec_concatz-mode-pattern.patch new file mode 100644 index 0000000..f9df3a6 --- /dev/null +++ b/0119-LoongArch-Remove-vec_concatz-mode-pattern.patch @@ -0,0 +1,75 @@ +From e19c5ba24839d7446f1874b0b33bd61e27e36905 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Wed, 24 Jan 2024 17:19:13 +0800 +Subject: [PATCH 119/188] LoongArch: Remove vec_concatz pattern. + +It is incorrect to use vld/vori to implement the vec_concatz because when the LSX +instruction is used to update the value of the vector register, the upper 128 bits of +the vector register will not be zeroed. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (@vec_concatz): Remove this define_insn pattern. + * config/loongarch/loongarch.cc (loongarch_expand_vector_group_init): Use vec_concat. +--- + gcc/config/loongarch/lasx.md | 15 --------------- + gcc/config/loongarch/loongarch.cc | 17 ++++++----------- + 2 files changed, 6 insertions(+), 26 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index fdfd65e4a..a5128c30c 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -582,21 +582,6 @@ + [(set_attr "type" "simd_insert") + (set_attr "mode" "")]) + +-(define_insn "@vec_concatz" +- [(set (match_operand:LASX 0 "register_operand" "=f") +- (vec_concat:LASX +- (match_operand: 1 "nonimmediate_operand") +- (match_operand: 2 "const_0_operand")))] +- "ISA_HAS_LASX" +-{ +- if (MEM_P (operands[1])) +- return "vld\t%w0,%1"; +- else +- return "vori.b\t%w0,%w1,0"; +-} +- [(set_attr "type" "simd_splat") +- (set_attr "mode" "")]) +- + (define_insn "vec_concat" + [(set (match_operand:LASX 0 "register_operand" "=f") + (vec_concat:LASX +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 9cdd4ed15..9bd931549 100644 +--- 
a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -9912,17 +9912,12 @@ loongarch_expand_vector_group_init (rtx target, rtx vals) + gcc_unreachable (); + } + +- if (high == CONST0_RTX (half_mode)) +- emit_insn (gen_vec_concatz (vmode, target, low, high)); +- else +- { +- if (!register_operand (low, half_mode)) +- low = force_reg (half_mode, low); +- if (!register_operand (high, half_mode)) +- high = force_reg (half_mode, high); +- emit_insn (gen_rtx_SET (target, +- gen_rtx_VEC_CONCAT (vmode, low, high))); +- } ++ if (!register_operand (low, half_mode)) ++ low = force_reg (half_mode, low); ++ if (!register_operand (high, half_mode)) ++ high = force_reg (half_mode, high); ++ emit_insn (gen_rtx_SET (target, ++ gen_rtx_VEC_CONCAT (vmode, low, high))); + } + + /* Expand initialization of a vector which has all same elements. */ +-- +2.43.0 + diff --git a/0120-LoongArch-Optimize-implementation-of-single-precisio.patch b/0120-LoongArch-Optimize-implementation-of-single-precisio.patch new file mode 100644 index 0000000..35c09d8 --- /dev/null +++ b/0120-LoongArch-Optimize-implementation-of-single-precisio.patch @@ -0,0 +1,107 @@ +From cb9180ef1fb7e7b97a60adc3d3908b9684771cd8 Mon Sep 17 00:00:00 2001 +From: Li Wei +Date: Wed, 24 Jan 2024 17:44:17 +0800 +Subject: [PATCH 120/188] LoongArch: Optimize implementation of + single-precision floating-point approximate division. + +We found that in the spec17 521.wrf program, some loop invariant code generated +from single-precision floating-point approximate division calculation failed to +propose a loop. This is because the pseudo-register that stores the +intermediate temporary calculation results is rewritten in the implementation +of single-precision floating-point approximate division, failing to propose +invariants in the loop2_invariant pass. 
To this end, the intermediate temporary +calculation results are stored in new pseudo-registers without destroying the +read-write dependency, so that they could be recognized as loop invariants in +the loop2_invariant pass. +After optimization, the number of instructions of 521.wrf is reduced by 0.18% +compared with before optimization (1716612948501 -> 1713471771364). + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_emit_swdivsf): Adjust. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/invariant-recip.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 19 +++++++---- + .../gcc.target/loongarch/invariant-recip.c | 33 +++++++++++++++++++ + 2 files changed, 46 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/invariant-recip.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 9bd931549..5877b0acf 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -10842,16 +10842,23 @@ void loongarch_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) + /* x0 = 1./b estimate. */ + emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), + unspec))); +- /* 2.0 - b * x0 */ ++ /* e0 = 2.0 - b * x0. */ + emit_insn (gen_rtx_SET (e0, gen_rtx_FMA (mode, + gen_rtx_NEG (mode, b), x0, mtwo))); + +- /* x0 = a * x0 */ + if (a != CONST1_RTX (mode)) +- emit_insn (gen_rtx_SET (x0, gen_rtx_MULT (mode, a, x0))); +- +- /* res = e0 * x0 */ +- emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, x0))); ++ { ++ rtx e1 = gen_reg_rtx (mode); ++ /* e1 = a * x0. */ ++ emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, a, x0))); ++ /* res = e0 * e1. */ ++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, e1))); ++ } ++ else ++ { ++ /* res = e0 * x0. 
*/ ++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, x0))); ++ } + } + + static bool +diff --git a/gcc/testsuite/gcc.target/loongarch/invariant-recip.c b/gcc/testsuite/gcc.target/loongarch/invariant-recip.c +new file mode 100644 +index 000000000..2f64f6ed5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/invariant-recip.c +@@ -0,0 +1,33 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Ofast -march=loongarch64 -mabi=lp64d -mrecip -mfrecipe -fdump-rtl-loop2_invariant " } */ ++/* { dg-final { scan-rtl-dump "Decided to move dependent invariant" "loop2_invariant" } } */ ++ ++void ++nislfv_rain_plm (int im, int km, float dzl[im][km], float rql[im][km], ++ float dt) ++{ ++ int i, k; ++ float con1, decfl; ++ float dz[km], qn[km], wi[km + 1]; ++ ++ for (i = 0; i < im; i++) ++ { ++ for (k = 0; k < km; k++) ++ { ++ dz[k] = dzl[i][k]; ++ } ++ con1 = 0.05; ++ for (k = km - 1; k >= 0; k--) ++ { ++ decfl = (wi[k + 1] - wi[k]) * dt / dz[k]; ++ if (decfl > con1) ++ { ++ wi[k] = wi[k + 1] - con1 * dz[k] / dt; ++ } ++ } ++ for (k = 0; k < km; k++) ++ { ++ rql[i][k] = qn[k]; ++ } ++ } ++} +-- +2.43.0 + diff --git a/0121-LoongArch-Define-LOGICAL_OP_NON_SHORT_CIRCUIT.patch b/0121-LoongArch-Define-LOGICAL_OP_NON_SHORT_CIRCUIT.patch new file mode 100644 index 0000000..2d14acb --- /dev/null +++ b/0121-LoongArch-Define-LOGICAL_OP_NON_SHORT_CIRCUIT.patch @@ -0,0 +1,71 @@ +From a2baa4807fdfd381c543eb7ea85edf14dc6c8a20 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Tue, 16 Jan 2024 10:32:31 +0800 +Subject: [PATCH 121/188] LoongArch: Define LOGICAL_OP_NON_SHORT_CIRCUIT + +Define LOGICAL_OP_NON_SHORT_CIRCUIT as 0, for a short-circuit branch, use the +short-circuit operation instead of the non-short-circuit operation. + +SPEC2017 performance evaluation shows 1% performance improvement for fprate +GEOMEAN and no obvious regression for others. Especially, 526.blender_r +10.6% +on 3A6000. 
+ +This modification will introduce the following FAIL items: + +FAIL: gcc.dg/tree-ssa/copy-headers-8.c scan-tree-dump-times ch2 "Conditional combines static and invariant" 1 +FAIL: gcc.dg/tree-ssa/copy-headers-8.c scan-tree-dump-times ch2 "Will duplicate bb" 2 +FAIL: gcc.dg/tree-ssa/update-threading.c scan-tree-dump-times optimized "Invalid sum" 0 + +gcc/ChangeLog: + + * config/loongarch/loongarch.h (LOGICAL_OP_NON_SHORT_CIRCUIT): Define. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/short-circuit.c: New test. +--- + gcc/config/loongarch/loongarch.h | 1 + + .../gcc.target/loongarch/short-circuit.c | 19 +++++++++++++++++++ + 2 files changed, 20 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/short-circuit.c + +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index f54b078b1..15261fdc0 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -869,6 +869,7 @@ typedef struct { + 1 is the default; other values are interpreted relative to that. */ + + #define BRANCH_COST(speed_p, predictable_p) la_branch_cost ++#define LOGICAL_OP_NON_SHORT_CIRCUIT 0 + + /* Return the asm template for a conditional branch instruction. 
+ OPCODE is the opcode's mnemonic and OPERANDS is the asm template for +diff --git a/gcc/testsuite/gcc.target/loongarch/short-circuit.c b/gcc/testsuite/gcc.target/loongarch/short-circuit.c +new file mode 100644 +index 000000000..bed585ee1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/short-circuit.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ffast-math -fdump-tree-gimple" } */ ++ ++int ++short_circuit (float *a) ++{ ++ float t1x = a[0]; ++ float t2x = a[1]; ++ float t1y = a[2]; ++ float t2y = a[3]; ++ float t1z = a[4]; ++ float t2z = a[5]; ++ ++ if (t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) ++ return 0; ++ ++ return 1; ++} ++/* { dg-final { scan-tree-dump-times "if" 6 "gimple" } } */ +-- +2.43.0 + diff --git a/0122-LoongArch-Split-vec_selects-of-bottom-elements-into-.patch b/0122-LoongArch-Split-vec_selects-of-bottom-elements-into-.patch new file mode 100644 index 0000000..32b6461 --- /dev/null +++ b/0122-LoongArch-Split-vec_selects-of-bottom-elements-into-.patch @@ -0,0 +1,84 @@ +From 5cab5d1a9fb9cfaa0d12d229aa0ee19e0dd55cc5 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Tue, 16 Jan 2024 10:23:20 +0800 +Subject: [PATCH 122/188] LoongArch: Split vec_selects of bottom elements into + simple move + +For below pattern, can be treated as a simple move because floating point +and vector share a common register on loongarch64. + +(set (reg/v:SF 32 $f0 [orig:93 res ] [93]) + (vec_select:SF (reg:V8SF 32 $f0 [115]) + (parallel [ + (const_int 0 [0]) + ]))) + +gcc/ChangeLog: + + * config/loongarch/lasx.md (vec_extract_0): + New define_insn_and_split patten. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vect-extract.c: New test. 
+--- + gcc/config/loongarch/lasx.md | 15 ++++++++++++++ + .../gcc.target/loongarch/vect-extract.c | 20 +++++++++++++++++++ + 2 files changed, 35 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-extract.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index a5128c30c..946811e1a 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -746,6 +746,21 @@ + DONE; + }) + ++(define_insn_and_split "vec_extract_0" ++ [(set (match_operand: 0 "register_operand" "=f") ++ (vec_select: ++ (match_operand:FLASX 1 "register_operand" "f") ++ (parallel [(const_int 0)])))] ++ "ISA_HAS_LSX" ++ "#" ++ "&& reload_completed" ++ [(set (match_dup 0) (match_dup 1))] ++{ ++ operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); ++} ++ [(set_attr "move_type" "fmove") ++ (set_attr "mode" "")]) ++ + (define_expand "vec_perm" + [(match_operand:LASX 0 "register_operand") + (match_operand:LASX 1 "register_operand") +diff --git a/gcc/testsuite/gcc.target/loongarch/vect-extract.c b/gcc/testsuite/gcc.target/loongarch/vect-extract.c +new file mode 100644 +index 000000000..ce126e3a4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vect-extract.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ffast-math -mlasx -fno-vect-cost-model -fno-unroll-loops" } */ ++/* { dg-final { scan-assembler-not "xvpickve.w" } } */ ++/* { dg-final { scan-assembler-not "xvpickve.d" } } */ ++ ++float ++sum_float (float *a, int n) { ++ float res = 0.0; ++ for (int i = 0; i < n; i++) ++ res += a[i]; ++ return res; ++} ++ ++double ++sum_double (double *a, int n) { ++ double res = 0.0; ++ for (int i = 0; i < n; i++) ++ res += a[i]; ++ return res; ++} +-- +2.43.0 + diff --git a/0123-LoongArch-Modify-the-address-calculation-logic-for-o.patch b/0123-LoongArch-Modify-the-address-calculation-logic-for-o.patch new file mode 100644 index 0000000..44a0fdd --- /dev/null +++ 
b/0123-LoongArch-Modify-the-address-calculation-logic-for-o.patch @@ -0,0 +1,112 @@ +From c4815d70715bed71b8e89888ef19eb43e9171229 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Tue, 30 Jan 2024 15:02:32 +0800 +Subject: [PATCH 123/188] LoongArch: Modify the address calculation logic for + obtaining array element values through fp. + +Modify address calculation logic from (((a x C) + fp) + offset) to ((fp + offset) + a x C). +Thereby modifying the register dependencies and optimizing the code. +The value of C is 2 4 or 8. + +The following is the assembly code before and after a loop modification in spec2006 401.bzip: + + old | new + 735 .L71: | 735 .L71: + 736 slli.d $r12,$r15,2 | 736 slli.d $r12,$r15,2 + 737 ldx.w $r13,$r22,$r12 | 737 ldx.w $r13,$r22,$r12 + 738 addi.d $r15,$r15,-1 | 738 addi.d $r15,$r15,-1 + 739 slli.w $r16,$r15,0 | 739 slli.w $r16,$r15,0 + 740 addi.w $r13,$r13,-1 | 740 addi.w $r13,$r13,-1 + 741 slti $r14,$r13,0 | 741 slti $r14,$r13,0 + 742 add.w $r12,$r26,$r13 | 742 add.w $r12,$r26,$r13 + 743 maskeqz $r12,$r12,$r14 | 743 maskeqz $r12,$r12,$r14 + 744 masknez $r14,$r13,$r14 | 744 masknez $r14,$r13,$r14 + 745 or $r12,$r12,$r14 | 745 or $r12,$r12,$r14 + 746 ldx.bu $r14,$r30,$r12 | 746 ldx.bu $r14,$r30,$r12 + 747 lu12i.w $r13,4096>>12 | 747 alsl.d $r14,$r14,$r18,2 + 748 ori $r13,$r13,432 | 748 ldptr.w $r13,$r14,0 + 749 add.d $r13,$r13,$r3 | 749 addi.w $r17,$r13,-1 + 750 alsl.d $r14,$r14,$r13,2 | 750 stptr.w $r17,$r14,0 + 751 ldptr.w $r13,$r14,-1968 | 751 slli.d $r13,$r13,2 + 752 addi.w $r17,$r13,-1 | 752 stx.w $r12,$r22,$r13 + 753 st.w $r17,$r14,-1968 | 753 ldptr.w $r12,$r19,0 + 754 slli.d $r13,$r13,2 | 754 blt $r12,$r16,.L71 + 755 stx.w $r12,$r22,$r13 | 755 .align 4 + 756 ldptr.w $r12,$r18,-2048 | 756 + 757 blt $r12,$r16,.L71 | 757 + 758 .align 4 | 758 + +This patch is ported from riscv's commit r14-3111. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (mem_shadd_or_shadd_rtx_p): New function. 
+ (loongarch_legitimize_address): Add logical transformation code. +--- + gcc/config/loongarch/loongarch.cc | 43 +++++++++++++++++++++++++++++++ + 1 file changed, 43 insertions(+) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 5877b0acf..612a9c138 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3215,6 +3215,22 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + return true; + } + ++/* Helper loongarch_legitimize_address. Given X, return true if it ++ is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8. ++ ++ This respectively represent canonical shift-add rtxs or scaled ++ memory addresses. */ ++static bool ++mem_shadd_or_shadd_rtx_p (rtx x) ++{ ++ return ((GET_CODE (x) == ASHIFT ++ || GET_CODE (x) == MULT) ++ && CONST_INT_P (XEXP (x, 1)) ++ && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3)) ++ || (GET_CODE (x) == MULT ++ && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3)))); ++} ++ + /* This function is used to implement LEGITIMIZE_ADDRESS. If X can + be legitimized in a way that the generic machinery might not expect, + return a new address, otherwise return NULL. MODE is the mode of +@@ -3238,6 +3254,33 @@ loongarch_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, + loongarch_split_plus (x, &base, &offset); + if (offset != 0) + { ++ /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case. */ ++ if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0)) ++ && IMM12_OPERAND (offset)) ++ { ++ rtx index = XEXP (base, 0); ++ rtx fp = XEXP (base, 1); ++ ++ if (REG_P (fp) && REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM) ++ { ++ /* If we were given a MULT, we must fix the constant ++ as we're going to create the ASHIFT form. 
*/ ++ int shift_val = INTVAL (XEXP (index, 1)); ++ if (GET_CODE (index) == MULT) ++ shift_val = exact_log2 (shift_val); ++ ++ rtx reg1 = gen_reg_rtx (Pmode); ++ rtx reg3 = gen_reg_rtx (Pmode); ++ loongarch_emit_binary (PLUS, reg1, fp, GEN_INT (offset)); ++ loongarch_emit_binary (PLUS, reg3, ++ gen_rtx_ASHIFT (Pmode, XEXP (index, 0), ++ GEN_INT (shift_val)), ++ reg1); ++ ++ return reg3; ++ } ++ } ++ + if (!loongarch_valid_base_register_p (base, mode, false)) + base = copy_to_mode_reg (Pmode, base); + addr = loongarch_add_offset (NULL, base, offset); +-- +2.43.0 + diff --git a/0124-LoongArch-Merge-template-got_load_tls_-ld-gd-le-ie.patch b/0124-LoongArch-Merge-template-got_load_tls_-ld-gd-le-ie.patch new file mode 100644 index 0000000..51606bc --- /dev/null +++ b/0124-LoongArch-Merge-template-got_load_tls_-ld-gd-le-ie.patch @@ -0,0 +1,214 @@ +From 3f45bbfe924ffe38832b2ad0050589b9f188422e Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Thu, 25 Jan 2024 14:44:39 +0800 +Subject: [PATCH 124/188] LoongArch: Merge template got_load_tls_{ld/gd/le/ie}. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_load_tls): + Load all types of tls symbols through one function. + (loongarch_got_load_tls_gd): Delete. + (loongarch_got_load_tls_ld): Delete. + (loongarch_got_load_tls_ie): Delete. + (loongarch_got_load_tls_le): Delete. + (loongarch_call_tls_get_addr): Modify the called function name. + (loongarch_legitimize_tls_address): Likewise. + * config/loongarch/loongarch.md (@got_load_tls_gd): Delete. + (@load_tls): New template. + (@got_load_tls_ld): Delete. + (@got_load_tls_le): Delete. + (@got_load_tls_ie): Delete. 
+--- + gcc/config/loongarch/loongarch.cc | 47 +++++------------------- + gcc/config/loongarch/loongarch.md | 59 ++++++++++++------------------- + 2 files changed, 30 insertions(+), 76 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 612a9c138..ced7e58c2 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2732,36 +2732,12 @@ loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) + /* The __tls_get_attr symbol. */ + static GTY (()) rtx loongarch_tls_symbol; + +-/* Load an entry from the GOT for a TLS GD access. */ ++/* Load an entry for a TLS access. */ + + static rtx +-loongarch_got_load_tls_gd (rtx dest, rtx sym) ++loongarch_load_tls (rtx dest, rtx sym) + { +- return gen_got_load_tls_gd (Pmode, dest, sym); +-} +- +-/* Load an entry from the GOT for a TLS LD access. */ +- +-static rtx +-loongarch_got_load_tls_ld (rtx dest, rtx sym) +-{ +- return gen_got_load_tls_ld (Pmode, dest, sym); +-} +- +-/* Load an entry from the GOT for a TLS IE access. */ +- +-static rtx +-loongarch_got_load_tls_ie (rtx dest, rtx sym) +-{ +- return gen_got_load_tls_ie (Pmode, dest, sym); +-} +- +-/* Add in the thread pointer for a TLS LE access. */ +- +-static rtx +-loongarch_got_load_tls_le (rtx dest, rtx sym) +-{ +- return gen_got_load_tls_le (Pmode, dest, sym); ++ return gen_load_tls (Pmode, dest, sym); + } + + /* Return an instruction sequence that calls __tls_get_addr. 
SYM is +@@ -2805,14 +2781,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + emit_insn (gen_tls_low (Pmode, a0, high, loc)); + } + else +- { +- if (type == SYMBOL_TLSLDM) +- emit_insn (loongarch_got_load_tls_ld (a0, loc)); +- else if (type == SYMBOL_TLSGD) +- emit_insn (loongarch_got_load_tls_gd (a0, loc)); +- else +- gcc_unreachable (); +- } ++ emit_insn (loongarch_load_tls (a0, loc)); + + if (flag_plt) + { +@@ -2949,10 +2918,10 @@ loongarch_legitimize_tls_address (rtx loc) + /* la.tls.ie; tp-relative add. */ + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp1 = gen_reg_rtx (Pmode); ++ tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE); + dest = gen_reg_rtx (Pmode); + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { +- tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE); + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); + high = loongarch_force_temporary (tmp3, high); +@@ -2975,7 +2944,7 @@ loongarch_legitimize_tls_address (rtx loc) + emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2)); + } + else +- emit_insn (loongarch_got_load_tls_ie (tmp1, loc)); ++ emit_insn (loongarch_load_tls (tmp1, tmp2)); + emit_insn (gen_add3_insn (dest, tmp1, tp)); + } + break; +@@ -3007,11 +2976,11 @@ loongarch_legitimize_tls_address (rtx loc) + + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp1 = gen_reg_rtx (Pmode); ++ tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE); + dest = gen_reg_rtx (Pmode); + + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { +- tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE); + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); + high = loongarch_force_temporary (tmp3, high); +@@ -3039,7 +3008,7 @@ loongarch_legitimize_tls_address (rtx loc) + } + } + else +- emit_insn (loongarch_got_load_tls_le (tmp1, loc)); ++ emit_insn (loongarch_load_tls (tmp1, tmp2)); + emit_insn (gen_add3_insn (dest, tmp1, tp)); + } + break; +diff --git 
a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 23d8dc126..4f9a92334 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -51,10 +51,7 @@ + UNSPEC_BITREV_8B + + ;; TLS +- UNSPEC_TLS_GD +- UNSPEC_TLS_LD +- UNSPEC_TLS_LE +- UNSPEC_TLS_IE ++ UNSPEC_TLS + + ;; Stack tie + UNSPEC_TIE +@@ -2701,45 +2698,33 @@ + + ;; Thread-Local Storage + +-(define_insn "@got_load_tls_gd" ++(define_insn "@load_tls" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P + [(match_operand:P 1 "symbolic_operand" "")] +- UNSPEC_TLS_GD))] ++ UNSPEC_TLS))] + "" +- "la.tls.gd\t%0,%1" +- [(set_attr "got" "load") +- (set_attr "mode" "")]) +- +-(define_insn "@got_load_tls_ld" +- [(set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- [(match_operand:P 1 "symbolic_operand" "")] +- UNSPEC_TLS_LD))] +- "" +- "la.tls.ld\t%0,%1" +- [(set_attr "got" "load") +- (set_attr "mode" "")]) ++{ ++ enum loongarch_symbol_type symbol_type; ++ gcc_assert (loongarch_symbolic_constant_p (operands[1], &symbol_type)); + +-(define_insn "@got_load_tls_le" +- [(set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- [(match_operand:P 1 "symbolic_operand" "")] +- UNSPEC_TLS_LE))] +- "" +- "la.tls.le\t%0,%1" +- [(set_attr "got" "load") +- (set_attr "mode" "")]) ++ switch (symbol_type) ++ { ++ case SYMBOL_TLS_LE: ++ return "la.tls.le\t%0,%1"; ++ case SYMBOL_TLS_IE: ++ return "la.tls.ie\t%0,%1"; ++ case SYMBOL_TLSLDM: ++ return "la.tls.ld\t%0,%1"; ++ case SYMBOL_TLSGD: ++ return "la.tls.gd\t%0,%1"; + +-(define_insn "@got_load_tls_ie" +- [(set (match_operand:P 0 "register_operand" "=r") +- (unspec:P +- [(match_operand:P 1 "symbolic_operand" "")] +- UNSPEC_TLS_IE))] +- "" +- "la.tls.ie\t%0,%1" +- [(set_attr "got" "load") +- (set_attr "mode" "")]) ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "mode" "") ++ (set_attr "insn_count" "2")]) + + ;; Move operand 1 to the high word of operand 0 using movgr2frh.w, preserving the 
+ ;; value in the low word. +-- +2.43.0 + diff --git a/0125-LoongArch-Add-the-macro-implementation-of-mcmodel-ex.patch b/0125-LoongArch-Add-the-macro-implementation-of-mcmodel-ex.patch new file mode 100644 index 0000000..c14ee12 --- /dev/null +++ b/0125-LoongArch-Add-the-macro-implementation-of-mcmodel-ex.patch @@ -0,0 +1,453 @@ +From cd177538c2a0f5248e9e7af6247b4d1ba6fe55db Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Thu, 25 Jan 2024 19:10:46 +0800 +Subject: [PATCH 125/188] LoongArch: Add the macro implementation of + mcmodel=extreme. + +gcc/ChangeLog: + + * config/loongarch/loongarch-protos.h (loongarch_symbol_extreme_p): + Add function declaration. + * config/loongarch/loongarch.cc (loongarch_symbolic_constant_p): + For SYMBOL_PCREL64, non-zero addend of "la.local $rd,$rt,sym+addend" + is not allowed + (loongarch_load_tls): Added macro support in extreme mode. + (loongarch_call_tls_get_addr): Likewise. + (loongarch_legitimize_tls_address): Likewise. + (loongarch_force_address): Likewise. + (loongarch_legitimize_move): Likewise. + (loongarch_output_mi_thunk): Likewise. + (loongarch_option_override_internal): Remove the code that detects + explicit relocs status. + (loongarch_handle_model_attribute): Likewise. + * config/loongarch/loongarch.md (movdi_symbolic_off64): New template. + * config/loongarch/predicates.md (symbolic_off64_operand): New predicate. + (symbolic_off64_or_reg_operand): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/attr-model-5.c: New test. + * gcc.target/loongarch/func-call-extreme-5.c: New test. + * gcc.target/loongarch/func-call-extreme-6.c: New test. + * gcc.target/loongarch/tls-extreme-macro.c: New test. 
+--- + gcc/config/loongarch/loongarch-protos.h | 1 + + gcc/config/loongarch/loongarch.cc | 110 +++++++++++------- + gcc/config/loongarch/loongarch.md | 48 +++++++- + gcc/config/loongarch/predicates.md | 12 ++ + .../gcc.target/loongarch/attr-model-5.c | 8 ++ + .../loongarch/func-call-extreme-5.c | 7 ++ + .../loongarch/func-call-extreme-6.c | 7 ++ + .../gcc.target/loongarch/tls-extreme-macro.c | 35 ++++++ + 8 files changed, 184 insertions(+), 44 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-5.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 5060efbb6..87b94e8b0 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -222,4 +222,5 @@ extern rtx loongarch_build_signbit_mask (machine_mode, bool, bool); + extern void loongarch_emit_swrsqrtsf (rtx, rtx, machine_mode, bool); + extern void loongarch_emit_swdivsf (rtx, rtx, rtx, machine_mode); + extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type); ++extern bool loongarch_symbol_extreme_p (enum loongarch_symbol_type); + #endif /* ! GCC_LOONGARCH_PROTOS_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index ced7e58c2..9cfe5bfb2 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1932,8 +1932,13 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type) + relocations. */ + switch (*symbol_type) + { +- case SYMBOL_PCREL: + case SYMBOL_PCREL64: ++ /* When the code model is extreme, the non-zero offset situation ++ has not been handled well, so it is disabled here now. 
*/ ++ if (!loongarch_explicit_relocs_p (SYMBOL_PCREL64)) ++ return false; ++ /* fall through */ ++ case SYMBOL_PCREL: + /* GAS rejects offsets outside the range [-2^31, 2^31-1]. */ + return sext_hwi (INTVAL (offset), 32) == INTVAL (offset); + +@@ -2735,9 +2740,15 @@ static GTY (()) rtx loongarch_tls_symbol; + /* Load an entry for a TLS access. */ + + static rtx +-loongarch_load_tls (rtx dest, rtx sym) ++loongarch_load_tls (rtx dest, rtx sym, enum loongarch_symbol_type type) + { +- return gen_load_tls (Pmode, dest, sym); ++ /* TLS LE gets a 32 or 64 bit offset here, so one register can do it. */ ++ if (type == SYMBOL_TLS_LE) ++ return gen_load_tls (Pmode, dest, sym); ++ ++ return loongarch_symbol_extreme_p (type) ++ ? gen_movdi_symbolic_off64 (dest, sym, gen_reg_rtx (DImode)) ++ : gen_load_tls (Pmode, dest, sym); + } + + /* Return an instruction sequence that calls __tls_get_addr. SYM is +@@ -2769,8 +2780,6 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + if (TARGET_CMODEL_EXTREME) + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); +- + rtx tmp1 = gen_reg_rtx (Pmode); + emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc)); + emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc)); +@@ -2781,7 +2790,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + emit_insn (gen_tls_low (Pmode, a0, high, loc)); + } + else +- emit_insn (loongarch_load_tls (a0, loc)); ++ emit_insn (loongarch_load_tls (a0, loc, type)); + + if (flag_plt) + { +@@ -2848,22 +2857,28 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + case CMODEL_EXTREME: + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); +- +- rtx tmp1 = gen_reg_rtx (Pmode); +- rtx high = gen_reg_rtx (Pmode); +- +- loongarch_emit_move (high, +- gen_rtx_HIGH (Pmode, loongarch_tls_symbol)); +- loongarch_emit_move (tmp1, gen_rtx_LO_SUM (Pmode, +- gen_rtx_REG (Pmode, 0), +- loongarch_tls_symbol)); +- emit_insn (gen_lui_h_lo20 (tmp1, tmp1, 
loongarch_tls_symbol)); +- emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol)); +- loongarch_emit_move (dest, +- gen_rtx_MEM (Pmode, +- gen_rtx_PLUS (Pmode, +- high, tmp1))); ++ if (loongarch_explicit_relocs_p (SYMBOL_GOT_DISP)) ++ { ++ rtx tmp1 = gen_reg_rtx (Pmode); ++ rtx high = gen_reg_rtx (Pmode); ++ ++ loongarch_emit_move (high, ++ gen_rtx_HIGH (Pmode, ++ loongarch_tls_symbol)); ++ loongarch_emit_move (tmp1, ++ gen_rtx_LO_SUM (Pmode, ++ gen_rtx_REG (Pmode, 0), ++ loongarch_tls_symbol)); ++ emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol)); ++ emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol)); ++ loongarch_emit_move (dest, ++ gen_rtx_MEM (Pmode, ++ gen_rtx_PLUS (Pmode, ++ high, tmp1))); ++ } ++ else ++ emit_insn (gen_movdi_symbolic_off64 (dest, loongarch_tls_symbol, ++ gen_reg_rtx (DImode))); + } + break; + +@@ -2928,8 +2943,6 @@ loongarch_legitimize_tls_address (rtx loc) + + if (TARGET_CMODEL_EXTREME) + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); +- + rtx tmp3 = gen_reg_rtx (Pmode); + emit_insn (gen_tls_low (Pmode, tmp3, + gen_rtx_REG (Pmode, 0), tmp2)); +@@ -2944,7 +2957,7 @@ loongarch_legitimize_tls_address (rtx loc) + emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2)); + } + else +- emit_insn (loongarch_load_tls (tmp1, tmp2)); ++ emit_insn (loongarch_load_tls (tmp1, tmp2, SYMBOL_TLS_IE)); + emit_insn (gen_add3_insn (dest, tmp1, tp)); + } + break; +@@ -3001,14 +3014,12 @@ loongarch_legitimize_tls_address (rtx loc) + + if (TARGET_CMODEL_EXTREME) + { +- gcc_assert (TARGET_EXPLICIT_RELOCS); +- + emit_insn (gen_lui_h_lo20 (tmp1, tmp1, tmp2)); + emit_insn (gen_lui_h_hi12 (tmp1, tmp1, tmp2)); + } + } + else +- emit_insn (loongarch_load_tls (tmp1, tmp2)); ++ emit_insn (loongarch_load_tls (tmp1, tmp2, SYMBOL_TLS_LE)); + emit_insn (gen_add3_insn (dest, tmp1, tp)); + } + break; +@@ -3081,7 +3092,7 @@ loongarch_force_address (rtx x, machine_mode mode) + return x; + } + +-static bool ++bool + loongarch_symbol_extreme_p (enum 
loongarch_symbol_type type) + { + switch (type) +@@ -3402,6 +3413,21 @@ loongarch_legitimize_move (machine_mode mode, rtx dest, rtx src) + return true; + } + ++ /* Obtain the address of the symbol through the macro instruction ++ of two registers. */ ++ enum loongarch_symbol_type symbol_type; ++ if (TARGET_64BIT && register_operand (dest, mode) ++ && loongarch_symbolic_constant_p (src, &symbol_type) ++ && loongarch_symbol_extreme_p (symbol_type)) ++ { ++ gcc_assert (can_create_pseudo_p ()); ++ rtx tmp_reg = gen_reg_rtx (DImode); ++ emit_insn (gen_movdi_symbolic_off64 (dest, src, tmp_reg)); ++ set_unique_reg_note (get_last_insn (), REG_UNUSED, tmp_reg); ++ set_unique_reg_note (get_last_insn (), REG_EQUAL, src); ++ return true; ++ } ++ + return false; + } + +@@ -7458,12 +7484,22 @@ loongarch_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, + allowed, otherwise load the address into a register first. */ + if (use_sibcall_p) + { +- insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx)); ++ if (TARGET_CMODEL_EXTREME) ++ { ++ emit_insn (gen_movdi_symbolic_off64 (temp1, fnaddr, temp2)); ++ insn = emit_call_insn (gen_sibcall_internal (temp1, const0_rtx)); ++ } ++ else ++ insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx)); + SIBLING_CALL_P (insn) = 1; + } + else + { +- loongarch_emit_move (temp1, fnaddr); ++ if (TARGET_CMODEL_EXTREME) ++ emit_insn (gen_movdi_symbolic_off64 (temp1, fnaddr, temp2)); ++ else ++ loongarch_emit_move (temp1, fnaddr); ++ + emit_jump_insn (gen_indirect_jump (temp1)); + } + +@@ -7568,10 +7604,6 @@ loongarch_option_override_internal (struct gcc_options *opts, + switch (la_target.cmodel) + { + case CMODEL_EXTREME: +- if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE) +- error ("code model %qs is not compatible with %s", +- "extreme", "-mexplicit-relocs=none"); +- + if (opts->x_flag_plt) + { + if (global_options_set.x_flag_plt) +@@ -7989,14 +8021,6 @@ loongarch_handle_model_attribute (tree *node, tree name, 
tree arg, int, + *no_add_attrs = true; + return NULL_TREE; + } +- if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE) +- { +- error_at (DECL_SOURCE_LOCATION (decl), +- "%qE attribute is not compatible with %s", name, +- "-mexplicit-relocs=none"); +- *no_add_attrs = true; +- return NULL_TREE; +- } + + arg = TREE_VALUE (arg); + if (TREE_CODE (arg) != STRING_CST) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 4f9a92334..add55e0af 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -82,6 +82,8 @@ + + UNSPEC_SIBCALL_VALUE_MULTIPLE_INTERNAL_1 + UNSPEC_CALL_VALUE_MULTIPLE_INTERNAL_1 ++ ++ UNSPEC_LOAD_SYMBOL_OFFSET64 + ]) + + (define_c_enum "unspecv" [ +@@ -2182,6 +2184,46 @@ + [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore") + (set_attr "mode" "DI")]) + ++;; Use two registers to get the global symbol address from the got table. ++;; la.global rd, rt, sym ++ ++(define_insn_and_split "movdi_symbolic_off64" ++ [(set (match_operand:DI 0 "register_operand" "=r,r") ++ (match_operand:DI 1 "symbolic_off64_or_reg_operand" "Yd,r")) ++ (unspec:DI [(const_int 0)] ++ UNSPEC_LOAD_SYMBOL_OFFSET64) ++ (clobber (match_operand:DI 2 "register_operand" "=&r,r"))] ++ "TARGET_64BIT && TARGET_CMODEL_EXTREME" ++{ ++ if (which_alternative == 1) ++ return "#"; ++ ++ enum loongarch_symbol_type symbol_type; ++ gcc_assert (loongarch_symbolic_constant_p (operands[1], &symbol_type)); ++ ++ switch (symbol_type) ++ { ++ case SYMBOL_PCREL64: ++ return "la.local\t%0,%2,%1"; ++ case SYMBOL_GOT_DISP: ++ return "la.global\t%0,%2,%1"; ++ case SYMBOL_TLS_IE: ++ return "la.tls.ie\t%0,%2,%1"; ++ case SYMBOL_TLSGD: ++ return "la.tls.gd\t%0,%2,%1"; ++ case SYMBOL_TLSLDM: ++ return "la.tls.ld\t%0,%2,%1"; ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ "&& REG_P (operands[1]) && find_reg_note (insn, REG_UNUSED, operands[2]) != 0" ++ [(set (match_dup 0) (match_dup 1))] ++ "" ++ [(set_attr "mode" "DI") ++ 
(set_attr "insn_count" "5")]) ++ + ;; 32-bit Integer moves + + (define_expand "movsi" +@@ -2724,7 +2766,11 @@ + } + } + [(set_attr "mode" "") +- (set_attr "insn_count" "2")]) ++ (set (attr "insn_count") ++ (if_then_else ++ (match_test "TARGET_CMODEL_EXTREME") ++ (const_int 4) ++ (const_int 2)))]) + + ;; Move operand 1 to the high word of operand 0 using movgr2frh.w, preserving the + ;; value in the low word. +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 824a85b36..1d9a30695 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -576,6 +576,18 @@ + || symbolic_pcrel_offset_operand (op, Pmode)); + }) + ++(define_predicate "symbolic_off64_operand" ++ (match_code "const,symbol_ref,label_ref") ++{ ++ enum loongarch_symbol_type type; ++ return loongarch_symbolic_constant_p (op, &type) ++ && loongarch_symbol_extreme_p (type); ++}) ++ ++(define_predicate "symbolic_off64_or_reg_operand" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "symbolic_off64_operand"))) ++ + (define_predicate "equality_operator" + (match_code "eq,ne")) + +diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-5.c b/gcc/testsuite/gcc.target/loongarch/attr-model-5.c +new file mode 100644 +index 000000000..5f2c3ec9e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/attr-model-5.c +@@ -0,0 +1,8 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mexplicit-relocs=none -mcmodel=extreme -O2 -fno-pic" } */ ++/* { dg-final { scan-assembler "la.local\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,x" } } */ ++/* { dg-final { scan-assembler "la.local\t\\\$r\[0-9\]+,y" } } */ ++/* { dg-final { scan-assembler "la.local\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,counter" } } */ ++ ++#define ATTR_MODEL_TEST ++#include "attr-model-test.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c +new file mode 100644 +index 000000000..b1bd9d236 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs=none -mcmodel=extreme" } */ ++/* { dg-final { scan-assembler "test:.*la.global\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,g" } } */ ++/* { dg-final { scan-assembler "test1:.*la.global\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,f" } } */ ++/* { dg-final { scan-assembler "test2:.*la.local\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,l" } } */ ++ ++#include "func-call-extreme-1.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c +new file mode 100644 +index 000000000..6e6ad5c9f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs=none -mcmodel=extreme" } */ ++/* { dg-final { scan-assembler "test:.*la.global\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,g" } } */ ++/* { dg-final { scan-assembler "test1:.*la.local\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,f" } } */ ++/* { dg-final { scan-assembler "test2:.*la.local\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,l" } } */ ++ ++#include "func-call-extreme-1.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c b/gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c +new file mode 100644 +index 000000000..4341f8212 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c +@@ -0,0 +1,35 @@ ++/* { dg-do compile } */ ++/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=none" } */ ++/* { dg-final { scan-assembler "test_le:.*la.tls.le\t\\\$r\[0-9\]+,\\\.L" { target tls_native } } } */ ++/* { dg-final { scan-assembler "test_ie:.*la.tls.ie\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,\\\.L" { target tls_native } } } */ ++/* { dg-final { scan-assembler "test_ld:.*la.tls.ld\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,\\\.L.*la.global\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,__tls_get_addr" { target 
tls_native } } } */ ++/* { dg-final { scan-assembler "test_le:.*la.tls.gd\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,\\\.L.*la.global\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,__tls_get_addr" { target tls_native } } } */ ++ ++__thread int c __attribute__ ((tls_model ("local-exec"))); ++__thread int d __attribute__ ((tls_model ("initial-exec"))); ++__thread int e __attribute__ ((tls_model ("local-dynamic"))); ++__thread int f __attribute__ ((tls_model ("global-dynamic"))); ++ ++int ++test_le (void) ++{ ++ return c; ++} ++ ++int ++test_ie (void) ++{ ++ return d; ++} ++ ++int ++test_ld (void) ++{ ++ return e; ++} ++ ++int ++test_gd (void) ++{ ++ return f; ++} +-- +2.43.0 + diff --git a/0126-LoongArch-Enable-explicit-reloc-for-extreme-TLS-GD-L.patch b/0126-LoongArch-Enable-explicit-reloc-for-extreme-TLS-GD-L.patch new file mode 100644 index 0000000..ed9d4db --- /dev/null +++ b/0126-LoongArch-Enable-explicit-reloc-for-extreme-TLS-GD-L.patch @@ -0,0 +1,126 @@ +From 1ccf16353b2be4308c79f3b011cb800bfa6f94f4 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Fri, 26 Jan 2024 10:46:51 +0800 +Subject: [PATCH 126/188] LoongArch: Enable explicit reloc for extreme TLS + GD/LD with -mexplicit-relocs=auto. + +Binutils does not support relaxation using four instructions to obtain +symbol addresses + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + When the code model of the symbol is extreme and -mexplicit-relocs=auto, + the macro instruction loading symbol address is not applicable. + (loongarch_call_tls_get_addr): Adjust code. + (loongarch_legitimize_tls_address): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c: New test. + * gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c: New test. 
+--- + gcc/config/loongarch/loongarch.cc | 19 +++++++++---------- + .../explicit-relocs-extreme-auto-tls-ld-gd.c | 5 +++++ + .../explicit-relocs-medium-auto-tls-ld-gd.c | 5 +++++ + 3 files changed, 19 insertions(+), 10 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 9cfe5bfb2..84b949021 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -1968,6 +1968,10 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO) + return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS; + ++ /* The linker don't know how to relax accesses in extreme code model. */ ++ if (loongarch_symbol_extreme_p (type)) ++ return true; ++ + switch (type) + { + case SYMBOL_TLS_IE: +@@ -1979,11 +1983,6 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) + does not relax 64-bit pc-relative accesses as at now. */ + return true; + case SYMBOL_GOT_DISP: +- /* The linker don't know how to relax GOT accesses in extreme +- code model. */ +- if (TARGET_CMODEL_EXTREME) +- return true; +- + /* If we are performing LTO for a final link, and we have the + linker plugin so we know the resolution of the symbols, then + all GOT references are binding to external symbols or +@@ -2772,7 +2771,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + start_sequence (); + +- if (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS) ++ if (loongarch_explicit_relocs_p (type)) + { + /* Split tls symbol to high and low. 
*/ + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); +@@ -2805,7 +2804,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + case CMODEL_MEDIUM: + { + rtx reg = gen_reg_rtx (Pmode); +- if (TARGET_EXPLICIT_RELOCS) ++ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { + emit_insn (gen_pcalau12i (Pmode, reg, loongarch_tls_symbol)); + rtx call = gen_call_value_internal_1 (Pmode, v0, reg, +@@ -2841,7 +2840,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + case CMODEL_NORMAL: + case CMODEL_MEDIUM: + { +- if (TARGET_EXPLICIT_RELOCS) ++ if (loongarch_explicit_relocs_p (SYMBOL_GOT_DISP)) + { + rtx high = gen_reg_rtx (Pmode); + loongarch_emit_move (high, +@@ -2935,7 +2934,7 @@ loongarch_legitimize_tls_address (rtx loc) + tmp1 = gen_reg_rtx (Pmode); + tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE); + dest = gen_reg_rtx (Pmode); +- if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) ++ if (loongarch_explicit_relocs_p (SYMBOL_TLS_IE)) + { + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); +@@ -2992,7 +2991,7 @@ loongarch_legitimize_tls_address (rtx loc) + tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE); + dest = gen_reg_rtx (Pmode); + +- if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) ++ if (loongarch_explicit_relocs_p (SYMBOL_TLS_LE)) + { + tmp3 = gen_reg_rtx (Pmode); + rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c +new file mode 100644 +index 000000000..35bd4570a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=extreme -fno-plt" } */ ++/* { dg-final { scan-assembler-not "la.tls.\[lg\]d" { target tls_native } } } */ ++ ++#include 
"./explicit-relocs-auto-tls-ld-gd.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c +new file mode 100644 +index 000000000..47bffae8a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=medium -fplt" } */ ++/* { dg-final { scan-assembler-not "la.global" { target tls_native } } } */ ++ ++#include "./explicit-relocs-auto-tls-ld-gd.c" +-- +2.43.0 + diff --git a/0127-LoongArch-Added-support-for-loading-__get_tls_addr-s.patch b/0127-LoongArch-Added-support-for-loading-__get_tls_addr-s.patch new file mode 100644 index 0000000..653a7ab --- /dev/null +++ b/0127-LoongArch-Added-support-for-loading-__get_tls_addr-s.patch @@ -0,0 +1,72 @@ +From d802fd5eb24bba0c274edeea5aff33e794927aaa Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Fri, 26 Jan 2024 11:14:00 +0800 +Subject: [PATCH 127/188] LoongArch: Added support for loading __get_tls_addr + symbol address using call36. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_call_tls_get_addr): + Add support for call36. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c: New test. 
+--- + gcc/config/loongarch/loongarch.cc | 22 ++++++++++++++----- + ...icit-relocs-medium-call36-auto-tls-ld-gd.c | 5 +++++ + 2 files changed, 21 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 84b949021..0050813df 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2803,17 +2803,27 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + case CMODEL_MEDIUM: + { +- rtx reg = gen_reg_rtx (Pmode); + if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE) + { +- emit_insn (gen_pcalau12i (Pmode, reg, loongarch_tls_symbol)); +- rtx call = gen_call_value_internal_1 (Pmode, v0, reg, +- loongarch_tls_symbol, +- const0_rtx); +- insn = emit_call_insn (call); ++ rtx call; ++ ++ if (HAVE_AS_SUPPORT_CALL36) ++ call = gen_call_value_internal (v0, loongarch_tls_symbol, ++ const0_rtx); ++ else ++ { ++ rtx reg = gen_reg_rtx (Pmode); ++ emit_insn (gen_pcalau12i (Pmode, reg, ++ loongarch_tls_symbol)); ++ call = gen_call_value_internal_1 (Pmode, v0, reg, ++ loongarch_tls_symbol, ++ const0_rtx); ++ } ++ insn = emit_call_insn (call); + } + else + { ++ rtx reg = gen_reg_rtx (Pmode); + emit_move_insn (reg, loongarch_tls_symbol); + insn = emit_call_insn (gen_call_value_internal (v0, + reg, +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c +new file mode 100644 +index 000000000..d1a482083 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=medium -fplt" } */ ++/* { dg-final { scan-assembler "pcaddu18i\t\\\$r1,%call36\\\(__tls_get_addr\\\)" { target { tls_native && 
loongarch_call36_support } } } } */ ++ ++#include "./explicit-relocs-auto-tls-ld-gd.c" +-- +2.43.0 + diff --git a/0128-LoongArch-Don-t-split-the-instructions-containing-re.patch b/0128-LoongArch-Don-t-split-the-instructions-containing-re.patch new file mode 100644 index 0000000..9c088e0 --- /dev/null +++ b/0128-LoongArch-Don-t-split-the-instructions-containing-re.patch @@ -0,0 +1,514 @@ +From 45aace43891ccaef756f2f1356edbb0da676629b Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Mon, 29 Jan 2024 15:20:07 +0800 +Subject: [PATCH 128/188] LoongArch: Don't split the instructions containing + relocs for extreme code model. + +The ABI mandates the pcalau12i/addi.d/lu32i.d/lu52i.d instructions for +addressing a symbol to be adjacent. So model them as "one large +instruction", i.e. define_insn, with two output registers. The real +address is the sum of these two registers. + +The advantage of this approach is the RTL passes can still use ldx/stx +instructions to skip an addi.d instruction. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (unspec): Add + UNSPEC_LA_PCREL_64_PART1 and UNSPEC_LA_PCREL_64_PART2. + (la_pcrel64_two_parts): New define_insn. + * config/loongarch/loongarch.cc (loongarch_tls_symbol): Fix a + typo in the comment. + (loongarch_call_tls_get_addr): If -mcmodel=extreme + -mexplicit-relocs={always,auto}, use la_pcrel64_two_parts for + addressing the TLS symbol and __tls_get_addr. Emit an REG_EQUAL + note to allow CSE addressing __tls_get_addr. + (loongarch_legitimize_tls_address): If -mcmodel=extreme + -mexplicit-relocs={always,auto}, address TLS IE symbols with + la_pcrel64_two_parts. + (loongarch_split_symbol): If -mcmodel=extreme + -mexplicit-relocs={always,auto}, address symbols with + la_pcrel64_two_parts. + (loongarch_output_mi_thunk): Clean up unreachable code. If + -mcmodel=extreme -mexplicit-relocs={always,auto}, address the MI + thunks with la_pcrel64_two_parts. 
+ +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/func-call-extreme-1.c (dg-options): + Use -O2 instead of -O0 to ensure the pcalau12i/addi/lu32i/lu52i + instruction sequences are not reordered by the compiler. + (NOIPA): Disallow interprocedural optimizations. + * gcc.target/loongarch/func-call-extreme-2.c: Remove the content + duplicated from func-call-extreme-1.c, include it instead. + (dg-options): Likewise. + * gcc.target/loongarch/func-call-extreme-3.c (dg-options): + Likewise. + * gcc.target/loongarch/func-call-extreme-4.c (dg-options): + Likewise. + * gcc.target/loongarch/cmodel-extreme-1.c: New test. + * gcc.target/loongarch/cmodel-extreme-2.c: New test. + * g++.target/loongarch/cmodel-extreme-mi-thunk-1.C: New test. + * g++.target/loongarch/cmodel-extreme-mi-thunk-2.C: New test. + * g++.target/loongarch/cmodel-extreme-mi-thunk-3.C: New test. +--- + gcc/config/loongarch/loongarch.cc | 131 ++++++++++-------- + gcc/config/loongarch/loongarch.md | 20 +++ + .../loongarch/cmodel-extreme-mi-thunk-1.C | 11 ++ + .../loongarch/cmodel-extreme-mi-thunk-2.C | 6 + + .../loongarch/cmodel-extreme-mi-thunk-3.C | 6 + + .../gcc.target/loongarch/cmodel-extreme-1.c | 18 +++ + .../gcc.target/loongarch/cmodel-extreme-2.c | 7 + + .../loongarch/func-call-extreme-1.c | 14 +- + .../loongarch/func-call-extreme-2.c | 29 +--- + .../loongarch/func-call-extreme-3.c | 2 +- + .../loongarch/func-call-extreme-4.c | 2 +- + 11 files changed, 154 insertions(+), 92 deletions(-) + create mode 100644 gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-1.C + create mode 100644 gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-2.C + create mode 100644 gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-3.C + create mode 100644 gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 0050813df..b8f0291ab 
100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2733,7 +2733,7 @@ loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) + return plus_constant (Pmode, reg, offset); + } + +-/* The __tls_get_attr symbol. */ ++/* The __tls_get_addr symbol. */ + static GTY (()) rtx loongarch_tls_symbol; + + /* Load an entry for a TLS access. */ +@@ -2773,20 +2773,22 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + + if (loongarch_explicit_relocs_p (type)) + { +- /* Split tls symbol to high and low. */ +- rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); +- high = loongarch_force_temporary (tmp, high); +- + if (TARGET_CMODEL_EXTREME) + { +- rtx tmp1 = gen_reg_rtx (Pmode); +- emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc)); +- emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc)); +- emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loc)); +- emit_move_insn (a0, gen_rtx_PLUS (Pmode, high, tmp1)); ++ rtx part1 = gen_reg_rtx (Pmode); ++ rtx part2 = gen_reg_rtx (Pmode); ++ ++ emit_insn (gen_la_pcrel64_two_parts (part1, part2, loc)); ++ emit_move_insn (a0, gen_rtx_PLUS (Pmode, part1, part2)); + } + else +- emit_insn (gen_tls_low (Pmode, a0, high, loc)); ++ { ++ /* Split tls symbol to high and low. 
*/ ++ rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); ++ ++ high = loongarch_force_temporary (tmp, high); ++ emit_insn (gen_tls_low (Pmode, a0, high, loc)); ++ } + } + else + emit_insn (loongarch_load_tls (a0, loc, type)); +@@ -2868,22 +2870,28 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + { + if (loongarch_explicit_relocs_p (SYMBOL_GOT_DISP)) + { +- rtx tmp1 = gen_reg_rtx (Pmode); +- rtx high = gen_reg_rtx (Pmode); ++ gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE); + +- loongarch_emit_move (high, +- gen_rtx_HIGH (Pmode, +- loongarch_tls_symbol)); +- loongarch_emit_move (tmp1, +- gen_rtx_LO_SUM (Pmode, +- gen_rtx_REG (Pmode, 0), ++ rtx part1 = gen_reg_rtx (Pmode); ++ rtx part2 = gen_reg_rtx (Pmode); ++ ++ emit_insn (gen_la_pcrel64_two_parts (part1, part2, + loongarch_tls_symbol)); +- emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol)); +- emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol)); +- loongarch_emit_move (dest, +- gen_rtx_MEM (Pmode, +- gen_rtx_PLUS (Pmode, +- high, tmp1))); ++ loongarch_emit_move ( ++ dest, ++ gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, ++ part1, ++ part2))); ++ ++ /* Put an REG_EQUAL note here to allow CSE (storing ++ part1 + part2, i.e. the address of tls_get_addr into ++ a saved register and use it for multiple TLS ++ accesses). 
*/ ++ rtx sum = gen_rtx_UNSPEC ( ++ Pmode, gen_rtvec (1, loongarch_tls_symbol), ++ UNSPEC_ADDRESS_FIRST ++ + loongarch_classify_symbol (loongarch_tls_symbol)); ++ set_unique_reg_note (get_last_insn (), REG_EQUAL, sum); + } + else + emit_insn (gen_movdi_symbolic_off64 (dest, loongarch_tls_symbol, +@@ -2946,24 +2954,30 @@ loongarch_legitimize_tls_address (rtx loc) + dest = gen_reg_rtx (Pmode); + if (loongarch_explicit_relocs_p (SYMBOL_TLS_IE)) + { +- tmp3 = gen_reg_rtx (Pmode); +- rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); +- high = loongarch_force_temporary (tmp3, high); +- + if (TARGET_CMODEL_EXTREME) + { +- rtx tmp3 = gen_reg_rtx (Pmode); +- emit_insn (gen_tls_low (Pmode, tmp3, +- gen_rtx_REG (Pmode, 0), tmp2)); +- emit_insn (gen_lui_h_lo20 (tmp3, tmp3, tmp2)); +- emit_insn (gen_lui_h_hi12 (tmp3, tmp3, tmp2)); ++ gcc_assert (la_opt_explicit_relocs ++ != EXPLICIT_RELOCS_NONE); ++ ++ rtx part1 = gen_reg_rtx (Pmode); ++ rtx part2 = gen_reg_rtx (Pmode); ++ ++ emit_insn (gen_la_pcrel64_two_parts (part1, part2, ++ tmp2)); + emit_move_insn (tmp1, + gen_rtx_MEM (Pmode, + gen_rtx_PLUS (Pmode, +- high, tmp3))); ++ part1, ++ part2))); + } + else +- emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2)); ++ { ++ tmp3 = gen_reg_rtx (Pmode); ++ rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); ++ ++ high = loongarch_force_temporary (tmp3, high); ++ emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2)); ++ } + } + else + emit_insn (loongarch_load_tls (tmp1, tmp2, SYMBOL_TLS_IE)); +@@ -3142,24 +3156,23 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + || !loongarch_split_symbol_type (symbol_type)) + return false; + +- rtx high, temp1 = NULL; ++ rtx high; + + if (temp == NULL) + temp = gen_reg_rtx (Pmode); + +- /* Get the 12-31 bits of the address. 
*/ +- high = gen_rtx_HIGH (Pmode, copy_rtx (addr)); +- high = loongarch_force_temporary (temp, high); +- + if (loongarch_symbol_extreme_p (symbol_type) && can_create_pseudo_p ()) + { + gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE); + +- temp1 = gen_reg_rtx (Pmode); +- emit_move_insn (temp1, gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 0), +- addr)); +- emit_insn (gen_lui_h_lo20 (temp1, temp1, addr)); +- emit_insn (gen_lui_h_hi12 (temp1, temp1, addr)); ++ high = gen_reg_rtx (Pmode); ++ emit_insn (gen_la_pcrel64_two_parts (high, temp, addr)); ++ } ++ else ++ { ++ /* Get the 12-31 bits of the address. */ ++ high = gen_rtx_HIGH (Pmode, copy_rtx (addr)); ++ high = loongarch_force_temporary (temp, high); + } + + if (low_out) +@@ -3168,7 +3181,7 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + case SYMBOL_PCREL64: + if (can_create_pseudo_p ()) + { +- *low_out = gen_rtx_PLUS (Pmode, high, temp1); ++ *low_out = gen_rtx_PLUS (Pmode, high, temp); + break; + } + /* fall through */ +@@ -3180,7 +3193,8 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) + /* SYMBOL_GOT_DISP symbols are loaded from the GOT. */ + { + if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ()) +- *low_out = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, high, temp1)); ++ *low_out = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, high, ++ temp)); + else + { + rtx low = gen_rtx_LO_SUM (Pmode, high, addr); +@@ -7493,21 +7507,24 @@ loongarch_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, + allowed, otherwise load the address into a register first. 
*/ + if (use_sibcall_p) + { +- if (TARGET_CMODEL_EXTREME) +- { +- emit_insn (gen_movdi_symbolic_off64 (temp1, fnaddr, temp2)); +- insn = emit_call_insn (gen_sibcall_internal (temp1, const0_rtx)); +- } +- else +- insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx)); ++ /* If TARGET_CMODEL_EXTREME, we cannot do a direct jump at all ++ and const_call_insn_operand should have returned false. */ ++ gcc_assert (!TARGET_CMODEL_EXTREME); ++ ++ insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx)); + SIBLING_CALL_P (insn) = 1; + } + else + { +- if (TARGET_CMODEL_EXTREME) ++ if (!TARGET_CMODEL_EXTREME) ++ loongarch_emit_move (temp1, fnaddr); ++ else if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE) + emit_insn (gen_movdi_symbolic_off64 (temp1, fnaddr, temp2)); + else +- loongarch_emit_move (temp1, fnaddr); ++ { ++ emit_insn (gen_la_pcrel64_two_parts (temp1, temp2, fnaddr)); ++ emit_move_insn (temp1, gen_rtx_PLUS (Pmode, temp1, temp2)); ++ } + + emit_jump_insn (gen_indirect_jump (temp1)); + } +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index add55e0af..9356194fe 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -84,6 +84,8 @@ + UNSPEC_CALL_VALUE_MULTIPLE_INTERNAL_1 + + UNSPEC_LOAD_SYMBOL_OFFSET64 ++ UNSPEC_LA_PCREL_64_PART1 ++ UNSPEC_LA_PCREL_64_PART2 + ]) + + (define_c_enum "unspecv" [ +@@ -2224,6 +2226,24 @@ + [(set_attr "mode" "DI") + (set_attr "insn_count" "5")]) + ++;; The 64-bit PC-relative part of address loading. ++;; Note that the psABI does not allow splitting it. 
++(define_insn "la_pcrel64_two_parts" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 2 "") (pc)] UNSPEC_LA_PCREL_64_PART1)) ++ (set (match_operand:DI 1 "register_operand" "=r") ++ (unspec:DI [(match_dup 2) (pc)] UNSPEC_LA_PCREL_64_PART2))] ++ "TARGET_ABI_LP64 && la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE" ++ { ++ return "pcalau12i\t%0,%r2\n\t" ++ "addi.d\t%1,$r0,%L2\n\t" ++ "lu32i.d\t%1,%R2\n\t" ++ "lu52i.d\t%1,%1,%H2"; ++ } ++ [(set_attr "move_type" "move") ++ (set_attr "mode" "DI") ++ (set_attr "length" "16")]) ++ + ;; 32-bit Integer moves + + (define_expand "movsi" +diff --git a/gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-1.C b/gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-1.C +new file mode 100644 +index 000000000..ff1f7c165 +--- /dev/null ++++ b/gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-1.C +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-inline -march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=always -mdirect-extern-access" } */ ++ ++struct A { ++ virtual ~A(); ++}; ++ ++struct B : virtual A {}; ++void var() { B(); } ++ ++/* { dg-final { scan-assembler "pcalau12i\t\[^\n\]*%pc_hi20\\(\\.LTHUNK0\\)\n\taddi\\.d\t\[^\n\]*%pc_lo12\\(\\\.LTHUNK0\\)\n\tlu32i\\.d\t\[^\n\]*%pc64_lo20\\(\\.LTHUNK0\\)\n\tlu52i\\.d\t\[^\n\]*%pc64_hi12\\(\\.LTHUNK0\\)" } } */ +diff --git a/gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-2.C b/gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-2.C +new file mode 100644 +index 000000000..c9aa16b41 +--- /dev/null ++++ b/gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-2.C +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-inline -march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=auto -mdirect-extern-access" } */ ++ ++#include "cmodel-extreme-mi-thunk-1.C" ++ ++/* { dg-final { scan-assembler 
"pcalau12i\t\[^\n\]*%pc_hi20\\(\\.LTHUNK0\\)\n\taddi\\.d\t\[^\n\]*%pc_lo12\\(\\\.LTHUNK0\\)\n\tlu32i\\.d\t\[^\n\]*%pc64_lo20\\(\\.LTHUNK0\\)\n\tlu52i\\.d\t\[^\n\]*%pc64_hi12\\(\\.LTHUNK0\\)" } } */ +diff --git a/gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-3.C b/gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-3.C +new file mode 100644 +index 000000000..afb86c8bd +--- /dev/null ++++ b/gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-3.C +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-inline -march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=none -mdirect-extern-access" } */ ++ ++#include "cmodel-extreme-mi-thunk-1.C" ++ ++/* { dg-final { scan-assembler "la.local\t\[^\n\]*\\.LTHUNK0" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c +new file mode 100644 +index 000000000..564ee4017 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=always -fdump-rtl-final" } */ ++ ++int a; ++extern int b; ++__thread int c __attribute__ ((tls_model ("local-exec"))); ++__thread int d __attribute__ ((tls_model ("initial-exec"))); ++__thread int e __attribute__ ((tls_model ("local-dynamic"))); ++__thread int f __attribute__ ((tls_model ("global-dynamic"))); ++ ++void ++test (void) ++{ ++ a = b + c + d + e + f; ++} ++ ++/* a, b, d, e, f, and __tls_get_addr. 
*/ ++/* { dg-final { scan-rtl-dump-times "la_pcrel64_two_parts" 6 "final" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c +new file mode 100644 +index 000000000..ce834805f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=auto -fdump-rtl-final" } */ ++ ++#include "cmodel-extreme-1.c" ++ ++/* a, b, d, e, f, and __tls_get_addr. */ ++/* { dg-final { scan-rtl-dump-times "la_pcrel64_two_parts" 6 "final" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c +index db1e0f853..fdb4cf1ff 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c +@@ -1,31 +1,33 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */ ++/* { dg-options "-mabi=lp64d -O2 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */ + /* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ + /* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ + /* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ + ++#define NOIPA __attribute__ ((noipa)) ++ + extern void g (void); +-void ++NOIPA void + f (void) + {} + +-static void ++NOIPA static void + l (void) + {} + +-void ++NOIPA void + test (void) + { + g (); + } + +-void ++NOIPA void + test1 (void) + { + f (); + } + +-void ++NOIPA void + test2 (void) + { + l (); +diff --git 
a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c +index 21bf81ae8..dfba3882b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c +@@ -1,32 +1,7 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */ ++/* { dg-options "-mabi=lp64d -O2 -fpic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */ + /* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ + /* { dg-final { scan-assembler "test1:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ + /* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ + +-extern void g (void); +-void +-f (void) +-{} +- +-static void +-l (void) +-{} +- +-void +-test (void) +-{ +- g (); +-} +- +-void +-test1 (void) +-{ +- f (); +-} +- +-void +-test2 (void) +-{ +- l (); +-} ++#include "func-call-extreme-1.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c +index a4da44b4a..1f5234f83 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ ++/* { dg-options "-mabi=lp64d -O2 -fno-pic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ + /* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ + /* { dg-final { scan-assembler 
"test1:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ + /* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c +index 16b00f4c5..c42285006 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ ++/* { dg-options "-mabi=lp64d -O2 -fpic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ + /* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ + /* { dg-final { scan-assembler "test1:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ + /* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ +-- +2.43.0 + diff --git a/0129-LoongArch-Adjust-cost-of-vector_stmt-that-match-mult.patch b/0129-LoongArch-Adjust-cost-of-vector_stmt-that-match-mult.patch new file mode 100644 index 0000000..11c3c46 --- /dev/null +++ b/0129-LoongArch-Adjust-cost-of-vector_stmt-that-match-mult.patch @@ -0,0 +1,173 @@ +From 825847768a29ec9d50e01015167002998150cb27 Mon Sep 17 00:00:00 2001 +From: Li Wei +Date: Fri, 26 Jan 2024 16:41:11 +0800 +Subject: [PATCH 129/188] LoongArch: Adjust cost of vector_stmt that match + multiply-add pattern. + +We found that when only 128-bit vectorization was enabled, 549.fotonik3d_r +failed to vectorize effectively. 
For this reason, we adjust the cost of +128-bit vector_stmt that match the multiply-add pattern to facilitate 128-bit +vectorization. +The experimental results show that after the modification, 549.fotonik3d_r +performance can be improved by 9.77% under the 128-bit vectorization option. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_multiply_add_p): New. + (loongarch_vector_costs::add_stmt_cost): Adjust. + +gcc/testsuite/ChangeLog: + + * gfortran.dg/vect/vect-10.f90: New test. +--- + gcc/config/loongarch/loongarch.cc | 48 +++++++++++++++ + gcc/testsuite/gfortran.dg/vect/vect-10.f90 | 71 ++++++++++++++++++++++ + 2 files changed, 119 insertions(+) + create mode 100644 gcc/testsuite/gfortran.dg/vect/vect-10.f90 + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index b8f0291ab..526ea0bcb 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4153,6 +4153,37 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi + return 1 << ceil_log2 (uf); + } + ++/* Check if assign stmt rhs op comes from a multiply-add operation. 
*/ ++static bool ++loongarch_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info) ++{ ++ gassign *assign = dyn_cast<gassign *> (stmt_info->stmt); ++ if (!assign) ++ return false; ++ tree_code code = gimple_assign_rhs_code (assign); ++ if (code != PLUS_EXPR && code != MINUS_EXPR) ++ return false; ++ ++ auto is_mul_result = [&](int i) ++ { ++ tree rhs = gimple_op (assign, i); ++ if (TREE_CODE (rhs) != SSA_NAME) ++ return false; ++ ++ stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs); ++ if (!def_stmt_info ++ || STMT_VINFO_DEF_TYPE (def_stmt_info) != vect_internal_def) ++ return false; ++ gassign *rhs_assign = dyn_cast<gassign *> (def_stmt_info->stmt); ++ if (!rhs_assign || gimple_assign_rhs_code (rhs_assign) != MULT_EXPR) ++ return false; ++ ++ return true; ++ }; ++ ++ return is_mul_result (1) || is_mul_result (2); ++} ++ + unsigned + loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, + stmt_vec_info stmt_info, slp_tree, +@@ -4165,6 +4196,23 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, + { + int stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype, + misalign); ++ if (vectype && stmt_info) ++ { ++ gassign *assign = dyn_cast<gassign *> (STMT_VINFO_STMT (stmt_info)); ++ machine_mode mode = TYPE_MODE (vectype); ++ ++ /* We found through testing that this strategy (the stmt that ++ matches the multiply-add pattern) has positive returns only ++ when applied to the 128-bit vector stmt, so this restriction ++ is currently made.
*/ ++ if (kind == vector_stmt && GET_MODE_SIZE (mode) == 16 && assign) ++ { ++ if (!vect_is_reduction (stmt_info) ++ && loongarch_multiply_add_p (m_vinfo, stmt_info)) ++ stmt_cost = 0; ++ } ++ } ++ + retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost); + m_costs[where] += retval; + +diff --git a/gcc/testsuite/gfortran.dg/vect/vect-10.f90 b/gcc/testsuite/gfortran.dg/vect/vect-10.f90 +new file mode 100644 +index 000000000..b85bc2702 +--- /dev/null ++++ b/gcc/testsuite/gfortran.dg/vect/vect-10.f90 +@@ -0,0 +1,71 @@ ++! { dg-do compile } ++! { dg-additional-options "-Ofast -mlsx -fvect-cost-model=dynamic" { target loongarch64*-*-* } } ++ ++MODULE material_mod ++ ++IMPLICIT NONE ++ ++integer, parameter :: dfp = selected_real_kind (13, 99) ++integer, parameter :: rfp = dfp ++ ++PUBLIC Mat_updateE, iepx, iepy, iepz ++ ++PRIVATE ++ ++integer, dimension (:, :, :), allocatable :: iepx, iepy, iepz ++real (kind = rfp), dimension (:), allocatable :: Dbdx, Dbdy, Dbdz ++integer :: imin, jmin, kmin ++integer, dimension (6) :: Exsize ++integer, dimension (6) :: Eysize ++integer, dimension (6) :: Ezsize ++integer, dimension (6) :: Hxsize ++integer, dimension (6) :: Hysize ++integer, dimension (6) :: Hzsize ++ ++CONTAINS ++ ++SUBROUTINE mat_updateE (nx, ny, nz, Hx, Hy, Hz, Ex, Ey, Ez) ++ ++integer, intent (in) :: nx, ny, nz ++ ++real (kind = rfp), intent (inout), & ++ dimension (Exsize (1) : Exsize (2), Exsize (3) : Exsize (4), Exsize (5) : Exsize (6)) :: Ex ++real (kind = rfp), intent (inout), & ++ dimension (Eysize (1) : Eysize (2), Eysize (3) : Eysize (4), Eysize (5) : Eysize (6)) :: Ey ++real (kind = rfp), intent (inout), & ++ dimension (Ezsize (1) : Ezsize (2), Ezsize (3) : Ezsize (4), Ezsize (5) : Ezsize (6)) :: Ez ++real (kind = rfp), intent (in), & ++ dimension (Hxsize (1) : Hxsize (2), Hxsize (3) : Hxsize (4), Hxsize (5) : Hxsize (6)) :: Hx ++real (kind = rfp), intent (in), & ++ dimension (Hysize (1) : Hysize (2), Hysize (3) : Hysize (4), Hysize (5) : Hysize 
(6)) :: Hy ++real (kind = rfp), intent (in), & ++ dimension (Hzsize (1) : Hzsize (2), Hzsize (3) : Hzsize (4), Hzsize (5) : Hzsize (6)) :: Hz ++ ++integer :: i, j, k, mp ++ ++do k = kmin, nz ++ do j = jmin, ny ++ do i = imin, nx ++ mp = iepx (i, j, k) ++ Ex (i, j, k) = Ex (i, j, k) + & ++ Dbdy (mp) * (Hz (i, j, k ) - Hz (i, j-1, k)) + & ++ Dbdz (mp) * (Hy (i, j, k-1) - Hy (i, j , k)) ++ ++ mp = iepy (i, j, k) ++ Ey (i, j, k) = Ey (i, j, k) + & ++ Dbdz (mp) * (Hx (i , j, k) - Hx (i, j, k-1)) + & ++ Dbdx (mp) * (Hz (i-1, j, k) - Hz (i, j, k )) ++ ++ mp = iepz (i, j, k) ++ Ez (i, j, k) = Ez (i, j, k) + & ++ Dbdx (mp) * (Hy (i, j , k) - Hy (i-1, j, k)) + & ++ Dbdy (mp) * (Hx (i, j-1, k) - Hx (i , j, k)) ++ end do ++ end do ++end do ++ ++END SUBROUTINE mat_updateE ++ ++END MODULE material_mod ++ ++! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target loongarch64*-*-* } } } +-- +2.43.0 + diff --git a/0130-LoongArch-Fix-incorrect-return-type-for-frecipe-frsq.patch b/0130-LoongArch-Fix-incorrect-return-type-for-frecipe-frsq.patch new file mode 100644 index 0000000..8777e83 --- /dev/null +++ b/0130-LoongArch-Fix-incorrect-return-type-for-frecipe-frsq.patch @@ -0,0 +1,113 @@ +From 99a48268961f05e87f4f9d6f3f22903869f50af7 Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Wed, 24 Jan 2024 17:19:32 +0800 +Subject: [PATCH 130/188] LoongArch: Fix incorrect return type for + frecipe/frsqrte intrinsic functions + +gcc/ChangeLog: + + * config/loongarch/larchintrin.h + (__frecipe_s): Update function return type. + (__frecipe_d): Ditto. + (__frsqrte_s): Ditto. + (__frsqrte_d): Ditto. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/larch-frecipe-intrinsic.c: New test. 
+--- + gcc/config/loongarch/larchintrin.h | 16 +++++----- + .../loongarch/larch-frecipe-intrinsic.c | 30 +++++++++++++++++++ + 2 files changed, 38 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/larch-frecipe-intrinsic.c + +diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h +index 22035e767..6582dfe49 100644 +--- a/gcc/config/loongarch/larchintrin.h ++++ b/gcc/config/loongarch/larchintrin.h +@@ -336,38 +336,38 @@ __iocsrwr_d (unsigned long int _1, unsigned int _2) + #ifdef __loongarch_frecipe + /* Assembly instruction format: fd, fj. */ + /* Data types in instruction templates: SF, SF. */ +-extern __inline void ++extern __inline float + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frecipe_s (float _1) + { +- __builtin_loongarch_frecipe_s ((float) _1); ++ return (float) __builtin_loongarch_frecipe_s ((float) _1); + } + + /* Assembly instruction format: fd, fj. */ + /* Data types in instruction templates: DF, DF. */ +-extern __inline void ++extern __inline double + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frecipe_d (double _1) + { +- __builtin_loongarch_frecipe_d ((double) _1); ++ return (double) __builtin_loongarch_frecipe_d ((double) _1); + } + + /* Assembly instruction format: fd, fj. */ + /* Data types in instruction templates: SF, SF. */ +-extern __inline void ++extern __inline float + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frsqrte_s (float _1) + { +- __builtin_loongarch_frsqrte_s ((float) _1); ++ return (float) __builtin_loongarch_frsqrte_s ((float) _1); + } + + /* Assembly instruction format: fd, fj. */ + /* Data types in instruction templates: DF, DF. 
*/ +-extern __inline void ++extern __inline double + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frsqrte_d (double _1) + { +- __builtin_loongarch_frsqrte_d ((double) _1); ++ return (double) __builtin_loongarch_frsqrte_d ((double) _1); + } + #endif + +diff --git a/gcc/testsuite/gcc.target/loongarch/larch-frecipe-intrinsic.c b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-intrinsic.c +new file mode 100644 +index 000000000..6ce2bde0a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-intrinsic.c +@@ -0,0 +1,30 @@ ++/* Test intrinsics for frecipe.{s/d} and frsqrte.{s/d} instructions */ ++/* { dg-do compile } */ ++/* { dg-options "-mfrecipe -O2" } */ ++/* { dg-final { scan-assembler-times "test_frecipe_s:.*frecipe\\.s.*test_frecipe_s" 1 } } */ ++/* { dg-final { scan-assembler-times "test_frecipe_d:.*frecipe\\.d.*test_frecipe_d" 1 } } */ ++/* { dg-final { scan-assembler-times "test_frsqrte_s:.*frsqrte\\.s.*test_frsqrte_s" 1 } } */ ++/* { dg-final { scan-assembler-times "test_frsqrte_d:.*frsqrte\\.d.*test_frsqrte_d" 1 } } */ ++ ++#include <larchintrin.h> ++ ++float ++test_frecipe_s (float _1) ++{ ++ return __frecipe_s (_1); ++} ++double ++test_frecipe_d (double _1) ++{ ++ return __frecipe_d (_1); ++} ++float ++test_frsqrte_s (float _1) ++{ ++ return __frsqrte_s (_1); ++} ++double ++test_frsqrte_d (double _1) ++{ ++ return __frsqrte_d (_1); ++} +-- +2.43.0 + diff --git a/0131-LoongArch-Fix-an-ODR-violation.patch b/0131-LoongArch-Fix-an-ODR-violation.patch new file mode 100644 index 0000000..9961ab4 --- /dev/null +++ b/0131-LoongArch-Fix-an-ODR-violation.patch @@ -0,0 +1,60 @@ +From 89ebd7012ecf49c60bad8dd018e0aa573b58844b Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 2 Feb 2024 05:37:38 +0800 +Subject: [PATCH 131/188] LoongArch: Fix an ODR violation + +When bootstrapping GCC 14 with --with-build-config=bootstrap-lto, an ODR +violation is detected: + + ../../gcc/config/loongarch/loongarch-opts.cc:57: warning: + 'abi_minimal_isa'
violates the C++ One Definition Rule [-Wodr] + 57 | abi_minimal_isa[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES]; + ../../gcc/config/loongarch/loongarch-def.cc:186: note: + 'abi_minimal_isa' was previously declared here + 186 | abi_minimal_isa = array<array<loongarch_isa, N_ABI_EXT_TYPES>, + ../../gcc/config/loongarch/loongarch-def.cc:186: note: + code may be misoptimized unless '-fno-strict-aliasing' is used + +Fix it by adding a proper declaration of abi_minimal_isa into +loongarch-def.h and remove the ODR-violating local declaration in +loongarch-opts.cc. + +gcc/ChangeLog: + + * config/loongarch/loongarch-def.h (abi_minimal_isa): Declare. + * config/loongarch/loongarch-opts.cc (abi_minimal_isa): Remove + the ODR-violating local declaration. +--- + gcc/config/loongarch/loongarch-def.h | 3 +++ + gcc/config/loongarch/loongarch-opts.cc | 2 -- + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 28da3ae5f..fdcf43fc7 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -203,5 +203,8 @@ extern loongarch_def_array<loongarch_align, N_TUNE_TYPES> + loongarch_cpu_align; + extern loongarch_def_array<loongarch_rtx_cost_data, N_TUNE_TYPES> + loongarch_cpu_rtx_cost_data; ++extern loongarch_def_array< ++ loongarch_def_array<loongarch_isa, N_ABI_EXT_TYPES>, ++ N_ABI_BASE_TYPES> abi_minimal_isa; + + #endif /* LOONGARCH_DEF_H */ +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index a2b069d83..2ea3972d1 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -53,8 +53,6 @@ static const int tm_multilib_list[] = { TM_MULTILIB_LIST }; + static int enabled_abi_types[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES] = { 0 }; + + #define isa_required(ABI) (abi_minimal_isa[(ABI).base][(ABI).ext]) +-extern "C" const struct loongarch_isa +-abi_minimal_isa[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES]; + + static inline int + is_multilib_enabled (struct loongarch_abi abi) +-- +2.43.0 + diff --git
a/0132-LoongArch-testsuite-Fix-gcc.dg-vect-vect-reduc-mul_-.patch b/0132-LoongArch-testsuite-Fix-gcc.dg-vect-vect-reduc-mul_-.patch new file mode 100644 index 0000000..7a85dfc --- /dev/null +++ b/0132-LoongArch-testsuite-Fix-gcc.dg-vect-vect-reduc-mul_-.patch @@ -0,0 +1,359 @@ +From f4a447bff86c7f5598a7461e353a3c6f4a101ed4 Mon Sep 17 00:00:00 2001 +From: Li Wei +Date: Fri, 2 Feb 2024 09:42:28 +0800 +Subject: [PATCH 132/188] LoongArch: testsuite: Fix + gcc.dg/vect/vect-reduc-mul_{1, 2}.c FAIL. + +This FAIL was introduced from r14-6908. The reason is that when merging +constant vector permutation implementations, the 128-bit matching situation +was not fully considered. In fact, the expansion of 128-bit vectors after +merging only supports value-based 4 elements set shuffle, so this time is a +complete implementation of the entire 128-bit vector constant permutation, +and some structural adjustments have also been made to the code. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_expand_vselect): Adjust. + (loongarch_expand_vselect_vconcat): Ditto. + (loongarch_try_expand_lsx_vshuf_const): New, use vshuf to implement + all 128-bit constant permutation situations. + (loongarch_expand_lsx_shuffle): Adjust and rename function name. + (loongarch_is_imm_set_shuffle): Renamed function name. + (loongarch_expand_vec_perm_even_odd): Function forward declaration. + (loongarch_expand_vec_perm_even_odd_1): Add implement for 128-bit + extract-even and extract-odd permutations. + (loongarch_is_odd_extraction): Delete. + (loongarch_is_even_extraction): Ditto. + (loongarch_expand_vec_perm_const): Adjust. 
+--- + gcc/config/loongarch/loongarch.cc | 218 ++++++++++++++++++++++-------- + 1 file changed, 163 insertions(+), 55 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 526ea0bcb..a0e0906af 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -8025,7 +8025,8 @@ struct expand_vec_perm_d + + static bool + loongarch_expand_vselect (rtx target, rtx op0, +- const unsigned char *perm, unsigned nelt) ++ const unsigned char *perm, unsigned nelt, ++ bool testing_p) + { + rtx rperm[MAX_VECT_LEN], x; + rtx_insn *insn; +@@ -8044,6 +8045,9 @@ loongarch_expand_vselect (rtx target, rtx op0, + remove_insn (insn); + return false; + } ++ ++ if (testing_p) ++ remove_insn (insn); + return true; + } + +@@ -8051,7 +8055,8 @@ loongarch_expand_vselect (rtx target, rtx op0, + + static bool + loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1, +- const unsigned char *perm, unsigned nelt) ++ const unsigned char *perm, unsigned nelt, ++ bool testing_p) + { + machine_mode v2mode; + rtx x; +@@ -8059,7 +8064,7 @@ loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1, + if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode)) + return false; + x = gen_rtx_VEC_CONCAT (v2mode, op0, op1); +- return loongarch_expand_vselect (target, x, perm, nelt); ++ return loongarch_expand_vselect (target, x, perm, nelt, testing_p); + } + + static tree +@@ -8315,11 +8320,87 @@ loongarch_set_handled_components (sbitmap components) + #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" + #undef TARGET_ASM_ALIGNED_DI_OP + #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t" ++ ++/* Use the vshuf instruction to implement all 128-bit constant vector ++ permuatation. 
*/ ++ ++static bool ++loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) ++{ ++ int i; ++ rtx target, op0, op1, sel, tmp; ++ rtx rperm[MAX_VECT_LEN]; ++ ++ if (GET_MODE_SIZE (d->vmode) == 16) ++ { ++ target = d->target; ++ op0 = d->op0; ++ op1 = d->one_vector_p ? d->op0 : d->op1; ++ ++ if (GET_MODE (op0) != GET_MODE (op1) ++ || GET_MODE (op0) != GET_MODE (target)) ++ return false; ++ ++ if (d->testing_p) ++ return true; ++ ++ for (i = 0; i < d->nelt; i += 1) ++ rperm[i] = GEN_INT (d->perm[i]); ++ ++ if (d->vmode == E_V2DFmode) ++ { ++ sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm)); ++ tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0); ++ emit_move_insn (tmp, sel); ++ } ++ else if (d->vmode == E_V4SFmode) ++ { ++ sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm)); ++ tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0); ++ emit_move_insn (tmp, sel); ++ } ++ else ++ { ++ sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm)); ++ emit_move_insn (d->target, sel); ++ } ++ ++ switch (d->vmode) ++ { ++ case E_V2DFmode: ++ emit_insn (gen_lsx_vshuf_d_f (target, target, op1, op0)); ++ break; ++ case E_V2DImode: ++ emit_insn (gen_lsx_vshuf_d (target, target, op1, op0)); ++ break; ++ case E_V4SFmode: ++ emit_insn (gen_lsx_vshuf_w_f (target, target, op1, op0)); ++ break; ++ case E_V4SImode: ++ emit_insn (gen_lsx_vshuf_w (target, target, op1, op0)); ++ break; ++ case E_V8HImode: ++ emit_insn (gen_lsx_vshuf_h (target, target, op1, op0)); ++ break; ++ case E_V16QImode: ++ emit_insn (gen_lsx_vshuf_b (target, op1, op0, target)); ++ break; ++ default: ++ break; ++ } ++ ++ return true; ++ } ++ return false; ++} ++ + /* Construct (set target (vec_select op0 (parallel selector))) and +- return true if that's a valid instruction in the active ISA. */ ++ return true if that's a valid instruction in the active ISA. ++ In fact, it matches the special constant vector with repeated ++ 4-element sets. 
*/ + + static bool +-loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) ++loongarch_is_imm_set_shuffle (struct expand_vec_perm_d *d) + { + rtx x, elts[MAX_VECT_LEN]; + rtvec v; +@@ -8338,6 +8419,9 @@ loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) + if (!loongarch_const_vector_shuffle_set_p (x, d->vmode)) + return false; + ++ if (d->testing_p) ++ return true; ++ + x = gen_rtx_VEC_SELECT (d->vmode, d->op0, x); + x = gen_rtx_SET (d->target, x); + +@@ -8350,6 +8434,27 @@ loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) + return true; + } + ++static bool ++loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *); ++ ++/* Try to match and expand all kinds of 128-bit const vector permutation ++ cases. */ ++ ++static bool ++loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d) ++{ ++ if (!ISA_HAS_LSX && GET_MODE_SIZE (d->vmode) != 16) ++ return false; ++ ++ if (loongarch_is_imm_set_shuffle (d)) ++ return true; ++ ++ if (loongarch_expand_vec_perm_even_odd (d)) ++ return true; ++ ++ return loongarch_try_expand_lsx_vshuf_const (d); ++} ++ + /* Try to simplify a two vector permutation using 2 intra-lane interleave + insns and cross-lane shuffle for 32-byte vectors. */ + +@@ -8442,7 +8547,7 @@ loongarch_expand_vec_perm_interleave (struct expand_vec_perm_d *d) + return true; + } + +-/* Implement extract-even and extract-odd permutations. */ ++/* Implement 128-bit and 256-bit extract-even and extract-odd permutations. */ + + static bool + loongarch_expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) +@@ -8457,6 +8562,50 @@ loongarch_expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) + + switch (d->vmode) + { ++ /* 128 bit. 
*/ ++ case E_V2DFmode: ++ if (odd) ++ emit_insn (gen_lsx_vilvh_d_f (d->target, d->op0, d->op1)); ++ else ++ emit_insn (gen_lsx_vilvl_d_f (d->target, d->op0, d->op1)); ++ break; ++ ++ case E_V2DImode: ++ if (odd) ++ emit_insn (gen_lsx_vilvh_d (d->target, d->op0, d->op1)); ++ else ++ emit_insn (gen_lsx_vilvl_d (d->target, d->op0, d->op1)); ++ break; ++ ++ case E_V4SFmode: ++ if (odd) ++ emit_insn (gen_lsx_vpickod_w_f (d->target, d->op0, d->op1)); ++ else ++ emit_insn (gen_lsx_vpickev_w_f (d->target, d->op0, d->op1)); ++ break; ++ ++ case E_V4SImode: ++ if (odd) ++ emit_insn (gen_lsx_vpickod_w (d->target, d->op0, d->op1)); ++ else ++ emit_insn (gen_lsx_vpickev_w (d->target, d->op0, d->op1)); ++ break; ++ ++ case E_V8HImode: ++ if (odd) ++ emit_insn (gen_lsx_vpickod_h (d->target, d->op0, d->op1)); ++ else ++ emit_insn (gen_lsx_vpickev_h (d->target, d->op0, d->op1)); ++ break; ++ ++ case E_V16QImode: ++ if (odd) ++ emit_insn (gen_lsx_vpickod_b (d->target, d->op0, d->op1)); ++ else ++ emit_insn (gen_lsx_vpickev_b (d->target, d->op0, d->op1)); ++ break; ++ ++ /* 256 bit. */ + case E_V4DFmode: + /* Shuffle the lanes around into { 0 4 2 6 } and { 1 5 3 7 }. 
*/ + if (odd) +@@ -8531,7 +8680,7 @@ static bool + loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *d) + { + unsigned i, odd, nelt = d->nelt; +- if (!ISA_HAS_LASX) ++ if (!ISA_HAS_LASX && !ISA_HAS_LSX) + return false; + + odd = d->perm[0]; +@@ -8994,44 +9143,6 @@ loongarch_is_quad_duplicate (struct expand_vec_perm_d *d) + return result; + } + +-static bool +-loongarch_is_odd_extraction (struct expand_vec_perm_d *d) +-{ +- bool result = true; +- unsigned char buf = 1; +- +- for (int i = 0; i < d->nelt; i += 1) +- { +- if (buf != d->perm[i]) +- { +- result = false; +- break; +- } +- buf += 2; +- } +- +- return result; +-} +- +-static bool +-loongarch_is_even_extraction (struct expand_vec_perm_d *d) +-{ +- bool result = true; +- unsigned char buf = 0; +- +- for (int i = 0; i < d->nelt; i += 1) +- { +- if (buf != d->perm[i]) +- { +- result = false; +- break; +- } +- buf += 2; +- } +- +- return result; +-} +- + static bool + loongarch_is_extraction_permutation (struct expand_vec_perm_d *d) + { +@@ -9288,32 +9399,29 @@ loongarch_expand_vec_perm_const (struct expand_vec_perm_d *d) + for (i = 1; i < d->nelt; i += 2) + perm2[i] += d->nelt; + if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, +- perm2, d->nelt)) ++ perm2, d->nelt, d->testing_p)) + return true; + } + else + { + if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, +- d->perm, d->nelt)) ++ d->perm, d->nelt, ++ d->testing_p)) + return true; + + /* Try again with swapped operands. 
*/ + for (i = 0; i < d->nelt; ++i) + perm2[i] = (d->perm[i] + d->nelt) & (2 * d->nelt - 1); + if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, +- perm2, d->nelt)) ++ perm2, d->nelt, d->testing_p)) + return true; + } + +- if (loongarch_expand_lsx_shuffle (d)) ++ if (loongarch_is_imm_set_shuffle (d)) + return true; + +- if (loongarch_is_odd_extraction (d) +- || loongarch_is_even_extraction (d)) +- { +- if (loongarch_expand_vec_perm_even_odd (d)) +- return true; +- } ++ if (loongarch_expand_vec_perm_even_odd (d)) ++ return true; + + if (loongarch_is_lasx_lowpart_interleave (d) + || loongarch_is_lasx_lowpart_interleave_2 (d) +-- +2.43.0 + diff --git a/0133-LoongArch-Avoid-out-of-bounds-access-in-loongarch_sy.patch b/0133-LoongArch-Avoid-out-of-bounds-access-in-loongarch_sy.patch new file mode 100644 index 0000000..b557043 --- /dev/null +++ b/0133-LoongArch-Avoid-out-of-bounds-access-in-loongarch_sy.patch @@ -0,0 +1,72 @@ +From 6364467c68ac1ee2b54b866f462fb670a43029fa Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 2 Feb 2024 08:51:08 +0800 +Subject: [PATCH 133/188] LoongArch: Avoid out-of-bounds access in + loongarch_symbol_insns + +We call loongarch_symbol_insns with mode = MAX_MACHINE_MODE sometimes. +But in loongarch_symbol_insns: + + if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode)) + return 0; + +And LSX_SUPPORTED_MODE_P is defined as: + + #define LSX_SUPPORTED_MODE_P(MODE) \ + (ISA_HAS_LSX \ + && GET_MODE_SIZE (MODE) == UNITS_PER_LSX_REG ... ... + +GET_MODE_SIZE is expanded to a call to mode_to_bytes, which is defined: + + ALWAYS_INLINE poly_uint16 + mode_to_bytes (machine_mode mode) + { + #if GCC_VERSION >= 4001 + return (__builtin_constant_p (mode) + ? 
mode_size_inline (mode) : mode_size[mode]); + #else + return mode_size[mode]; + #endif + } + +There is an assertion in mode_size_inline: + + gcc_assert (mode >= 0 && mode < NUM_MACHINE_MODES); + +Note that NUM_MACHINE_MODES = MAX_MACHINE_MODE (emitted by genmodes.cc), +thus if __builtin_constant_p (mode) is evaluated true (it happens when +GCC is bootstrapped with LTO+PGO), the assertion will be triggered and +cause an ICE. OTOH if __builtin_constant_p (mode) is evaluated false, +mode_size[mode] is still an out-of-bound array access (the length of the +mode_size array is NUM_MACHINE_MODES). + +So we shouldn't call LSX_SUPPORTED_MODE_P or LASX_SUPPORTED_MODE_P with +MAX_MACHINE_MODE in loongarch_symbol_insns. This is very similar to a +MIPS bug PR98491 fixed by me about 3 years ago. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_symbol_insns): Do not + use LSX_SUPPORTED_MODE_P or LASX_SUPPORTED_MODE_P if mode is + MAX_MACHINE_MODE. +--- + gcc/config/loongarch/loongarch.cc | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index a0e0906af..d23b09cc5 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2004,7 +2004,8 @@ loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode) + { + /* LSX LD.* and ST.* cannot support loading symbols via an immediate + operand.
*/ +- if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode)) ++ if (mode != MAX_MACHINE_MODE ++ && (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))) + return 0; + + switch (type) +-- +2.43.0 + diff --git a/0134-LoongArch-Fix-wrong-LSX-FP-vector-negation.patch b/0134-LoongArch-Fix-wrong-LSX-FP-vector-negation.patch new file mode 100644 index 0000000..c4edf98 --- /dev/null +++ b/0134-LoongArch-Fix-wrong-LSX-FP-vector-negation.patch @@ -0,0 +1,122 @@ +From 659b51a6aed60f389009eff1e04645a47e55a45c Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sat, 3 Feb 2024 03:16:14 +0800 +Subject: [PATCH 134/188] LoongArch: Fix wrong LSX FP vector negation + +We expanded (neg x) to (minus const0 x) for LSX FP vectors, this is +wrong because -0.0 is not 0 - 0.0. This causes some Python tests to +fail when Python is built with LSX enabled. + +Use the vbitrevi.{d/w} instructions to simply reverse the sign bit +instead. We are already doing this for LASX and now we can unify them +into simd.md. + +gcc/ChangeLog: + + * config/loongarch/lsx.md (neg2): Remove the + incorrect expand. + * config/loongarch/simd.md (simdfmt_as_i): New define_mode_attr. + (elmsgnbit): Likewise. + (neg2): New define_insn. + * config/loongarch/lasx.md (negv4df2, negv8sf2): Remove as they + are now instantiated in simd.md. 
+--- + gcc/config/loongarch/lasx.md | 16 ---------------- + gcc/config/loongarch/lsx.md | 11 ----------- + gcc/config/loongarch/simd.md | 18 ++++++++++++++++++ + 3 files changed, 18 insertions(+), 27 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 946811e1a..38f35bad6 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -3028,22 +3028,6 @@ + [(set_attr "type" "simd_logic") + (set_attr "mode" "V8SF")]) + +-(define_insn "negv4df2" +- [(set (match_operand:V4DF 0 "register_operand" "=f") +- (neg:V4DF (match_operand:V4DF 1 "register_operand" "f")))] +- "ISA_HAS_LASX" +- "xvbitrevi.d\t%u0,%u1,63" +- [(set_attr "type" "simd_logic") +- (set_attr "mode" "V4DF")]) +- +-(define_insn "negv8sf2" +- [(set (match_operand:V8SF 0 "register_operand" "=f") +- (neg:V8SF (match_operand:V8SF 1 "register_operand" "f")))] +- "ISA_HAS_LASX" +- "xvbitrevi.w\t%u0,%u1,31" +- [(set_attr "type" "simd_logic") +- (set_attr "mode" "V8SF")]) +- + (define_insn "xvfmadd4" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (fma:FLASX (match_operand:FLASX 1 "register_operand" "f") +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 612377436..d5aa3f46f 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -728,17 +728,6 @@ + DONE; + }) + +-(define_expand "neg2" +- [(set (match_operand:FLSX 0 "register_operand") +- (neg:FLSX (match_operand:FLSX 1 "register_operand")))] +- "ISA_HAS_LSX" +-{ +- rtx reg = gen_reg_rtx (mode); +- emit_move_insn (reg, CONST0_RTX (mode)); +- emit_insn (gen_sub3 (operands[0], reg, operands[1])); +- DONE; +-}) +- + (define_expand "lsx_vrepli" + [(match_operand:ILSX 0 "register_operand") + (match_operand 1 "const_imm10_operand")] +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 8ac1d75a8..00d4c7831 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -85,12 +85,21 @@ + 
(define_mode_attr simdifmt_for_f [(V2DF "l") (V4DF "l") + (V4SF "w") (V8SF "w")]) + ++;; Suffix for integer mode in LSX or LASX instructions to operating FP ++;; vectors using integer vector operations. ++(define_mode_attr simdfmt_as_i [(V2DF "d") (V4DF "d") ++ (V4SF "w") (V8SF "w")]) ++ + ;; Size of vector elements in bits. + (define_mode_attr elmbits [(V2DI "64") (V4DI "64") + (V4SI "32") (V8SI "32") + (V8HI "16") (V16HI "16") + (V16QI "8") (V32QI "8")]) + ++;; The index of sign bit in FP vector elements. ++(define_mode_attr elmsgnbit [(V2DF "63") (V4DF "63") ++ (V4SF "31") (V8SF "31")]) ++ + ;; This attribute is used to form an immediate operand constraint using + ;; "const__operand". + (define_mode_attr bitimm [(V16QI "uimm3") (V32QI "uimm3") +@@ -457,6 +466,15 @@ + DONE; + }) + ++;; FP negation. ++(define_insn "neg2" ++ [(set (match_operand:FVEC 0 "register_operand" "=f") ++ (neg:FVEC (match_operand:FVEC 1 "register_operand" "f")))] ++ "" ++ "vbitrevi.\t%0,%1," ++ [(set_attr "type" "simd_logic") ++ (set_attr "mode" "")]) ++ + ; The LoongArch SX Instructions. + (include "lsx.md") + +-- +2.43.0 + diff --git a/0135-LoongArch-Fix-wrong-return-value-type-of-__iocsrrd_h.patch b/0135-LoongArch-Fix-wrong-return-value-type-of-__iocsrrd_h.patch new file mode 100644 index 0000000..1055b9a --- /dev/null +++ b/0135-LoongArch-Fix-wrong-return-value-type-of-__iocsrrd_h.patch @@ -0,0 +1,30 @@ +From 539eb7639eeda8ea43149032f6aa724e5d46017c Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Mon, 5 Feb 2024 16:23:20 +0800 +Subject: [PATCH 135/188] LoongArch: Fix wrong return value type of + __iocsrrd_h. + +gcc/ChangeLog: + + * config/loongarch/larchintrin.h (__iocsrrd_h): Modify the + function return value type to unsigned short. 
+--- + gcc/config/loongarch/larchintrin.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h +index 6582dfe49..046e042fd 100644 +--- a/gcc/config/loongarch/larchintrin.h ++++ b/gcc/config/loongarch/larchintrin.h +@@ -268,7 +268,7 @@ __iocsrrd_b (unsigned int _1) + + /* Assembly instruction format: rd, rj. */ + /* Data types in instruction templates: UHI, USI. */ +-extern __inline unsigned char ++extern __inline unsigned short + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrrd_h (unsigned int _1) + { +-- +2.43.0 + diff --git a/0136-LoongArch-Remove-redundant-symbol-type-conversions-i.patch b/0136-LoongArch-Remove-redundant-symbol-type-conversions-i.patch new file mode 100644 index 0000000..5f9eb0b --- /dev/null +++ b/0136-LoongArch-Remove-redundant-symbol-type-conversions-i.patch @@ -0,0 +1,337 @@ +From 868f56db1101bf679f1b2510b9934a978f503a1e Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Mon, 5 Feb 2024 16:53:01 +0800 +Subject: [PATCH 136/188] LoongArch: Remove redundant symbol type conversions + in larchintrin.h. + +gcc/ChangeLog: + + * config/loongarch/larchintrin.h (__movgr2fcsr): Remove redundant + symbol type conversions. + (__cacop_d): Likewise. + (__cpucfg): Likewise. + (__asrtle_d): Likewise. + (__asrtgt_d): Likewise. + (__lddir_d): Likewise. + (__ldpte_d): Likewise. + (__crc_w_b_w): Likewise. + (__crc_w_h_w): Likewise. + (__crc_w_w_w): Likewise. + (__crc_w_d_w): Likewise. + (__crcc_w_b_w): Likewise. + (__crcc_w_h_w): Likewise. + (__crcc_w_w_w): Likewise. + (__crcc_w_d_w): Likewise. + (__csrrd_w): Likewise. + (__csrwr_w): Likewise. + (__csrxchg_w): Likewise. + (__csrrd_d): Likewise. + (__csrwr_d): Likewise. + (__csrxchg_d): Likewise. + (__iocsrrd_b): Likewise. + (__iocsrrd_h): Likewise. + (__iocsrrd_w): Likewise. + (__iocsrrd_d): Likewise. + (__iocsrwr_b): Likewise. + (__iocsrwr_h): Likewise. + (__iocsrwr_w): Likewise. 
+ (__iocsrwr_d): Likewise. + (__frecipe_s): Likewise. + (__frecipe_d): Likewise. + (__frsqrte_s): Likewise. + (__frsqrte_d): Likewise. +--- + gcc/config/loongarch/larchintrin.h | 69 ++++++++++++++---------------- + 1 file changed, 33 insertions(+), 36 deletions(-) + +diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h +index 046e042fd..2e94e5612 100644 +--- a/gcc/config/loongarch/larchintrin.h ++++ b/gcc/config/loongarch/larchintrin.h +@@ -87,13 +87,13 @@ __rdtimel_w (void) + /* Assembly instruction format: fcsr, rj. */ + /* Data types in instruction templates: VOID, UQI, USI. */ + #define __movgr2fcsr(/*ui5*/ _1, _2) \ +- __builtin_loongarch_movgr2fcsr ((_1), (unsigned int) _2); ++ __builtin_loongarch_movgr2fcsr ((_1), _2); + + #if defined __loongarch64 + /* Assembly instruction format: ui5, rj, si12. */ + /* Data types in instruction templates: VOID, USI, UDI, SI. */ + #define __cacop_d(/*ui5*/ _1, /*unsigned long int*/ _2, /*si12*/ _3) \ +- ((void) __builtin_loongarch_cacop_d ((_1), (unsigned long int) (_2), (_3))) ++ __builtin_loongarch_cacop_d ((_1), (_2), (_3)) + #else + #error "Unsupported ABI." + #endif +@@ -104,7 +104,7 @@ extern __inline unsigned int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __cpucfg (unsigned int _1) + { +- return (unsigned int) __builtin_loongarch_cpucfg ((unsigned int) _1); ++ return __builtin_loongarch_cpucfg (_1); + } + + #ifdef __loongarch64 +@@ -114,7 +114,7 @@ extern __inline void + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __asrtle_d (long int _1, long int _2) + { +- __builtin_loongarch_asrtle_d ((long int) _1, (long int) _2); ++ __builtin_loongarch_asrtle_d (_1, _2); + } + + /* Assembly instruction format: rj, rk. 
*/ +@@ -123,7 +123,7 @@ extern __inline void + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __asrtgt_d (long int _1, long int _2) + { +- __builtin_loongarch_asrtgt_d ((long int) _1, (long int) _2); ++ __builtin_loongarch_asrtgt_d (_1, _2); + } + #endif + +@@ -131,7 +131,7 @@ __asrtgt_d (long int _1, long int _2) + /* Assembly instruction format: rd, rj, ui5. */ + /* Data types in instruction templates: DI, DI, UQI. */ + #define __lddir_d(/*long int*/ _1, /*ui5*/ _2) \ +- ((long int) __builtin_loongarch_lddir_d ((long int) (_1), (_2))) ++ __builtin_loongarch_lddir_d ((_1), (_2)) + #else + #error "Unsupported ABI." + #endif +@@ -140,7 +140,7 @@ __asrtgt_d (long int _1, long int _2) + /* Assembly instruction format: rj, ui5. */ + /* Data types in instruction templates: VOID, DI, UQI. */ + #define __ldpte_d(/*long int*/ _1, /*ui5*/ _2) \ +- ((void) __builtin_loongarch_ldpte_d ((long int) (_1), (_2))) ++ __builtin_loongarch_ldpte_d ((_1), (_2)) + #else + #error "Unsupported ABI." + #endif +@@ -151,7 +151,7 @@ extern __inline int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __crc_w_b_w (char _1, int _2) + { +- return (int) __builtin_loongarch_crc_w_b_w ((char) _1, (int) _2); ++ return __builtin_loongarch_crc_w_b_w (_1, _2); + } + + /* Assembly instruction format: rd, rj, rk. */ +@@ -160,7 +160,7 @@ extern __inline int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __crc_w_h_w (short _1, int _2) + { +- return (int) __builtin_loongarch_crc_w_h_w ((short) _1, (int) _2); ++ return __builtin_loongarch_crc_w_h_w (_1, _2); + } + + /* Assembly instruction format: rd, rj, rk. 
*/ +@@ -169,7 +169,7 @@ extern __inline int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __crc_w_w_w (int _1, int _2) + { +- return (int) __builtin_loongarch_crc_w_w_w ((int) _1, (int) _2); ++ return __builtin_loongarch_crc_w_w_w (_1, _2); + } + + #ifdef __loongarch64 +@@ -179,7 +179,7 @@ extern __inline int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __crc_w_d_w (long int _1, int _2) + { +- return (int) __builtin_loongarch_crc_w_d_w ((long int) _1, (int) _2); ++ return __builtin_loongarch_crc_w_d_w (_1, _2); + } + #endif + +@@ -189,7 +189,7 @@ extern __inline int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __crcc_w_b_w (char _1, int _2) + { +- return (int) __builtin_loongarch_crcc_w_b_w ((char) _1, (int) _2); ++ return __builtin_loongarch_crcc_w_b_w (_1, _2); + } + + /* Assembly instruction format: rd, rj, rk. */ +@@ -198,7 +198,7 @@ extern __inline int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __crcc_w_h_w (short _1, int _2) + { +- return (int) __builtin_loongarch_crcc_w_h_w ((short) _1, (int) _2); ++ return __builtin_loongarch_crcc_w_h_w (_1, _2); + } + + /* Assembly instruction format: rd, rj, rk. */ +@@ -207,7 +207,7 @@ extern __inline int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __crcc_w_w_w (int _1, int _2) + { +- return (int) __builtin_loongarch_crcc_w_w_w ((int) _1, (int) _2); ++ return __builtin_loongarch_crcc_w_w_w (_1, _2); + } + + #ifdef __loongarch64 +@@ -217,44 +217,41 @@ extern __inline int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __crcc_w_d_w (long int _1, int _2) + { +- return (int) __builtin_loongarch_crcc_w_d_w ((long int) _1, (int) _2); ++ return __builtin_loongarch_crcc_w_d_w (_1, _2); + } + #endif + + /* Assembly instruction format: rd, ui14. */ + /* Data types in instruction templates: USI, USI. 
*/ + #define __csrrd_w(/*ui14*/ _1) \ +- ((unsigned int) __builtin_loongarch_csrrd_w ((_1))) ++ __builtin_loongarch_csrrd_w ((_1)) + + /* Assembly instruction format: rd, ui14. */ + /* Data types in instruction templates: USI, USI, USI. */ + #define __csrwr_w(/*unsigned int*/ _1, /*ui14*/ _2) \ +- ((unsigned int) __builtin_loongarch_csrwr_w ((unsigned int) (_1), (_2))) ++ __builtin_loongarch_csrwr_w ((_1), (_2)) + + /* Assembly instruction format: rd, rj, ui14. */ + /* Data types in instruction templates: USI, USI, USI, USI. */ + #define __csrxchg_w(/*unsigned int*/ _1, /*unsigned int*/ _2, /*ui14*/ _3) \ +- ((unsigned int) __builtin_loongarch_csrxchg_w ((unsigned int) (_1), \ +- (unsigned int) (_2), (_3))) ++ __builtin_loongarch_csrxchg_w ((_1), (_2), (_3)) + + #ifdef __loongarch64 + /* Assembly instruction format: rd, ui14. */ + /* Data types in instruction templates: UDI, USI. */ + #define __csrrd_d(/*ui14*/ _1) \ +- ((unsigned long int) __builtin_loongarch_csrrd_d ((_1))) ++ __builtin_loongarch_csrrd_d ((_1)) + + /* Assembly instruction format: rd, ui14. */ + /* Data types in instruction templates: UDI, UDI, USI. */ + #define __csrwr_d(/*unsigned long int*/ _1, /*ui14*/ _2) \ +- ((unsigned long int) __builtin_loongarch_csrwr_d ((unsigned long int) (_1), \ +- (_2))) ++ __builtin_loongarch_csrwr_d ((_1), (_2)) + + /* Assembly instruction format: rd, rj, ui14. */ + /* Data types in instruction templates: UDI, UDI, UDI, USI. */ + #define __csrxchg_d(/*unsigned long int*/ _1, /*unsigned long int*/ _2, \ + /*ui14*/ _3) \ +- ((unsigned long int) __builtin_loongarch_csrxchg_d ( \ +- (unsigned long int) (_1), (unsigned long int) (_2), (_3))) ++ __builtin_loongarch_csrxchg_d ((_1), (_2), (_3)) + #endif + + /* Assembly instruction format: rd, rj. 
*/ +@@ -263,7 +260,7 @@ extern __inline unsigned char + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrrd_b (unsigned int _1) + { +- return (unsigned char) __builtin_loongarch_iocsrrd_b ((unsigned int) _1); ++ return __builtin_loongarch_iocsrrd_b (_1); + } + + /* Assembly instruction format: rd, rj. */ +@@ -272,7 +269,7 @@ extern __inline unsigned short + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrrd_h (unsigned int _1) + { +- return (unsigned short) __builtin_loongarch_iocsrrd_h ((unsigned int) _1); ++ return __builtin_loongarch_iocsrrd_h (_1); + } + + /* Assembly instruction format: rd, rj. */ +@@ -281,7 +278,7 @@ extern __inline unsigned int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrrd_w (unsigned int _1) + { +- return (unsigned int) __builtin_loongarch_iocsrrd_w ((unsigned int) _1); ++ return __builtin_loongarch_iocsrrd_w (_1); + } + + #ifdef __loongarch64 +@@ -291,7 +288,7 @@ extern __inline unsigned long int + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrrd_d (unsigned int _1) + { +- return (unsigned long int) __builtin_loongarch_iocsrrd_d ((unsigned int) _1); ++ return __builtin_loongarch_iocsrrd_d (_1); + } + #endif + +@@ -301,7 +298,7 @@ extern __inline void + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrwr_b (unsigned char _1, unsigned int _2) + { +- __builtin_loongarch_iocsrwr_b ((unsigned char) _1, (unsigned int) _2); ++ __builtin_loongarch_iocsrwr_b (_1, _2); + } + + /* Assembly instruction format: rd, rj. */ +@@ -310,7 +307,7 @@ extern __inline void + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrwr_h (unsigned short _1, unsigned int _2) + { +- __builtin_loongarch_iocsrwr_h ((unsigned short) _1, (unsigned int) _2); ++ __builtin_loongarch_iocsrwr_h (_1, _2); + } + + /* Assembly instruction format: rd, rj. 
*/ +@@ -319,7 +316,7 @@ extern __inline void + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrwr_w (unsigned int _1, unsigned int _2) + { +- __builtin_loongarch_iocsrwr_w ((unsigned int) _1, (unsigned int) _2); ++ __builtin_loongarch_iocsrwr_w (_1, _2); + } + + #ifdef __loongarch64 +@@ -329,7 +326,7 @@ extern __inline void + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrwr_d (unsigned long int _1, unsigned int _2) + { +- __builtin_loongarch_iocsrwr_d ((unsigned long int) _1, (unsigned int) _2); ++ __builtin_loongarch_iocsrwr_d (_1, _2); + } + #endif + +@@ -340,7 +337,7 @@ extern __inline float + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frecipe_s (float _1) + { +- return (float) __builtin_loongarch_frecipe_s ((float) _1); ++ return __builtin_loongarch_frecipe_s (_1); + } + + /* Assembly instruction format: fd, fj. */ +@@ -349,7 +346,7 @@ extern __inline double + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frecipe_d (double _1) + { +- return (double) __builtin_loongarch_frecipe_d ((double) _1); ++ return __builtin_loongarch_frecipe_d (_1); + } + + /* Assembly instruction format: fd, fj. */ +@@ -358,7 +355,7 @@ extern __inline float + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frsqrte_s (float _1) + { +- return (float) __builtin_loongarch_frsqrte_s ((float) _1); ++ return __builtin_loongarch_frsqrte_s (_1); + } + + /* Assembly instruction format: fd, fj. 
*/ +@@ -367,7 +364,7 @@ extern __inline double + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __frsqrte_d (double _1) + { +- return (double) __builtin_loongarch_frsqrte_d ((double) _1); ++ return __builtin_loongarch_frsqrte_d (_1); + } + #endif + +-- +2.43.0 + diff --git a/0137-LoongArch-When-checking-whether-the-assembler-suppor.patch b/0137-LoongArch-When-checking-whether-the-assembler-suppor.patch new file mode 100644 index 0000000..e1c70c1 --- /dev/null +++ b/0137-LoongArch-When-checking-whether-the-assembler-suppor.patch @@ -0,0 +1,54 @@ +From 3580ce2b8c57967117e55af48beba0aaa6257e8b Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Wed, 21 Feb 2024 11:17:14 +0800 +Subject: [PATCH 137/188] LoongArch: When checking whether the assembler + supports conditional branch relaxation, add compilation parameter + "--fatal-warnings" to the assembler. + +In binutils 2.40 and earlier versions, only a warning will be reported +when a relocation immediate value is out of bounds. As a result, +the value of the macro HAVE_AS_COND_BRANCH_RELAXATION will also be +defined as 1 when the assembler does not support conditional branch +relaxation. Therefore, add the compilation option "--fatal-warnings" +to avoid this problem. + +gcc/ChangeLog: + + * configure: Regenerate. + * configure.ac: Add parameter "--fatal-warnings" to assemble + when checking whether the assemble support conditional branch + relaxation. 
+--- + gcc/configure | 2 +- + gcc/configure.ac | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/configure b/gcc/configure +index eecfe60d6..f31395017 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -28947,7 +28947,7 @@ else + nop + .endr + beq $a0,$a1,a' > conftest.s +- if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags --fatal-warnings -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? +diff --git a/gcc/configure.ac b/gcc/configure.ac +index d1032440d..35f2c657f 100644 +--- a/gcc/configure.ac ++++ b/gcc/configure.ac +@@ -5349,7 +5349,7 @@ x: + [Define if your assembler supports -mrelax option.])]) + gcc_GAS_CHECK_FEATURE([conditional branch relaxation support], + gcc_cv_as_loongarch_cond_branch_relax, +- [], ++ [--fatal-warnings], + [a: + .rept 32769 + nop +-- +2.43.0 + diff --git a/0138-LoongArch-Don-t-falsely-claim-gold-supported-in-topl.patch b/0138-LoongArch-Don-t-falsely-claim-gold-supported-in-topl.patch new file mode 100644 index 0000000..93fb0ad --- /dev/null +++ b/0138-LoongArch-Don-t-falsely-claim-gold-supported-in-topl.patch @@ -0,0 +1,49 @@ +From e6968eb62b2a0adc7ef591594240582630adfc61 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Wed, 21 Feb 2024 23:54:53 +0800 +Subject: [PATCH 138/188] LoongArch: Don't falsely claim gold supported in + toplevel configure + +The gold linker has never been ported to LoongArch (and it seems +unlikely to be ported in the future as the new architectures are +focusing on lld and/or mold for fast linkers). + +ChangeLog: + + * configure.ac (ENABLE_GOLD): Remove loongarch*-*-* from target + list. + * configure: Regenerate. 
+--- + configure | 2 +- + configure.ac | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/configure b/configure +index 81b4a3cec..ebdca8c62 100755 +--- a/configure ++++ b/configure +@@ -3058,7 +3058,7 @@ case "${ENABLE_GOLD}" in + # Check for target supported by gold. + case "${target}" in + i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \ +- | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-* | loongarch*-*-*) ++ | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-*) + configdirs="$configdirs gold" + if test x${ENABLE_GOLD} = xdefault; then + default_ld=gold +diff --git a/configure.ac b/configure.ac +index 9f8dbd319..4f45fd2ba 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -353,7 +353,7 @@ case "${ENABLE_GOLD}" in + # Check for target supported by gold. + case "${target}" in + i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \ +- | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-* | loongarch*-*-*) ++ | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-*) + configdirs="$configdirs gold" + if test x${ENABLE_GOLD} = xdefault; then + default_ld=gold +-- +2.43.0 + diff --git a/0139-LoongArch-NFC-Deduplicate-crc-instruction-defines.patch b/0139-LoongArch-NFC-Deduplicate-crc-instruction-defines.patch new file mode 100644 index 0000000..a4d705b --- /dev/null +++ b/0139-LoongArch-NFC-Deduplicate-crc-instruction-defines.patch @@ -0,0 +1,56 @@ +From 7a4761a31454f999331e8aa5f831e26e249c4295 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 25 Feb 2024 20:40:41 +0800 +Subject: [PATCH 139/188] LoongArch: NFC: Deduplicate crc instruction defines + +Introduce an iterator for UNSPEC_CRC and UNSPEC_CRCC to make the next +change easier. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (CRC): New define_int_iterator. + (crc): New define_int_attr. + (loongarch_crc_w__w, loongarch_crcc_w__w): Unify + into ... + (loongarch__w__w): ... here. 
+--- + gcc/config/loongarch/loongarch.md | 18 +++++------------- + 1 file changed, 5 insertions(+), 13 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 9356194fe..b5ad9eada 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -4251,24 +4251,16 @@ + + + (define_mode_iterator QHSD [QI HI SI DI]) ++(define_int_iterator CRC [UNSPEC_CRC UNSPEC_CRCC]) ++(define_int_attr crc [(UNSPEC_CRC "crc") (UNSPEC_CRCC "crcc")]) + +-(define_insn "loongarch_crc_w__w" ++(define_insn "loongarch__w__w" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:QHSD 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] +- UNSPEC_CRC))] ++ CRC))] + "" +- "crc.w..w\t%0,%1,%2" +- [(set_attr "type" "unknown") +- (set_attr "mode" "")]) +- +-(define_insn "loongarch_crcc_w__w" +- [(set (match_operand:SI 0 "register_operand" "=r") +- (unspec:SI [(match_operand:QHSD 1 "register_operand" "r") +- (match_operand:SI 2 "register_operand" "r")] +- UNSPEC_CRCC))] +- "" +- "crcc.w..w\t%0,%1,%2" ++ ".w..w\t%0,%1,%2" + [(set_attr "type" "unknown") + (set_attr "mode" "")]) + +-- +2.43.0 + diff --git a/0140-LoongArch-Remove-unneeded-sign-extension-after-crc-c.patch b/0140-LoongArch-Remove-unneeded-sign-extension-after-crc-c.patch new file mode 100644 index 0000000..404d5ea --- /dev/null +++ b/0140-LoongArch-Remove-unneeded-sign-extension-after-crc-c.patch @@ -0,0 +1,70 @@ +From 946f9153a5d813301b05fb56a75e2c7ce22a6c2a Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 25 Feb 2024 20:44:34 +0800 +Subject: [PATCH 140/188] LoongArch: Remove unneeded sign extension after + crc/crcc instructions + +The specification of crc/crcc instructions is clear that the output is +sign-extended to GRLEN. Add a define_insn to tell the compiler this +fact and allow it to remove the unneeded sign extension on crc/crcc +output. 
As crc/crcc instructions are usually used in a tight loop, +this should produce a significant performance gain. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md + (loongarch__w__w_extended): New define_insn. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/crc-sext.c: New test. +--- + gcc/config/loongarch/loongarch.md | 11 +++++++++++ + gcc/testsuite/gcc.target/loongarch/crc-sext.c | 13 +++++++++++++ + 2 files changed, 24 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/crc-sext.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index b5ad9eada..248ad12bb 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -4264,6 +4264,17 @@ + [(set_attr "type" "unknown") + (set_attr "mode" "")]) + ++(define_insn "loongarch__w__w_extended" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI ++ (unspec:SI [(match_operand:QHSD 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")] ++ CRC)))] ++ "TARGET_64BIT" ++ ".w..w\t%0,%1,%2" ++ [(set_attr "type" "unknown") ++ (set_attr "mode" "")]) ++ + ;; With normal or medium code models, if the only use of a pc-relative + ;; address is for loading or storing a value, then relying on linker + ;; relaxation is not better than emitting the machine instruction directly. 
+diff --git a/gcc/testsuite/gcc.target/loongarch/crc-sext.c b/gcc/testsuite/gcc.target/loongarch/crc-sext.c +new file mode 100644 +index 000000000..9ade5a8e4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/crc-sext.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* ++**my_crc: ++** crc.w.d.w \$r4,\$r4,\$r5 ++** jr \$r1 ++*/ ++int my_crc(long long dword, int crc) ++{ ++ return __builtin_loongarch_crc_w_d_w(dword, crc); ++} +-- +2.43.0 + diff --git a/0141-LoongArch-Allow-s9-as-a-register-alias.patch b/0141-LoongArch-Allow-s9-as-a-register-alias.patch new file mode 100644 index 0000000..3573204 --- /dev/null +++ b/0141-LoongArch-Allow-s9-as-a-register-alias.patch @@ -0,0 +1,45 @@ +From a74a85ed5f5b00018553d614b4dc57eb1dd5f5ee Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Thu, 25 Jan 2024 23:49:13 +0800 +Subject: [PATCH 141/188] LoongArch: Allow s9 as a register alias + +The psABI allows using s9 as an alias of r22. + +gcc/ChangeLog: + + * config/loongarch/loongarch.h (ADDITIONAL_REGISTER_NAMES): Add + s9 as an alias of r22. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/regname-fp-s9.c: New test. 
+--- + gcc/config/loongarch/loongarch.h | 1 + + gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c | 3 +++ + 2 files changed, 4 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c + +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index 15261fdc0..8bcdb8729 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -931,6 +931,7 @@ typedef struct { + { "t8", 20 + GP_REG_FIRST }, \ + { "x", 21 + GP_REG_FIRST }, \ + { "fp", 22 + GP_REG_FIRST }, \ ++ { "s9", 22 + GP_REG_FIRST }, \ + { "s0", 23 + GP_REG_FIRST }, \ + { "s1", 24 + GP_REG_FIRST }, \ + { "s2", 25 + GP_REG_FIRST }, \ +diff --git a/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c b/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c +new file mode 100644 +index 000000000..d2e3b80f8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c +@@ -0,0 +1,3 @@ ++/* { dg-do compile } */ ++register long s9 asm("s9"); /* { dg-note "conflicts with 's9'" } */ ++register long fp asm("fp"); /* { dg-warning "register of 'fp' used for multiple global register variables" } */ +-- +2.43.0 + diff --git a/0142-LoongArch-testsuite-Rewrite-x-vfcmp-d-f-.c-to-avoid-.patch b/0142-LoongArch-testsuite-Rewrite-x-vfcmp-d-f-.c-to-avoid-.patch new file mode 100644 index 0000000..9f620fb --- /dev/null +++ b/0142-LoongArch-testsuite-Rewrite-x-vfcmp-d-f-.c-to-avoid-.patch @@ -0,0 +1,1117 @@ +From d568321f8894ed270bf0011892b86baa6d6b82bd Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Tue, 5 Mar 2024 20:46:57 +0800 +Subject: [PATCH 142/188] LoongArch: testsuite: Rewrite {x,}vfcmp-{d,f}.c to + avoid named registers + +Loops on named vector registers are not vectorized (see comment 11 of +PR113622), so these test cases have been failing for a while. +Rewrite them using check-function-bodies to remove hard coding register +names. A barrier is needed to always load the first operand before the +second operand. 
+ +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vfcmp-f.c: Rewrite to avoid named + registers. + * gcc.target/loongarch/vfcmp-d.c: Likewise. + * gcc.target/loongarch/xvfcmp-f.c: Likewise. + * gcc.target/loongarch/xvfcmp-d.c: Likewise. +--- + gcc/testsuite/gcc.target/loongarch/vfcmp-d.c | 202 ++++++++-- + gcc/testsuite/gcc.target/loongarch/vfcmp-f.c | 347 ++++++++++++++---- + gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c | 202 ++++++++-- + gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c | 204 ++++++++-- + 4 files changed, 816 insertions(+), 139 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c b/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c +index 8b870ef38..87e4ed19e 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c ++++ b/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c +@@ -1,28 +1,188 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mlsx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */ ++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ + + #define F double + #define I long long + + #include "vfcmp-f.c" + +-/* { dg-final { scan-assembler "compare_quiet_equal:.*\tvfcmp\\.ceq\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\tvfcmp\\.cune\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_greater:.*\tvfcmp\\.slt\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\tvfcmp\\.sle\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_less:.*\tvfcmp\\.slt\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\tvfcmp\\.sle\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_equal\n" } } */ +-/* { 
dg-final { scan-assembler "compare_signaling_not_greater:.*\tvfcmp\\.sule\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_not_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\tvfcmp\\.sult\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_not_less:.*\tvfcmp\\.sule\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_not_less\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\tvfcmp\\.sult\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_less:.*\tvfcmp\\.clt\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\tvfcmp\\.cle\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_greater:.*\tvfcmp\\.clt\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\tvfcmp\\.cle\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_not_less:.*\tvfcmp\\.cule\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_not_less\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\tvfcmp\\.cult\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\tvfcmp\\.cule\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\tvfcmp\\.cult\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_unordered:.*\tvfcmp\\.cun\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_unordered\n" } } */ +-/* { dg-final { scan-assembler 
"compare_quiet_ordered:.*\tvfcmp\\.cor\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_ordered\n" } } */ ++/* ++** compare_quiet_equal: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.ceq.d (\$vr[0-9]+),(\1,\2|\2,\1) ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_not_equal: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.cune.d (\$vr[0-9]+),(\1,\2|\2,\1) ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_greater: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.slt.d (\$vr[0-9]+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_greater_equal: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.sle.d (\$vr[0-9]+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_less: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.slt.d (\$vr[0-9]+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_less_equal: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.sle.d (\$vr[0-9]+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_not_greater: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.sule.d (\$vr[0-9]+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_less_unordered: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.sult.d (\$vr[0-9]+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_not_less: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.sule.d (\$vr[0-9]+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_greater_unordered: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.sult.d (\$vr[0-9]+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_less: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.clt.d (\$vr[0-9]+),\1,\2 ++** vst \3,\$r6,0 
++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_less_equal: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.cle.d (\$vr[0-9]+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_greater: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.clt.d (\$vr[0-9]+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_greater_equal: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.cle.d (\$vr[0-9]+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_not_less: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.cule.d (\$vr[0-9]+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_greater_unordered: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.cult.d (\$vr[0-9]+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_not_greater: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.cule.d (\$vr[0-9]+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_less_unordered: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.cult.d (\$vr[0-9]+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_unordered: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.cun.d (\$vr[0-9]+),(\1,\2|\2,\1) ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_ordered: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.cor.d (\$vr[0-9]+),(\1,\2|\2,\1) ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ +diff --git a/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c b/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c +index b9110b90c..8d2671998 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c ++++ b/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c +@@ -2,7 +2,8 @@ + For details read C23 Annex F.3 and LoongArch Vol. 1 section 3.2.2.1. 
*/ + + /* { dg-do compile } */ +-/* { dg-options "-O2 -mlsx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */ ++/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ + + #ifndef F + #define F float +@@ -19,160 +20,354 @@ + typedef F VF __attribute__ ((vector_size (VL))); + typedef I VI __attribute__ ((vector_size (VL))); + +-register VF a asm ("f0"); +-register VF b asm ("f1"); +-register VI c asm ("f2"); ++#define ARGS const VF *a, const VF *b, VI *c + + void +-compare_quiet_equal (void) ++compare_quiet_equal (ARGS) + { +- c = (a == b); ++ VF _a = *a; ++ asm("" ::: "memory"); ++ *c = (_a == *b); + } + + void +-compare_quiet_not_equal (void) ++compare_quiet_not_equal (ARGS) + { +- c = (a != b); ++ VF _a = *a; ++ asm("" ::: "memory"); ++ *c = (_a != *b); + } + + void +-compare_signaling_greater (void) ++compare_signaling_greater (ARGS) + { +- c = (a > b); ++ VF _a = *a; ++ asm("" ::: "memory"); ++ *c = (_a > *b); + } + + void +-compare_signaling_greater_equal (void) ++compare_signaling_greater_equal (ARGS) + { +- c = (a >= b); ++ VF _a = *a; ++ asm("" ::: "memory"); ++ *c = (_a >= *b); + } + + void +-compare_signaling_less (void) ++compare_signaling_less (ARGS) + { +- c = (a < b); ++ VF _a = *a; ++ asm("" ::: "memory"); ++ *c = (_a < *b); + } + + void +-compare_signaling_less_equal (void) ++compare_signaling_less_equal (ARGS) + { +- c = (a <= b); ++ VF _a = *a; ++ asm("" ::: "memory"); ++ *c = (_a <= *b); + } + + void +-compare_signaling_not_greater (void) ++compare_signaling_not_greater (ARGS) + { +- c = ~(a > b); ++ VF _a = *a; ++ asm("" ::: "memory"); ++ *c = ~(_a > *b); + } + + void +-compare_signaling_less_unordered (void) ++compare_signaling_less_unordered (ARGS) + { +- c = ~(a >= b); ++ VF _a = *a; ++ asm("" ::: "memory"); ++ *c = ~(_a >= *b); + } + + void +-compare_signaling_not_less (void) ++compare_signaling_not_less (ARGS) + { +- c = ~(a < b); ++ VF _a = *a; ++ asm("" ::: "memory"); ++ *c = 
~(_a < *b); + } + + void +-compare_signaling_greater_unordered (void) ++compare_signaling_greater_unordered (ARGS) + { +- c = ~(a <= b); ++ VF _a = *a; ++ asm("" ::: "memory"); ++ *c = ~(_a <= *b); + } + + void +-compare_quiet_less (void) ++compare_quiet_less (ARGS) + { +- for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) +- c[i] = __builtin_isless (a[i], b[i]) ? -1 : 0; ++ VF _a = *a; ++ asm("" ::: "memory"); ++ for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++) ++ (*c)[i] = __builtin_isless (_a[i], (*b)[i]) ? -1 : 0; + } + + void +-compare_quiet_less_equal (void) ++compare_quiet_less_equal (ARGS) + { +- for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) +- c[i] = __builtin_islessequal (a[i], b[i]) ? -1 : 0; ++ VF _a = *a; ++ asm("" ::: "memory"); ++ for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++) ++ (*c)[i] = __builtin_islessequal (_a[i], (*b)[i]) ? -1 : 0; + } + + void +-compare_quiet_greater (void) ++compare_quiet_greater (ARGS) + { +- for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) +- c[i] = __builtin_isgreater (a[i], b[i]) ? -1 : 0; ++ VF _a = *a; ++ asm("" ::: "memory"); ++ for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++) ++ (*c)[i] = __builtin_isgreater (_a[i], (*b)[i]) ? -1 : 0; + } + + void +-compare_quiet_greater_equal (void) ++compare_quiet_greater_equal (ARGS) + { +- for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) +- c[i] = __builtin_isgreaterequal (a[i], b[i]) ? -1 : 0; ++ VF _a = *a; ++ asm("" ::: "memory"); ++ for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++) ++ (*c)[i] = __builtin_isgreaterequal (_a[i], (*b)[i]) ? -1 : 0; + } + + void +-compare_quiet_not_less (void) ++compare_quiet_not_less (ARGS) + { +- for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) +- c[i] = __builtin_isless (a[i], b[i]) ? 0 : -1; ++ VF _a = *a; ++ asm("" ::: "memory"); ++ for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++) ++ (*c)[i] = __builtin_isless (_a[i], (*b)[i]) ? 
0 : -1; + } + + void +-compare_quiet_greater_unordered (void) ++compare_quiet_greater_unordered (ARGS) + { +- for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) +- c[i] = __builtin_islessequal (a[i], b[i]) ? 0 : -1; ++ VF _a = *a; ++ asm("" ::: "memory"); ++ for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++) ++ (*c)[i] = __builtin_islessequal (_a[i], (*b)[i]) ? 0 : -1; + } + + void +-compare_quiet_not_greater (void) ++compare_quiet_not_greater (ARGS) + { +- for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) +- c[i] = __builtin_isgreater (a[i], b[i]) ? 0 : -1; ++ VF _a = *a; ++ asm("" ::: "memory"); ++ for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++) ++ (*c)[i] = __builtin_isgreater (_a[i], (*b)[i]) ? 0 : -1; + } + + void +-compare_quiet_less_unordered (void) ++compare_quiet_less_unordered (ARGS) + { +- for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) +- c[i] = __builtin_isgreaterequal (a[i], b[i]) ? 0 : -1; ++ VF _a = *a; ++ asm("" ::: "memory"); ++ for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++) ++ (*c)[i] = __builtin_isgreaterequal (_a[i], (*b)[i]) ? 0 : -1; + } + + void +-compare_quiet_unordered (void) ++compare_quiet_unordered (ARGS) + { +- for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) +- c[i] = __builtin_isunordered (a[i], b[i]) ? -1 : 0; ++ VF _a = *a; ++ asm("" ::: "memory"); ++ for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++) ++ (*c)[i] = __builtin_isunordered (_a[i], (*b)[i]) ? -1 : 0; + } + + void +-compare_quiet_ordered (void) ++compare_quiet_ordered (ARGS) + { +- for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++) +- c[i] = __builtin_isunordered (a[i], b[i]) ? 0 : -1; ++ VF _a = *a; ++ asm("" ::: "memory"); ++ for (int i = 0; i < sizeof (*c) / sizeof ((*c)[0]); i++) ++ (*c)[i] = __builtin_isunordered (_a[i], (*b)[i]) ? 0 : -1; + } + +-/* The "-" matches the .size directive after the function +- body, so we can ensure the instruction is in the correct function. 
*/ ++/* ++** compare_quiet_equal: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.ceq.s (\$vr[0-9]+),(\1,\2|\2,\1) ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ + +-/* { dg-final { scan-assembler "compare_quiet_equal:.*\tvfcmp\\.ceq\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\tvfcmp\\.cune\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_greater:.*\tvfcmp\\.slt\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\tvfcmp\\.sle\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_less:.*\tvfcmp\\.slt\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\tvfcmp\\.sle\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\tvfcmp\\.sule\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_not_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\tvfcmp\\.sult\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_not_less:.*\tvfcmp\\.sule\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_not_less\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\tvfcmp\\.sult\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_less:.*\tvfcmp\\.clt\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\tvfcmp\\.cle\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_equal\n" } } */ +-/* { dg-final { scan-assembler 
"compare_quiet_greater:.*\tvfcmp\\.clt\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\tvfcmp\\.cle\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_not_less:.*\tvfcmp\\.cule\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_not_less\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\tvfcmp\\.cult\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\tvfcmp\\.cule\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\tvfcmp\\.cult\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_unordered:.*\tvfcmp\\.cun\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_ordered:.*\tvfcmp\\.cor\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_ordered\n" } } */ ++/* ++** compare_quiet_not_equal: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.cune.s (\$vr[0-9]+),(\1,\2|\2,\1) ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_greater: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.slt.s (\$vr[0-9]+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_greater_equal: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.sle.s (\$vr[0-9]+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_less: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.slt.s (\$vr[0-9]+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_less_equal: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.sle.s (\$vr[0-9]+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** 
compare_signaling_not_greater: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.sule.s (\$vr[0-9]+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_less_unordered: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.sult.s (\$vr[0-9]+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_not_less: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.sule.s (\$vr[0-9]+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_greater_unordered: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.sult.s (\$vr[0-9]+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_less: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.clt.s (\$vr[0-9]+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_less_equal: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.cle.s (\$vr[0-9]+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_greater: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.clt.s (\$vr[0-9]+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_greater_equal: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.cle.s (\$vr[0-9]+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_not_less: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.cule.s (\$vr[0-9]+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_greater_unordered: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.cult.s (\$vr[0-9]+),\2,\1 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_not_greater: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.cule.s (\$vr[0-9]+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_less_unordered: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** 
vfcmp.cult.s (\$vr[0-9]+),\1,\2 ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_unordered: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.cun.s (\$vr[0-9]+),(\1,\2|\2,\1) ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_ordered: ++** vld (\$vr[0-9]+),\$r4,0 ++** vld (\$vr[0-9]+),\$r5,0 ++** vfcmp.cor.s (\$vr[0-9]+),(\1,\2|\2,\1) ++** vst \3,\$r6,0 ++** jr \$r1 ++*/ +diff --git a/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c b/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c +index d8017caaa..b27efebad 100644 +--- a/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c ++++ b/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c +@@ -1,5 +1,6 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mlasx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */ ++/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ + + #define F double + #define I long long +@@ -7,23 +8,182 @@ + + #include "vfcmp-f.c" + +-/* { dg-final { scan-assembler "compare_quiet_equal:.*\txvfcmp\\.ceq\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\txvfcmp\\.cune\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_greater:.*\txvfcmp\\.slt\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\txvfcmp\\.sle\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_less:.*\txvfcmp\\.slt\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\txvfcmp\\.sle\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_equal\n" } } */ +-/* { dg-final { scan-assembler 
"compare_signaling_not_greater:.*\txvfcmp\\.sule\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_not_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\txvfcmp\\.sult\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_not_less:.*\txvfcmp\\.sule\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_not_less\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\txvfcmp\\.sult\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_less:.*\txvfcmp\\.clt\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\txvfcmp\\.cle\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_greater:.*\txvfcmp\\.clt\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\txvfcmp\\.cle\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_not_less:.*\txvfcmp\\.cule\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_not_less\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\txvfcmp\\.cult\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\txvfcmp\\.cule\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\txvfcmp\\.cult\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_unordered:.*\txvfcmp\\.cun\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_unordered\n" } } */ +-/* { dg-final { scan-assembler 
"compare_quiet_ordered:.*\txvfcmp\\.cor\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_ordered\n" } } */ ++/* ++** compare_quiet_equal: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.ceq.d (\$xr[0-9]+),(\1,\2|\2,\1) ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_not_equal: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.cune.d (\$xr[0-9]+),(\1,\2|\2,\1) ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_greater: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.slt.d (\$xr[0-9]+),\2,\1 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_greater_equal: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.sle.d (\$xr[0-9]+),\2,\1 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_less: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.slt.d (\$xr[0-9]+),\1,\2 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_less_equal: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.sle.d (\$xr[0-9]+),\1,\2 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_not_greater: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.sule.d (\$xr[0-9]+),\1,\2 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_less_unordered: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.sult.d (\$xr[0-9]+),\1,\2 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_not_less: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.sule.d (\$xr[0-9]+),\2,\1 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_greater_unordered: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.sult.d (\$xr[0-9]+),\2,\1 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_less: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** 
xvfcmp.clt.d (\$xr[0-9]+),\1,\2 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_less_equal: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.cle.d (\$xr[0-9]+),\1,\2 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_greater: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.clt.d (\$xr[0-9]+),\2,\1 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_greater_equal: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.cle.d (\$xr[0-9]+),\2,\1 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_not_less: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.cule.d (\$xr[0-9]+),\2,\1 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_greater_unordered: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.cult.d (\$xr[0-9]+),\2,\1 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_not_greater: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.cule.d (\$xr[0-9]+),\1,\2 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_less_unordered: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.cult.d (\$xr[0-9]+),\1,\2 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_unordered: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.cun.d (\$xr[0-9]+),(\1,\2|\2,\1) ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_ordered: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.cor.d (\$xr[0-9]+),(\1,\2|\2,\1) ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ +diff --git a/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c b/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c +index b54556475..1ca1e6c8b 100644 +--- a/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c ++++ b/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c +@@ -1,27 +1,189 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mlasx -ffixed-f0 
-ffixed-f1 -ffixed-f2" } */ ++/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ + ++#define F float ++#define I int + #define VL 32 + + #include "vfcmp-f.c" + +-/* { dg-final { scan-assembler "compare_quiet_equal:.*\txvfcmp\\.ceq\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\txvfcmp\\.cune\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_greater:.*\txvfcmp\\.slt\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\txvfcmp\\.sle\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_less:.*\txvfcmp\\.slt\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\txvfcmp\\.sle\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\txvfcmp\\.sule\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_not_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\txvfcmp\\.sult\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_not_less:.*\txvfcmp\\.sule\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_not_less\n" } } */ +-/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\txvfcmp\\.sult\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_less:.*\txvfcmp\\.clt\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\txvfcmp\\.cle\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_equal\n" } } */ +-/* { dg-final { 
scan-assembler "compare_quiet_greater:.*\txvfcmp\\.clt\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\txvfcmp\\.cle\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_equal\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_not_less:.*\txvfcmp\\.cule\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_not_less\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\txvfcmp\\.cult\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\txvfcmp\\.cule\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_greater\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\txvfcmp\\.cult\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_unordered:.*\txvfcmp\\.cun\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_unordered\n" } } */ +-/* { dg-final { scan-assembler "compare_quiet_ordered:.*\txvfcmp\\.cor\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_ordered\n" } } */ ++/* ++** compare_quiet_equal: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.ceq.s (\$xr[0-9]+),(\1,\2|\2,\1) ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_not_equal: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.cune.s (\$xr[0-9]+),(\1,\2|\2,\1) ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_greater: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.slt.s (\$xr[0-9]+),\2,\1 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_greater_equal: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.sle.s (\$xr[0-9]+),\2,\1 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_less: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.slt.s (\$xr[0-9]+),\1,\2 ++** xvst \3,\$r6,0 
++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_less_equal: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.sle.s (\$xr[0-9]+),\1,\2 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_not_greater: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.sule.s (\$xr[0-9]+),\1,\2 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_less_unordered: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.sult.s (\$xr[0-9]+),\1,\2 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_not_less: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.sule.s (\$xr[0-9]+),\2,\1 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_signaling_greater_unordered: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.sult.s (\$xr[0-9]+),\2,\1 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_less: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.clt.s (\$xr[0-9]+),\1,\2 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_less_equal: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.cle.s (\$xr[0-9]+),\1,\2 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_greater: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.clt.s (\$xr[0-9]+),\2,\1 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_greater_equal: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.cle.s (\$xr[0-9]+),\2,\1 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_not_less: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.cule.s (\$xr[0-9]+),\2,\1 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_greater_unordered: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.cult.s (\$xr[0-9]+),\2,\1 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** 
compare_quiet_not_greater: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.cule.s (\$xr[0-9]+),\1,\2 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_less_unordered: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.cult.s (\$xr[0-9]+),\1,\2 ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_unordered: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.cun.s (\$xr[0-9]+),(\1,\2|\2,\1) ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ ++ ++/* ++** compare_quiet_ordered: ++** xvld (\$xr[0-9]+),\$r4,0 ++** xvld (\$xr[0-9]+),\$r5,0 ++** xvfcmp.cor.s (\$xr[0-9]+),(\1,\2|\2,\1) ++** xvst \3,\$r6,0 ++** jr \$r1 ++*/ +-- +2.43.0 + diff --git a/0143-LoongArch-Use-lib-instead-of-lib64-as-the-library-se.patch b/0143-LoongArch-Use-lib-instead-of-lib64-as-the-library-se.patch new file mode 100644 index 0000000..7b1e7f0 --- /dev/null +++ b/0143-LoongArch-Use-lib-instead-of-lib64-as-the-library-se.patch @@ -0,0 +1,80 @@ +From 415d38d84b2e363a2d512b54baac5532553f1402 Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Wed, 6 Mar 2024 09:19:59 +0800 +Subject: [PATCH 143/188] LoongArch: Use /lib instead of /lib64 as the library + search path for MUSL. + +gcc/ChangeLog: + + * config.gcc: Add a case for loongarch*-*-linux-musl*. + * config/loongarch/linux.h: Disable the multilib-compatible + treatment for *musl* targets. + * config/loongarch/musl.h: New file. 
+--- + gcc/config.gcc | 3 +++ + gcc/config/loongarch/linux.h | 4 +++- + gcc/config/loongarch/musl.h | 23 +++++++++++++++++++++++ + 3 files changed, 29 insertions(+), 1 deletion(-) + create mode 100644 gcc/config/loongarch/musl.h + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 039187fa2..499b36b45 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -2509,6 +2509,9 @@ riscv*-*-freebsd*) + + loongarch*-*-linux*) + tm_file="elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}" ++ case ${target} in ++ *-linux-musl*) tm_file="${tm_file} loongarch/musl.h" ++ esac + tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h loongarch/loongarch-driver.h" + extra_options="${extra_options} linux-android.opt" + tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux" +diff --git a/gcc/config/loongarch/linux.h b/gcc/config/loongarch/linux.h +index 00039ac18..38aa4da2c 100644 +--- a/gcc/config/loongarch/linux.h ++++ b/gcc/config/loongarch/linux.h +@@ -21,7 +21,9 @@ along with GCC; see the file COPYING3. If not see + * This ensures that a compiler configured with --disable-multilib + * can work in a multilib environment. */ + +-#if defined(LA_DISABLE_MULTILIB) && defined(LA_DISABLE_MULTIARCH) ++#if !defined(LA_DEFAULT_TARGET_MUSL) \ ++ && defined(LA_DISABLE_MULTILIB) \ ++ && defined(LA_DISABLE_MULTIARCH) + + #if DEFAULT_ABI_BASE == ABI_BASE_LP64D + #define ABI_LIBDIR "lib64" +diff --git a/gcc/config/loongarch/musl.h b/gcc/config/loongarch/musl.h +new file mode 100644 +index 000000000..fa43bc866 +--- /dev/null ++++ b/gcc/config/loongarch/musl.h +@@ -0,0 +1,23 @@ ++/* Definitions for MUSL C library support. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. 
++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++ ++#ifndef LA_DEFAULT_TARGET_MUSL ++#define LA_DEFAULT_TARGET_MUSL ++#endif +-- +2.43.0 + diff --git a/0144-LoongArch-testsuite-Fix-problems-with-incorrect-resu.patch b/0144-LoongArch-testsuite-Fix-problems-with-incorrect-resu.patch new file mode 100644 index 0000000..393d41c --- /dev/null +++ b/0144-LoongArch-testsuite-Fix-problems-with-incorrect-resu.patch @@ -0,0 +1,551 @@ +From 2170e0e811cb1b592f7577571f10b5ab95da9eaa Mon Sep 17 00:00:00 2001 +From: chenxiaolong +Date: Fri, 25 Oct 2024 06:05:59 +0000 +Subject: [PATCH 144/188] LoongArch: testsuite:Fix problems with incorrect + results in vector test cases. + +In simd_correctness_check.h, the role of the macro ASSERTEQ_64 is to check the +result of the passed vector values for the 64-bit data of each array element. +It turns out that it uses the abs() function to check only the lower 32 bits +of the data at a time, so it replaces abs() with the llabs() function. + +However, the following two problems may occur after modification: + +1.FAIL in lasx-xvfrint_s.c and lsx-vfrint_s.c +The reason for the error is because vector test cases that use __m{128,256} to +define vector types are composed of 32-bit primitive types, they should use +ASSERTEQ_32 instead of ASSERTEQ_64 to check for correctness. + +2.FAIL in lasx-xvshuf_b.c and lsx-vshuf.c +The cause of the error is that the expected result of the function setting in +the test case is incorrect. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c: Replace + ASSERTEQ_64 with the macro ASSERTEQ_32. 
+ * gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c: Modify the expected + test results of some functions according to the function of the vector + instruction. + * gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c: Same + modification as lasx-xvfrint_s.c. + * gcc.target/loongarch/vector/lsx/lsx-vshuf.c: Same + modification as lasx-xvshuf_b.c. + * gcc.target/loongarch/vector/simd_correctness_check.h: Use the llabs() + function instead of abs() to check the correctness of the results. +--- + .../loongarch/vector/lasx/lasx-xvfrint_s.c | 58 +++++++++---------- + .../loongarch/vector/lsx/lsx-vfrint_s.c | 50 ++++++++-------- + .../loongarch/vector/simd_correctness_check.h | 2 +- + 3 files changed, 55 insertions(+), 55 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c +index fbfe300ea..4538528a6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvfrint_s.c +@@ -184,7 +184,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffffff; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -203,7 +203,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffffff; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -222,7 +222,7 @@ main () + *((int *)&__m256_result[1]) = 0xffffffff; + *((int *)&__m256_result[0]) = 0xffffffff; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ 
ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x01010101; + *((int *)&__m256_op0[6]) = 0x01010101; +@@ -241,7 +241,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -260,7 +260,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffffff; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -279,7 +279,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffffff; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -298,7 +298,7 @@ main () + *((int *)&__m256_result[1]) = 0xffffffff; + *((int *)&__m256_result[0]) = 0xffffffff; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x01010101; + *((int *)&__m256_op0[6]) = 0x01010101; +@@ -317,7 +317,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrne_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x55555555; + *((int *)&__m256_op0[6]) = 0x36aaaaac; +@@ -336,7 +336,7 @@ main () + *((int *)&__m256_result[1]) = 0x55555555; + 
*((int *)&__m256_result[0]) = 0x80000000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -355,7 +355,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffc741; + *((int *)&__m256_op0[6]) = 0x8a023680; +@@ -374,7 +374,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -393,7 +393,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0xffffffff; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00200101; + *((int *)&__m256_op0[6]) = 0x01610000; +@@ -412,7 +412,7 @@ main () + *((int *)&__m256_result[1]) = 0x3f800000; + *((int *)&__m256_result[0]) = 0x3f800000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -431,7 +431,7 @@ main () + *((int *)&__m256_result[1]) = 0xfefefefe; + *((int *)&__m256_result[0]) = 0x3f800000; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int 
*)&__m256_op0[7]) = 0x1c1c1c1c; + *((int *)&__m256_op0[6]) = 0x1c1c1c1c; +@@ -450,7 +450,7 @@ main () + *((int *)&__m256_result[1]) = 0xfffffffe; + *((int *)&__m256_result[0]) = 0xffffff00; + __m256_out = __lasx_xvfrintrp_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -469,7 +469,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrm_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -488,7 +488,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrm_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffffff; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -507,7 +507,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0xffffffff; + __m256_out = __lasx_xvfrintrm_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x5d20a0a1; + *((int *)&__m256_op0[6]) = 0x5d20a0a1; +@@ -526,7 +526,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrm_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x001d001d; +@@ -545,7 +545,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = 
__lasx_xvfrintrm_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -564,7 +564,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrm_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -583,7 +583,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrm_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -602,7 +602,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrz_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffffff; + *((int *)&__m256_op0[6]) = 0xfffffffe; +@@ -621,7 +621,7 @@ main () + *((int *)&__m256_result[1]) = 0xffffffff; + *((int *)&__m256_result[0]) = 0xfffffffe; + __m256_out = __lasx_xvfrintrz_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 0x00000000; +@@ -640,7 +640,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0x00000000; + __m256_out = __lasx_xvfrintrz_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x00000000; + *((int *)&__m256_op0[6]) = 
0x00000000; +@@ -659,7 +659,7 @@ main () + *((int *)&__m256_result[1]) = 0x00000000; + *((int *)&__m256_result[0]) = 0xffffffff; + __m256_out = __lasx_xvfrintrz_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0x80000000; + *((int *)&__m256_op0[6]) = 0x80000000; +@@ -678,7 +678,7 @@ main () + *((int *)&__m256_result[1]) = 0xffffffff; + *((int *)&__m256_result[0]) = 0xffffffff; + __m256_out = __lasx_xvfrintrz_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xffffffff; + *((int *)&__m256_op0[6]) = 0xffffffff; +@@ -697,7 +697,7 @@ main () + *((int *)&__m256_result[1]) = 0xffffffff; + *((int *)&__m256_result[0]) = 0xffffffff; + __m256_out = __lasx_xvfrintrz_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + *((int *)&__m256_op0[7]) = 0xf5fffc00; + *((int *)&__m256_op0[6]) = 0xfc000000; +@@ -716,7 +716,7 @@ main () + *((int *)&__m256_result[1]) = 0xf5fffc00; + *((int *)&__m256_result[0]) = 0xfc000000; + __m256_out = __lasx_xvfrintrz_s (__m256_op0); +- ASSERTEQ_64 (__LINE__, __m256_result, __m256_out); ++ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out); + + return 0; + } +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c +index 61f28325a..5ba91ee51 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vfrint_s.c +@@ -79,7 +79,7 @@ main () + *((int *)&__m128_result[1]) = 0x00000000; + *((int *)&__m128_result[0]) = 0x00000000; + __m128_out = __lsx_vfrintrne_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0x00130013; + 
*((int *)&__m128_op0[2]) = 0x00130013; +@@ -90,7 +90,7 @@ main () + *((int *)&__m128_result[1]) = 0x00000000; + *((int *)&__m128_result[0]) = 0x00000000; + __m128_out = __lsx_vfrintrne_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0x20202020; + *((int *)&__m128_op0[2]) = 0x20202020; +@@ -101,7 +101,7 @@ main () + *((int *)&__m128_result[1]) = 0x00000000; + *((int *)&__m128_result[0]) = 0x00000000; + __m128_out = __lsx_vfrintrne_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0x00000000; + *((int *)&__m128_op0[2]) = 0x00000000; +@@ -112,7 +112,7 @@ main () + *((int *)&__m128_result[1]) = 0x00000000; + *((int *)&__m128_result[0]) = 0x00000000; + __m128_out = __lsx_vfrintrne_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0xffffffff; + *((int *)&__m128_op0[2]) = 0xffffffff; +@@ -123,7 +123,7 @@ main () + *((int *)&__m128_result[1]) = 0xffffffff; + *((int *)&__m128_result[0]) = 0xffffffff; + __m128_out = __lsx_vfrintrne_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0x00000000; + *((int *)&__m128_op0[2]) = 0x00000001; +@@ -134,7 +134,7 @@ main () + *((int *)&__m128_result[1]) = 0x00000000; + *((int *)&__m128_result[0]) = 0x00000000; + __m128_out = __lsx_vfrintrne_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0x00000000; + *((int *)&__m128_op0[2]) = 0x00000000; +@@ -145,7 +145,7 @@ main () + *((int *)&__m128_result[1]) = 0x00000000; + *((int *)&__m128_result[0]) = 0x00000000; + __m128_out = __lsx_vfrintrne_s (__m128_op0); +- ASSERTEQ_64 
(__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0xfffbfffb; + *((int *)&__m128_op0[2]) = 0xfffbfffb; +@@ -156,7 +156,7 @@ main () + *((int *)&__m128_result[1]) = 0xfffbfffb; + *((int *)&__m128_result[0]) = 0xfffbfffb; + __m128_out = __lsx_vfrintrne_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0x0ff780a1; + *((int *)&__m128_op0[2]) = 0x0efc01af; +@@ -167,7 +167,7 @@ main () + *((int *)&__m128_result[1]) = 0x00000000; + *((int *)&__m128_result[0]) = 0xfe7f0000; + __m128_out = __lsx_vfrintrne_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0x00000000; + *((int *)&__m128_op0[2]) = 0x00000000; +@@ -178,7 +178,7 @@ main () + *((int *)&__m128_result[1]) = 0x00000000; + *((int *)&__m128_result[0]) = 0x00000000; + __m128_out = __lsx_vfrintrp_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0x00000000; + *((int *)&__m128_op0[2]) = 0xefffffff; +@@ -189,7 +189,7 @@ main () + *((int *)&__m128_result[1]) = 0x00000000; + *((int *)&__m128_result[0]) = 0x00000000; + __m128_out = __lsx_vfrintrp_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0xffffffff; + *((int *)&__m128_op0[2]) = 0xffffff00; +@@ -200,7 +200,7 @@ main () + *((int *)&__m128_result[1]) = 0xffffffff; + *((int *)&__m128_result[0]) = 0xffffff00; + __m128_out = __lsx_vfrintrp_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0xffffb96b; + *((int *)&__m128_op0[2]) = 0xffff57c9; +@@ -211,7 +211,7 @@ main () + *((int 
*)&__m128_result[1]) = 0xffff6080; + *((int *)&__m128_result[0]) = 0xffff4417; + __m128_out = __lsx_vfrintrp_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0x00ff00ff; + *((int *)&__m128_op0[2]) = 0x00ff00ff; +@@ -222,7 +222,7 @@ main () + *((int *)&__m128_result[1]) = 0x62cbf96e; + *((int *)&__m128_result[0]) = 0x4acfaf40; + __m128_out = __lsx_vfrintrp_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0x00000000; + *((int *)&__m128_op0[2]) = 0x00002000; +@@ -233,7 +233,7 @@ main () + *((int *)&__m128_result[1]) = 0x00000000; + *((int *)&__m128_result[0]) = 0x3f800000; + __m128_out = __lsx_vfrintrp_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0xffffffff; + *((int *)&__m128_op0[2]) = 0xffffffff; +@@ -244,7 +244,7 @@ main () + *((int *)&__m128_result[1]) = 0xffffffff; + *((int *)&__m128_result[0]) = 0xffffffff; + __m128_out = __lsx_vfrintrp_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0x63636363; + *((int *)&__m128_op0[2]) = 0x63abdf16; +@@ -255,7 +255,7 @@ main () + *((int *)&__m128_result[1]) = 0x42000000; + *((int *)&__m128_result[0]) = 0x3f800000; + __m128_out = __lsx_vfrintrp_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0x00000000; + *((int *)&__m128_op0[2]) = 0x00000000; +@@ -266,7 +266,7 @@ main () + *((int *)&__m128_result[1]) = 0x00000000; + *((int *)&__m128_result[0]) = 0x00000000; + __m128_out = __lsx_vfrintrm_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, 
__m128_out); + + *((int *)&__m128_op0[3]) = 0xa5c4c774; + *((int *)&__m128_op0[2]) = 0x856ba83b; +@@ -277,7 +277,7 @@ main () + *((int *)&__m128_result[1]) = 0xbf800000; + *((int *)&__m128_result[0]) = 0x54691124; + __m128_out = __lsx_vfrintrm_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0x00000000; + *((int *)&__m128_op0[2]) = 0x00010002; +@@ -288,7 +288,7 @@ main () + *((int *)&__m128_result[1]) = 0xffffffff; + *((int *)&__m128_result[0]) = 0xffd60015; + __m128_out = __lsx_vfrintrm_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0xffffffff; + *((int *)&__m128_op0[2]) = 0x3c992b2e; +@@ -299,7 +299,7 @@ main () + *((int *)&__m128_result[1]) = 0xffffffff; + *((int *)&__m128_result[0]) = 0xffff730f; + __m128_out = __lsx_vfrintrz_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0x00000000; + *((int *)&__m128_op0[2]) = 0x00000001; +@@ -310,7 +310,7 @@ main () + *((int *)&__m128_result[1]) = 0x00000000; + *((int *)&__m128_result[0]) = 0x00000000; + __m128_out = __lsx_vfrintrz_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0x18171615; + *((int *)&__m128_op0[2]) = 0x17161514; +@@ -321,7 +321,7 @@ main () + *((int *)&__m128_result[1]) = 0x00000000; + *((int *)&__m128_result[0]) = 0x00000000; + __m128_out = __lsx_vfrintrz_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0x62cbf96e; + *((int *)&__m128_op0[2]) = 0x4acfaf40; +@@ -332,7 +332,7 @@ main () + *((int *)&__m128_result[1]) = 0xf0bc9a52; + *((int *)&__m128_result[0]) = 0x78285a4a; + 
__m128_out = __lsx_vfrintrz_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + *((int *)&__m128_op0[3]) = 0x00000000; + *((int *)&__m128_op0[2]) = 0x00000000; +@@ -343,7 +343,7 @@ main () + *((int *)&__m128_result[1]) = 0x00000000; + *((int *)&__m128_result[0]) = 0x00000000; + __m128_out = __lsx_vfrintrz_s (__m128_op0); +- ASSERTEQ_64 (__LINE__, __m128_result, __m128_out); ++ ASSERTEQ_32 (__LINE__, __m128_result, __m128_out); + + return 0; + } +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h +index 551340bd5..c1adab586 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h ++++ b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h +@@ -10,7 +10,7 @@ + { \ + long long *temp_ref = (long long *)&ref[i], \ + *temp_res = (long long *)&res[i]; \ +- if (abs (*temp_ref - *temp_res) > 0) \ ++ if (llabs (*temp_ref - *temp_res) > 0) \ + { \ + printf (" error: %s at line %ld , expected " #ref \ + "[%ld]:0x%016lx, got: 0x%016lx\n", \ +-- +2.43.0 + diff --git a/0145-LoongArch-Fixed-an-issue-with-the-implementation-of-.patch b/0145-LoongArch-Fixed-an-issue-with-the-implementation-of-.patch new file mode 100644 index 0000000..4c0d097 --- /dev/null +++ b/0145-LoongArch-Fixed-an-issue-with-the-implementation-of-.patch @@ -0,0 +1,130 @@ +From 44a9ae67e19c0d744bd744cb0e9ae9e0069e40f1 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Tue, 5 Mar 2024 14:43:04 +0800 +Subject: [PATCH 145/188] LoongArch: Fixed an issue with the implementation of + the template atomic_compare_and_swapsi. + +If the hardware does not support LAMCAS, atomic_compare_and_swapsi needs to be +implemented through "ll.w+sc.w". In the implementation of the instruction sequence, +it is necessary to determine whether the two registers are equal. 
+Since LoongArch's comparison instructions do not distinguish between 32-bit +and 64-bit, the two operand registers that need to be compared are sign +extended, and one of the operand registers is obtained from memory through the +"ll.w" instruction, which can ensure that the sign extension is carried out. +However, the value of the other operand register is not guaranteed to be the +value of the sign extension. + +gcc/ChangeLog: + + * config/loongarch/sync.md (atomic_cas_value_strong): + In loongarch64, a sign extension operation is added when + operands[2] is a register operand and the mode is SImode. + +gcc/testsuite/ChangeLog: + + * g++.target/loongarch/atomic-cas-int.C: New test. +--- + gcc/config/loongarch/sync.md | 46 ++++++++++++++----- + .../g++.target/loongarch/atomic-cas-int.C | 32 +++++++++++++ + 2 files changed, 67 insertions(+), 11 deletions(-) + create mode 100644 gcc/testsuite/g++.target/loongarch/atomic-cas-int.C + +diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md +index 5da5c2780..2e008c487 100644 +--- a/gcc/config/loongarch/sync.md ++++ b/gcc/config/loongarch/sync.md +@@ -245,18 +245,42 @@ + (clobber (match_scratch:GPR 5 "=&r"))] + "" + { +- return "1:\\n\\t" +- "ll.\\t%0,%1\\n\\t" +- "bne\\t%0,%z2,2f\\n\\t" +- "or%i3\\t%5,$zero,%3\\n\\t" +- "sc.\\t%5,%1\\n\\t" +- "beqz\\t%5,1b\\n\\t" +- "b\\t3f\\n\\t" +- "2:\\n\\t" +- "%G4\\n\\t" +- "3:\\n\\t"; ++ output_asm_insn ("1:", operands); ++ output_asm_insn ("ll.\t%0,%1", operands); ++ ++ /* Like the test case atomic-cas-int.C, in loongarch64, O1 and higher, the ++ return value of the val_without_const_folding will not be truncated and ++ will be passed directly to the function compare_exchange_strong. ++ However, the instruction 'bne' does not distinguish between 32-bit and ++ 64-bit operations. so if the upper 32 bits of the register are not ++ extended by the 32nd bit symbol, then the comparison may not be valid ++ here.
This will affect the result of the operation. */ ++ ++ if (TARGET_64BIT && REG_P (operands[2]) ++ && GET_MODE (operands[2]) == SImode) ++ { ++ output_asm_insn ("addi.w\t%5,%2,0", operands); ++ output_asm_insn ("bne\t%0,%5,2f", operands); ++ } ++ else ++ output_asm_insn ("bne\t%0,%z2,2f", operands); ++ ++ output_asm_insn ("or%i3\t%5,$zero,%3", operands); ++ output_asm_insn ("sc.\t%5,%1", operands); ++ output_asm_insn ("beqz\t%5,1b", operands); ++ output_asm_insn ("b\t3f", operands); ++ output_asm_insn ("2:", operands); ++ output_asm_insn ("%G4", operands); ++ output_asm_insn ("3:", operands); ++ ++ return ""; + } +- [(set (attr "length") (const_int 28))]) ++ [(set (attr "length") ++ (if_then_else ++ (and (match_test "GET_MODE (operands[2]) == SImode") ++ (match_test "REG_P (operands[2])")) ++ (const_int 32) ++ (const_int 28)))]) + + (define_insn "atomic_cas_value_strong_amcas" + [(set (match_operand:QHWD 0 "register_operand" "=&r") +diff --git a/gcc/testsuite/g++.target/loongarch/atomic-cas-int.C b/gcc/testsuite/g++.target/loongarch/atomic-cas-int.C +new file mode 100644 +index 000000000..830ce4826 +--- /dev/null ++++ b/gcc/testsuite/g++.target/loongarch/atomic-cas-int.C +@@ -0,0 +1,32 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++ ++#include ++#include ++ ++__attribute__ ((noinline)) long ++val_without_const_folding (long val) ++{ ++ return val; ++} ++ ++int ++main () ++{ ++ int oldval = 0xaa; ++ int newval = 0xbb; ++ std::atomic amo; ++ ++ amo.store (oldval); ++ ++ long longval = val_without_const_folding (0xff80000000000000 + oldval); ++ oldval = static_cast (longval); ++ ++ amo.compare_exchange_strong (oldval, newval); ++ ++ if (newval != amo.load (std::memory_order_relaxed)) ++ __builtin_abort (); ++ ++ return 0; ++} ++ +-- +2.43.0 + diff --git a/0146-LoongArch-testsuite-Add-compilation-options-to-the-r.patch b/0146-LoongArch-testsuite-Add-compilation-options-to-the-r.patch new file mode 100644 index 0000000..5b0fadf --- /dev/null +++ 
b/0146-LoongArch-testsuite-Add-compilation-options-to-the-r.patch @@ -0,0 +1,30 @@ +From eab751e71d4f4d5e9b2eda55d793fd57541fbc56 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Thu, 7 Mar 2024 09:44:03 +0800 +Subject: [PATCH 146/188] LoongArch: testsuite: Add compilation options to the + regname-fp-s9.c. + +When the value of the macro DEFAULT_CFLAGS is set to '-ansi -pedantic-errors', +the regname-fp-s9.c test will fail. To solve this problem, add the compilation +option '-Wno-pedantic -std=gnu90' to this test case. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/regname-fp-s9.c: Add compilation option + '-Wno-pedantic -std=gnu90'. +--- + gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c b/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c +index d2e3b80f8..77a74f1f6 100644 +--- a/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c ++++ b/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c +@@ -1,3 +1,4 @@ + /* { dg-do compile } */ ++/* { dg-additional-options "-Wno-pedantic -std=gnu90" } */ + register long s9 asm("s9"); /* { dg-note "conflicts with 's9'" } */ + register long fp asm("fp"); /* { dg-warning "register of 'fp' used for multiple global register variables" } */ +-- +2.43.0 + diff --git a/0147-LoongArch-Emit-R_LARCH_RELAX-for-TLS-IE-with-non-ext.patch b/0147-LoongArch-Emit-R_LARCH_RELAX-for-TLS-IE-with-non-ext.patch new file mode 100644 index 0000000..e6c6463 --- /dev/null +++ b/0147-LoongArch-Emit-R_LARCH_RELAX-for-TLS-IE-with-non-ext.patch @@ -0,0 +1,137 @@ +From 465f0653b6e7bf5adb5d1f6c9e8aff2b81a3f27f Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 26 Jan 2024 18:28:32 +0800 +Subject: [PATCH 147/188] LoongArch: Emit R_LARCH_RELAX for TLS IE with + non-extreme code model to allow the IE to LE linker relaxation + +In Binutils we need to make IE to LE relaxation only allowed when there +is an R_LARCH_RELAX after
R_LARCH_TLS_IE_PC_{HI20,LO12} so an invalid +"partial" relaxation won't happen with the extreme code model. So if we +are emitting %ie_pc_{hi20,lo12} in a non-extreme code model, emit an +R_LARCH_RELAX to allow the relaxation. The IE to LE relaxation does not +require the pcalau12i and the ld instruction to be adjacent, so we don't +need to limit ourselves to use the macro. + +For the distro maintainers backporting changes: this change depends on +r14-8721, without r14-8721 R_LARCH_RELAX can be emitted mistakenly in +the extreme code model. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_print_operand_reloc): + Support 'Q' for R_LARCH_RELAX for TLS IE. + (loongarch_output_move): Use 'Q' to print R_LARCH_RELAX for TLS + IE. + * config/loongarch/loongarch.md (ld_from_got): Likewise. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/tls-ie-relax.c: New test. + * gcc.target/loongarch/tls-ie-norelax.c: New test. + * gcc.target/loongarch/tls-ie-extreme.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 15 ++++++++++++++- + gcc/config/loongarch/loongarch.md | 2 +- + .../gcc.target/loongarch/tls-ie-extreme.c | 5 +++++ + .../gcc.target/loongarch/tls-ie-norelax.c | 5 +++++ + gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c | 11 +++++++++++ + 5 files changed, 36 insertions(+), 2 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-ie-extreme.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-ie-norelax.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index d23b09cc5..c1dc30b61 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4977,7 +4977,7 @@ loongarch_output_move (rtx dest, rtx src) + if (type == SYMBOL_TLS_LE) + return "lu12i.w\t%0,%h1"; + else +- return "pcalau12i\t%0,%h1"; ++ return "%Q1pcalau12i\t%0,%h1"; + } + + if (src_code == CONST_INT) +@@ -6141,6 +6141,7
@@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, + 'L' Print the low-part relocation associated with OP. + 'm' Print one less than CONST_INT OP in decimal. + 'N' Print the inverse of the integer branch condition for comparison OP. ++ 'Q' Print R_LARCH_RELAX for TLS IE. + 'r' Print address 12-31bit relocation associated with OP. + 'R' Print address 32-51bit relocation associated with OP. + 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...), +@@ -6278,6 +6279,18 @@ loongarch_print_operand (FILE *file, rtx op, int letter) + letter); + break; + ++ case 'Q': ++ if (!TARGET_LINKER_RELAXATION) ++ break; ++ ++ if (code == HIGH) ++ op = XEXP (op, 0); ++ ++ if (loongarch_classify_symbolic_expression (op) == SYMBOL_TLS_IE) ++ fprintf (file, ".reloc\t.,R_LARCH_RELAX\n\t"); ++ ++ break; ++ + case 'r': + loongarch_print_operand_reloc (file, op, false /* hi64_part */, + true /* lo_reloc */); +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 248ad12bb..d2c7c3b05 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2620,7 +2620,7 @@ + (match_operand:P 2 "symbolic_operand")))] + UNSPEC_LOAD_FROM_GOT))] + "" +- "ld.\t%0,%1,%L2" ++ "%Q2ld.\t%0,%1,%L2" + [(set_attr "type" "move")] + ) + +diff --git a/gcc/testsuite/gcc.target/loongarch/tls-ie-extreme.c b/gcc/testsuite/gcc.target/loongarch/tls-ie-extreme.c +new file mode 100644 +index 000000000..00c545a3e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/tls-ie-extreme.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mcmodel=extreme -mexplicit-relocs=auto -mrelax" } */ ++/* { dg-final { scan-assembler-not "R_LARCH_RELAX" { target tls_native } } } */ ++ ++#include "tls-ie-relax.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/tls-ie-norelax.c b/gcc/testsuite/gcc.target/loongarch/tls-ie-norelax.c +new file mode 100644 +index 000000000..dd6bf3634 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/loongarch/tls-ie-norelax.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mcmodel=normal -mexplicit-relocs -mno-relax" } */ ++/* { dg-final { scan-assembler-not "R_LARCH_RELAX" { target tls_native } } } */ ++ ++#include "tls-ie-relax.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c b/gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c +new file mode 100644 +index 000000000..e9f7569b1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/tls-ie-relax.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mcmodel=normal -mexplicit-relocs -mrelax" } */ ++/* { dg-final { scan-assembler-times "R_LARCH_RELAX" 2 { target tls_native } } } */ ++ ++extern __thread int errno; ++ ++void ++unimplemented (void) ++{ ++ errno = -38; ++} +-- +2.43.0 + diff --git a/0148-LoongArch-Remove-unused-and-incorrect-sge-u-_-X-mode.patch b/0148-LoongArch-Remove-unused-and-incorrect-sge-u-_-X-mode.patch new file mode 100644 index 0000000..df1e7b1 --- /dev/null +++ b/0148-LoongArch-Remove-unused-and-incorrect-sge-u-_-X-mode.patch @@ -0,0 +1,57 @@ +From acc00029aab3cdd1186f1ed4a93db2205cdd166c Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Wed, 13 Mar 2024 20:44:38 +0800 +Subject: [PATCH 148/188] LoongArch: Remove unused and incorrect + "sge_" define_insn + +If this insn is really used, we'll have something like + + slti $r4,$r0,$r5 + +in the code. The assembler will reject it because slti wants 2 +register operands and 1 immediate operand. But we've not got any bug +report for this, indicating this define_insn is unused at all. + +Note that do_store_flag (in expr.cc) is already converting x >= 1 to +x > 0 unconditionally, so this define_insn is indeed unused and we can +just remove it. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (any_ge): Remove. + (sge_): Remove. 
+--- + gcc/config/loongarch/loongarch.md | 10 ---------- + 1 file changed, 10 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index d2c7c3b05..1b3525dde 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -517,7 +517,6 @@ + ;; These code iterators allow the signed and unsigned scc operations to use + ;; the same template. + (define_code_iterator any_gt [gt gtu]) +-(define_code_iterator any_ge [ge geu]) + (define_code_iterator any_lt [lt ltu]) + (define_code_iterator any_le [le leu]) + +@@ -3355,15 +3354,6 @@ + [(set_attr "type" "slt") + (set_attr "mode" "")]) + +-(define_insn "*sge_" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (any_ge:GPR (match_operand:X 1 "register_operand" "r") +- (const_int 1)))] +- "" +- "slti\t%0,%.,%1" +- [(set_attr "type" "slt") +- (set_attr "mode" "")]) +- + (define_insn "*slt_" + [(set (match_operand:GPR 0 "register_operand" "=r") + (any_lt:GPR (match_operand:X 1 "register_operand" "r") +-- +2.43.0 + diff --git a/0149-LoongArch-Remove-masking-process-for-operand-3-of-xv.patch b/0149-LoongArch-Remove-masking-process-for-operand-3-of-xv.patch new file mode 100644 index 0000000..42e7d21 --- /dev/null +++ b/0149-LoongArch-Remove-masking-process-for-operand-3-of-xv.patch @@ -0,0 +1,85 @@ +From 0dba1a1daef3f043235382f0e8f107313b9bde07 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan +Date: Thu, 14 Mar 2024 09:26:54 +0800 +Subject: [PATCH 149/188] LoongArch: Remove masking process for operand 3 of + xvpermi.q. + +The behavior of non-zero unused bits in xvpermi.q instruction's +third operand is undefined on LoongArch, according to our +discussion (https://github.com/llvm/llvm-project/pull/83540), +we think that keeping original insn operand as unmodified +state is better solution. + +This patch partially reverts 7b158e036a95b1ab40793dd53bed7dbd770ffdaf. 
+ +gcc/ChangeLog: + + * config/loongarch/lasx.md (lasx_xvpermi_q_): + Remove masking of operand 3. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c: + Reposition operand 3's value into instruction's defined accept range. +--- + gcc/config/loongarch/lasx.md | 5 ----- + .../gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c | 6 +++--- + 2 files changed, 3 insertions(+), 8 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 38f35bad6..f3b5ea373 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -640,8 +640,6 @@ + (set_attr "mode" "")]) + + ;; xvpermi.q +-;; Unused bits in operands[3] need be set to 0 to avoid +-;; causing undefined behavior on LA464. + (define_insn "lasx_xvpermi_q_" + [(set (match_operand:LASX 0 "register_operand" "=f") + (unspec:LASX +@@ -651,9 +649,6 @@ + UNSPEC_LASX_XVPERMI_Q))] + "ISA_HAS_LASX" + { +- int mask = 0x33; +- mask &= INTVAL (operands[3]); +- operands[3] = GEN_INT (mask); + return "xvpermi.q\t%u0,%u2,%3"; + } + [(set_attr "type" "simd_splat") +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c +index dbc29d2fb..f89dfc311 100644 +--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvpermi_q.c +@@ -27,7 +27,7 @@ main () + *((unsigned long*)& __m256i_result[2]) = 0x7fff7fff7fff0000; + *((unsigned long*)& __m256i_result[1]) = 0x7fe37fe3001d001d; + *((unsigned long*)& __m256i_result[0]) = 0x7fff7fff7fff0000; +- __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0x2a); ++ __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0x22); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + + *((unsigned long*)& __m256i_op0[3]) = 0x0000000000000000; +@@ -42,7 +42,7 @@ main () + *((unsigned long*)& __m256i_result[2]) = 0x000000000019001c; + *((unsigned long*)& 
__m256i_result[1]) = 0x0000000000000000; + *((unsigned long*)& __m256i_result[0]) = 0x00000000000001fe; +- __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0xb9); ++ __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0x31); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + + *((unsigned long*)& __m256i_op0[3]) = 0x00ff00ff00ff00ff; +@@ -57,7 +57,7 @@ main () + *((unsigned long*)& __m256i_result[2]) = 0xffff0000ffff0000; + *((unsigned long*)& __m256i_result[1]) = 0x00ff00ff00ff00ff; + *((unsigned long*)& __m256i_result[0]) = 0x00ff00ff00ff00ff; +- __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0xca); ++ __m256i_out = __lasx_xvpermi_q (__m256i_op0, __m256i_op1, 0x02); + ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); + + return 0; +-- +2.43.0 + diff --git a/0150-LoongArch-Fix-C23-.-functions-returning-large-aggreg.patch b/0150-LoongArch-Fix-C23-.-functions-returning-large-aggreg.patch new file mode 100644 index 0000000..c09c6f6 --- /dev/null +++ b/0150-LoongArch-Fix-C23-.-functions-returning-large-aggreg.patch @@ -0,0 +1,48 @@ +From 3ed698858f0ebb12a99ed1cc12c038b533f64b2c Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 25 Oct 2024 06:15:21 +0000 +Subject: [PATCH 150/188] LoongArch: Fix C23 (...) functions returning large + aggregates [PR114175] + +We were assuming TYPE_NO_NAMED_ARGS_STDARG_P don't have any named +arguments and there is nothing to advance, but that is not the case +for (...) functions returning by hidden reference which have one such +artificial argument. This is causing gcc.dg/c23-stdarg-6.c and +gcc.dg/c23-stdarg-8.c to fail. + +Fix the issue by checking if arg.type is NULL, as r14-9503 explains. + +gcc/ChangeLog: + + PR target/114175 + * config/loongarch/loongarch.cc + (loongarch_setup_incoming_varargs): Only skip + loongarch_function_arg_advance for TYPE_NO_NAMED_ARGS_STDARG_P + functions if arg.type is NULL. 
+--- + gcc/config/loongarch/loongarch.cc | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index c1dc30b61..1e3981e19 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -767,7 +767,14 @@ loongarch_setup_incoming_varargs (cumulative_args_t cum, + argument. Advance a local copy of CUM past the last "real" named + argument, to find out how many registers are left over. */ + local_cum = *get_cumulative_args (cum); +- loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg); ++ ++ /* For a C23 variadic function w/o any named argument, and w/o an ++ artifical argument for large return value, skip advancing args. ++ There is such an artifical argument iff. arg.type is non-NULL ++ (PR 114175). */ ++ if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)) ++ || arg.type != NULL_TREE) ++ loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg); + + /* Found out how many registers we need to save. */ + gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs; +-- +2.43.0 + diff --git a/0151-LoongArch-Remove-unused-useless-definitions.patch b/0151-LoongArch-Remove-unused-useless-definitions.patch new file mode 100644 index 0000000..44e92c2 --- /dev/null +++ b/0151-LoongArch-Remove-unused-useless-definitions.patch @@ -0,0 +1,123 @@ +From 6ee300fd31e000efba141ed8806e56bd03826197 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan +Date: Fri, 15 Mar 2024 09:30:25 +0800 +Subject: [PATCH 151/188] LoongArch: Remove unused/useless definitions. + +This patch removes some unnecessary definitions of target hook functions +according to the documentation of GCC. + +gcc/ChangeLog: + + * config/loongarch/loongarch-protos.h + (loongarch_cfun_has_cprestore_slot_p): Delete. + (loongarch_adjust_insn_length): Delete. + (current_section_name): Delete. + (loongarch_split_symbol_type): Delete. 
+ * config/loongarch/loongarch.cc + (loongarch_case_values_threshold): Delete. + (loongarch_spill_class): Delete. + (TARGET_OPTAB_SUPPORTED_P): Delete. + (TARGET_CASE_VALUES_THRESHOLD): Delete. + (TARGET_SPILL_CLASS): Delete. +--- + gcc/config/loongarch/loongarch-protos.h | 5 ----- + gcc/config/loongarch/loongarch.cc | 26 ------------------------- + 2 files changed, 31 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 87b94e8b0..3dac20279 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -93,7 +93,6 @@ extern void loongarch_split_lsx_copy_d (rtx, rtx, rtx, rtx (*)(rtx, rtx, rtx)); + extern void loongarch_split_lsx_insert_d (rtx, rtx, rtx, rtx); + extern void loongarch_split_lsx_fill_d (rtx, rtx); + extern const char *loongarch_output_move (rtx, rtx); +-extern bool loongarch_cfun_has_cprestore_slot_p (void); + #ifdef RTX_CODE + extern void loongarch_expand_scc (rtx *); + extern bool loongarch_expand_vec_cmp (rtx *); +@@ -135,7 +134,6 @@ extern int loongarch_class_max_nregs (enum reg_class, machine_mode); + extern machine_mode loongarch_hard_regno_caller_save_mode (unsigned int, + unsigned int, + machine_mode); +-extern int loongarch_adjust_insn_length (rtx_insn *, int); + extern const char *loongarch_output_conditional_branch (rtx_insn *, rtx *, + const char *, + const char *); +@@ -157,7 +155,6 @@ extern bool loongarch_global_symbol_noweak_p (const_rtx); + extern bool loongarch_weak_symbol_p (const_rtx); + extern bool loongarch_symbol_binds_local_p (const_rtx); + +-extern const char *current_section_name (void); + extern unsigned int current_section_flags (void); + extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); + extern bool loongarch_check_zero_div_p (void); +@@ -198,8 +195,6 @@ extern bool loongarch_epilogue_uses (unsigned int); + extern bool loongarch_load_store_bonding_p (rtx *, machine_mode, bool); + extern bool 
loongarch_split_symbol_type (enum loongarch_symbol_type); + +-typedef rtx (*mulsidi3_gen_fn) (rtx, rtx, rtx); +- + extern void loongarch_register_frame_header_opt (void); + extern void loongarch_expand_vec_cond_expr (machine_mode, machine_mode, rtx *); + extern void loongarch_expand_vec_cond_mask_expr (machine_mode, machine_mode, +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 1e3981e19..903c0d4ef 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -10812,23 +10812,6 @@ loongarch_expand_vec_cmp (rtx operands[]) + return true; + } + +-/* Implement TARGET_CASE_VALUES_THRESHOLD. */ +- +-unsigned int +-loongarch_case_values_threshold (void) +-{ +- return default_case_values_threshold (); +-} +- +-/* Implement TARGET_SPILL_CLASS. */ +- +-static reg_class_t +-loongarch_spill_class (reg_class_t rclass ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED) +-{ +- return NO_REGS; +-} +- + /* Implement TARGET_PROMOTE_FUNCTION_MODE. 
*/ + + /* This function is equivalent to default_promote_function_mode_always_promote +@@ -11283,9 +11266,6 @@ loongarch_asm_code_end (void) + #undef TARGET_FUNCTION_ARG_BOUNDARY + #define TARGET_FUNCTION_ARG_BOUNDARY loongarch_function_arg_boundary + +-#undef TARGET_OPTAB_SUPPORTED_P +-#define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p +- + #undef TARGET_VECTOR_MODE_SUPPORTED_P + #define TARGET_VECTOR_MODE_SUPPORTED_P loongarch_vector_mode_supported_p + +@@ -11355,18 +11335,12 @@ loongarch_asm_code_end (void) + #undef TARGET_SCHED_REASSOCIATION_WIDTH + #define TARGET_SCHED_REASSOCIATION_WIDTH loongarch_sched_reassociation_width + +-#undef TARGET_CASE_VALUES_THRESHOLD +-#define TARGET_CASE_VALUES_THRESHOLD loongarch_case_values_threshold +- + #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV + #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV loongarch_atomic_assign_expand_fenv + + #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS + #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true + +-#undef TARGET_SPILL_CLASS +-#define TARGET_SPILL_CLASS loongarch_spill_class +- + #undef TARGET_HARD_REGNO_NREGS + #define TARGET_HARD_REGNO_NREGS loongarch_hard_regno_nregs + #undef TARGET_HARD_REGNO_MODE_OK +-- +2.43.0 + diff --git a/0152-LoongArch-Change-loongarch_expand_vec_cmp-s-return-t.patch b/0152-LoongArch-Change-loongarch_expand_vec_cmp-s-return-t.patch new file mode 100644 index 0000000..4cf3c5d --- /dev/null +++ b/0152-LoongArch-Change-loongarch_expand_vec_cmp-s-return-t.patch @@ -0,0 +1,110 @@ +From d569e34b29faee3658014b3900e9553a4880dac0 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan +Date: Fri, 15 Mar 2024 09:30:26 +0800 +Subject: [PATCH 152/188] LoongArch: Change loongarch_expand_vec_cmp()'s return + type from bool to void. + +This function is always return true at the end of function implementation, +so the return value is useless. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (vec_cmp): Remove checking + of loongarch_expand_vec_cmp()'s return value. 
+ (vec_cmpu): Ditto. + * config/loongarch/lsx.md (vec_cmp): Ditto. + (vec_cmpu): Ditto. + * config/loongarch/loongarch-protos.h + (loongarch_expand_vec_cmp): Change loongarch_expand_vec_cmp()'s return + type from bool to void. + * config/loongarch/loongarch.cc (loongarch_expand_vec_cmp): Ditto. +--- + gcc/config/loongarch/lasx.md | 6 ++---- + gcc/config/loongarch/loongarch-protos.h | 2 +- + gcc/config/loongarch/loongarch.cc | 3 +-- + gcc/config/loongarch/lsx.md | 6 ++---- + 4 files changed, 6 insertions(+), 11 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index f3b5ea373..45a0a8cc8 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -1378,8 +1378,7 @@ + (match_operand:LASX 3 "register_operand")]))] + "ISA_HAS_LASX" + { +- bool ok = loongarch_expand_vec_cmp (operands); +- gcc_assert (ok); ++ loongarch_expand_vec_cmp (operands); + DONE; + }) + +@@ -1390,8 +1389,7 @@ + (match_operand:ILASX 3 "register_operand")]))] + "ISA_HAS_LASX" + { +- bool ok = loongarch_expand_vec_cmp (operands); +- gcc_assert (ok); ++ loongarch_expand_vec_cmp (operands); + DONE; + }) + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 3dac20279..8523da8d6 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -95,7 +95,7 @@ extern void loongarch_split_lsx_fill_d (rtx, rtx); + extern const char *loongarch_output_move (rtx, rtx); + #ifdef RTX_CODE + extern void loongarch_expand_scc (rtx *); +-extern bool loongarch_expand_vec_cmp (rtx *); ++extern void loongarch_expand_vec_cmp (rtx *); + extern void loongarch_expand_conditional_branch (rtx *); + extern void loongarch_expand_conditional_move (rtx *); + extern void loongarch_expand_conditional_trap (rtx); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 903c0d4ef..dbb318660 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ 
b/gcc/config/loongarch/loongarch.cc +@@ -10803,13 +10803,12 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode, + } + + /* Expand integer vector comparison */ +-bool ++void + loongarch_expand_vec_cmp (rtx operands[]) + { + + rtx_code code = GET_CODE (operands[1]); + loongarch_expand_lsx_cmp (operands[0], code, operands[2], operands[3]); +- return true; + } + + /* Implement TARGET_PROMOTE_FUNCTION_MODE. */ +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index d5aa3f46f..dc81093e9 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -518,8 +518,7 @@ + (match_operand:LSX 3 "register_operand")]))] + "ISA_HAS_LSX" + { +- bool ok = loongarch_expand_vec_cmp (operands); +- gcc_assert (ok); ++ loongarch_expand_vec_cmp (operands); + DONE; + }) + +@@ -530,8 +529,7 @@ + (match_operand:ILSX 3 "register_operand")]))] + "ISA_HAS_LSX" + { +- bool ok = loongarch_expand_vec_cmp (operands); +- gcc_assert (ok); ++ loongarch_expand_vec_cmp (operands); + DONE; + }) + +-- +2.43.0 + diff --git a/0153-LoongArch-Combine-UNITS_PER_FP_REG-and-UNITS_PER_FPR.patch b/0153-LoongArch-Combine-UNITS_PER_FP_REG-and-UNITS_PER_FPR.patch new file mode 100644 index 0000000..cdabd1d --- /dev/null +++ b/0153-LoongArch-Combine-UNITS_PER_FP_REG-and-UNITS_PER_FPR.patch @@ -0,0 +1,104 @@ +From 6c4a2fbdabab053a2a0fb1041e3ffccc3d853c97 Mon Sep 17 00:00:00 2001 +From: Chenghui Pan +Date: Fri, 15 Mar 2024 09:30:27 +0800 +Subject: [PATCH 153/188] LoongArch: Combine UNITS_PER_FP_REG and + UNITS_PER_FPREG macros. + +These macros are completely same in definition, so we can keep the previous one +and eliminate later one. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_hard_regno_mode_ok_uncached): Combine UNITS_PER_FP_REG and + UNITS_PER_FPREG macros. + (loongarch_hard_regno_nregs): Ditto. + (loongarch_class_max_nregs): Ditto. + (loongarch_get_separate_components): Ditto. + (loongarch_process_components): Ditto. 
+ * config/loongarch/loongarch.h (UNITS_PER_FPREG): Ditto. + (UNITS_PER_HWFPVALUE): Ditto. + (UNITS_PER_FPVALUE): Ditto. +--- + gcc/config/loongarch/loongarch.cc | 10 +++++----- + gcc/config/loongarch/loongarch.h | 7 ++----- + 2 files changed, 7 insertions(+), 10 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index dbb318660..8d9cda165 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6773,7 +6773,7 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) + and TRUNC. There's no point allowing sizes smaller than a word, + because the FPU has no appropriate load/store instructions. */ + if (mclass == MODE_INT) +- return size >= MIN_UNITS_PER_WORD && size <= UNITS_PER_FPREG; ++ return size >= MIN_UNITS_PER_WORD && size <= UNITS_PER_FP_REG; + } + + return false; +@@ -6816,7 +6816,7 @@ loongarch_hard_regno_nregs (unsigned int regno, machine_mode mode) + if (LASX_SUPPORTED_MODE_P (mode)) + return 1; + +- return (GET_MODE_SIZE (mode) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG; ++ return (GET_MODE_SIZE (mode) + UNITS_PER_FP_REG - 1) / UNITS_PER_FP_REG; + } + + /* All other registers are word-sized. */ +@@ -6851,7 +6851,7 @@ loongarch_class_max_nregs (enum reg_class rclass, machine_mode mode) + else if (LSX_SUPPORTED_MODE_P (mode)) + size = MIN (size, UNITS_PER_LSX_REG); + else +- size = MIN (size, UNITS_PER_FPREG); ++ size = MIN (size, UNITS_PER_FP_REG); + } + left &= ~reg_class_contents[FP_REGS]; + } +@@ -8227,7 +8227,7 @@ loongarch_get_separate_components (void) + if (IMM12_OPERAND (offset)) + bitmap_set_bit (components, regno); + +- offset -= UNITS_PER_FPREG; ++ offset -= UNITS_PER_FP_REG; + } + + /* Don't mess with the hard frame pointer. 
*/ +@@ -8306,7 +8306,7 @@ loongarch_process_components (sbitmap components, loongarch_save_restore_fn fn) + if (bitmap_bit_p (components, regno)) + loongarch_save_restore_reg (mode, regno, offset, fn); + +- offset -= UNITS_PER_FPREG; ++ offset -= UNITS_PER_FP_REG; + } + } + +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index 8bcdb8729..698e42aec 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -138,19 +138,16 @@ along with GCC; see the file COPYING3. If not see + /* Width of a LASX vector register in bits. */ + #define BITS_PER_LASX_REG (UNITS_PER_LASX_REG * BITS_PER_UNIT) + +-/* For LARCH, width of a floating point register. */ +-#define UNITS_PER_FPREG (TARGET_DOUBLE_FLOAT ? 8 : 4) +- + /* The largest size of value that can be held in floating-point + registers and moved with a single instruction. */ + #define UNITS_PER_HWFPVALUE \ +- (TARGET_SOFT_FLOAT ? 0 : UNITS_PER_FPREG) ++ (TARGET_SOFT_FLOAT ? 0 : UNITS_PER_FP_REG) + + /* The largest size of value that can be held in floating-point + registers. */ + #define UNITS_PER_FPVALUE \ + (TARGET_SOFT_FLOAT ? 0 \ +- : TARGET_SINGLE_FLOAT ? UNITS_PER_FPREG \ ++ : TARGET_SINGLE_FLOAT ? UNITS_PER_FP_REG \ + : LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT) + + /* The number of bytes in a double. */ +-- +2.43.0 + diff --git a/0154-LoongArch-Fix-a-typo-PR-114407.patch b/0154-LoongArch-Fix-a-typo-PR-114407.patch new file mode 100644 index 0000000..e55b604 --- /dev/null +++ b/0154-LoongArch-Fix-a-typo-PR-114407.patch @@ -0,0 +1,30 @@ +From 72f18deb0b8e59cc23f25cb99b59a25a0a1d99c7 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Thu, 21 Mar 2024 04:01:17 +0800 +Subject: [PATCH 154/188] LoongArch: Fix a typo [PR 114407] + +gcc/ChangeLog: + + PR target/114407 + * config/loongarch/loongarch-opts.cc (loongarch_config_target): + Fix typo in diagnostic message, enabing -> enabling. 
+--- + gcc/config/loongarch/loongarch-opts.cc | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 2ea3972d1..bdecfaf49 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -362,7 +362,7 @@ config_target_isa: + gcc_assert (constrained.simd); + + inform (UNKNOWN_LOCATION, +- "enabing %qs promotes %<%s%s%> to %<%s%s%>", ++ "enabling %qs promotes %<%s%s%> to %<%s%s%>", + loongarch_isa_ext_strings[t.isa.simd], + OPTSTR_ISA_EXT_FPU, loongarch_isa_ext_strings[t.isa.fpu], + OPTSTR_ISA_EXT_FPU, loongarch_isa_ext_strings[ISA_EXT_FPU64]); +-- +2.43.0 + diff --git a/0155-testsuite-Add-a-test-case-for-negating-FP-vectors-co.patch b/0155-testsuite-Add-a-test-case-for-negating-FP-vectors-co.patch new file mode 100644 index 0000000..fb5caea --- /dev/null +++ b/0155-testsuite-Add-a-test-case-for-negating-FP-vectors-co.patch @@ -0,0 +1,68 @@ +From e27123a020e7bf0845a9804a4b09fe4ce57992f0 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Tue, 6 Feb 2024 17:49:50 +0800 +Subject: [PATCH 155/188] testsuite: Add a test case for negating FP vectors + containing zeros + +Recently I've fixed two wrong FP vector negate implementations which +caused wrong sign bits in zeros on targets (r14-8786 and r14-8801). To +prevent a similar issue from happening again, add a test case. + +Tested on x86_64 (with SSE2, AVX, AVX2, and AVX512F), AArch64, MIPS +(with MSA), LoongArch (with LSX and LASX). + +gcc/testsuite: + + * gcc.dg/vect/vect-neg-zero.c: New test. 
+--- + gcc/testsuite/gcc.dg/vect/vect-neg-zero.c | 38 +++++++++++++++++++++++ + 1 file changed, 38 insertions(+) + create mode 100644 gcc/testsuite/gcc.dg/vect/vect-neg-zero.c + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-neg-zero.c b/gcc/testsuite/gcc.dg/vect/vect-neg-zero.c +new file mode 100644 +index 000000000..21fa00cfa +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-neg-zero.c +@@ -0,0 +1,38 @@ ++/* { dg-add-options ieee } */ ++/* { dg-additional-options "-fno-associative-math -fsigned-zeros" } */ ++ ++double x[4] = {-0.0, 0.0, -0.0, 0.0}; ++float y[8] = {-0.0, 0.0, -0.0, 0.0, -0.0, -0.0, 0.0, 0.0}; ++ ++static __attribute__ ((always_inline)) inline void ++test (int factor) ++{ ++ double a[4]; ++ float b[8]; ++ ++ asm ("" ::: "memory"); ++ ++ for (int i = 0; i < 2 * factor; i++) ++ a[i] = -x[i]; ++ ++ for (int i = 0; i < 4 * factor; i++) ++ b[i] = -y[i]; ++ ++#pragma GCC novector ++ for (int i = 0; i < 2 * factor; i++) ++ if (__builtin_signbit (a[i]) == __builtin_signbit (x[i])) ++ __builtin_abort (); ++ ++#pragma GCC novector ++ for (int i = 0; i < 4 * factor; i++) ++ if (__builtin_signbit (b[i]) == __builtin_signbit (y[i])) ++ __builtin_abort (); ++} ++ ++int ++main (void) ++{ ++ test (1); ++ test (2); ++ return 0; ++} +-- +2.43.0 + diff --git a/0156-LoongArch-Add-descriptions-of-the-compilation-option.patch b/0156-LoongArch-Add-descriptions-of-the-compilation-option.patch new file mode 100644 index 0000000..4e7ef02 --- /dev/null +++ b/0156-LoongArch-Add-descriptions-of-the-compilation-option.patch @@ -0,0 +1,83 @@ +From 899f1f351ddc0d76bc9d432cfe63b30cfb294860 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Fri, 25 Oct 2024 06:22:11 +0000 +Subject: [PATCH 156/188] LoongArch: Add descriptions of the compilation + options. + +Add descriptions for the compilation options '-mfrecipe' '-mdiv32' +'-mlam-bh' '-mlamcas' and '-mld-seq-sa'. + +gcc/ChangeLog: + + * doc/invoke.texi: Add descriptions for the compilation + options. 
+--- + gcc/doc/invoke.texi | 45 +++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 43 insertions(+), 2 deletions(-) + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 5c6515cb1..7f24fe1e2 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1008,8 +1008,9 @@ Objective-C and Objective-C++ Dialects}. + -mmax-inline-memcpy-size=@var{n} @gol + -mexplicit-relocs -mno-explicit-relocs @gol + -mdirect-extern-access -mno-direct-extern-access @gol +--mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as} @gol +--mrecip -mrecip=@var{opt} ++-mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as @gol ++-mrecip -mrecip=@var{opt} -mfrecipe -mno-frecipe -mdiv32 -mno-div32 @gol ++-mlam-bh -mno-lam-bh -mlamcas -mno-lamcas -mld-seq-sa -mno-ld-seq-sa} + + @emph{M32R/D Options} + @gccoptlist{-m32r2 -m32rx -m32r @gol +@@ -24686,6 +24687,46 @@ Enable the approximation for vectorized reciprocal square root. + So, for example, @option{-mrecip=all,!sqrt} enables + all of the reciprocal approximations, except for scalar square root. + ++@opindex mfrecipe ++@opindex mno-frecipe ++@item -mfrecipe ++@itemx -mno-frecipe ++Use (do not use) @code{frecipe.@{s/d@}} and @code{frsqrte.@{s/d@}} ++instructions. When built with @option{-march=la664}, it is enabled by default. ++Otherwise, the default is @option{-mno-frecipe}. ++ ++@opindex mdiv32 ++@opindex mno-div32 ++@item -mdiv32 ++@itemx -mno-div32 ++Use (do not use) @code{div.w[u]} and @code{mod.w[u]} instructions with input ++not sign-extended. When built with @option{-march=la664}, it is enabled by ++default. Otherwise, the default is @option{-mno-div32}. ++ ++@opindex mlam-bh ++@opindex mno-lam-bh ++@item -mlam-bh ++@itemx -mno-lam-bh ++Use (do not use) @code{am@{swap/add@}[_db].@{b/h@}} instructions. When built ++with @option{-march=la664}, it is enabled by default. Otherwise, the default is ++@option{-mno-lam-bh}. 
++ ++@opindex mlamcas ++@opindex mno-lamcas ++@item -mlamcas ++@itemx -mno-lamcas ++Use (do not use) @code{amcas[_db].@{b/h/w/d@}} instructions. When built with ++@option{-march=la664}, it is enabled by default. Otherwise, the default is ++@option{-mno-lamcas}. ++ ++@opindex mld-seq-sa ++@opindex mno-ld-seq-sa ++@item -mld-seq-sa ++@itemx -mno-ld-seq-sa ++Whether a load-load barrier (@code{dbar 0x700}) is needed. When built with ++@option{-march=la664}, it is enabled by default. Otherwise, the default is ++@option{-mno-ld-seq-sa}, and the load-load barrier is needed. ++ + @item loongarch-vect-unroll-limit + The vectorizer will use available tuning information to determine whether it + would be beneficial to unroll the main vectorized loop and by how much. This +-- +2.43.0 + diff --git a/0157-LoongArch-Split-loongarch_option_override_internal-i.patch b/0157-LoongArch-Split-loongarch_option_override_internal-i.patch new file mode 100644 index 0000000..2967cb0 --- /dev/null +++ b/0157-LoongArch-Split-loongarch_option_override_internal-i.patch @@ -0,0 +1,800 @@ +From 6dd3434f004dd1481a3d18fb416b3ddd4151b10f Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Sat, 30 Mar 2024 16:43:14 +0800 +Subject: [PATCH 157/188] LoongArch: Split loongarch_option_override_internal + into smaller procedures + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch.opt.in: Mark -m[no-]recip as + aliases to -mrecip={all,none}, respectively. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch-def.h (ABI_FPU_64): Rename to... + (ABI_FPU64_P): ...this. + (ABI_FPU_32): Rename to... + (ABI_FPU32_P): ...this. + (ABI_FPU_NONE): Rename to... + (ABI_NOFPU_P): ...this. + (ABI_LP64_P): Define. + * config/loongarch/loongarch.cc (loongarch_init_print_operand_punct): + Merged into loongarch_global_init. + (loongarch_cpu_option_override): Renamed to + loongarch_target_option_override. 
+ (loongarch_option_override_internal): Move the work after + loongarch_config_target into loongarch_target_option_override. + (loongarch_global_init): Define. + (INIT_TARGET_FLAG): Move to loongarch-opts.cc. + (loongarch_option_override): Call loongarch_global_init + separately. + * config/loongarch/loongarch-opts.cc (loongarch_parse_mrecip_scheme): + Split the parsing of -mrecip= from + loongarch_option_override_internal. + (loongarch_generate_mrecip_scheme): Define. Split from + loongarch_option_override_internal. + (loongarch_target_option_override): Define. Renamed from + loongarch_cpu_option_override. + (loongarch_init_misc_options): Define. Split from + loongarch_option_override_internal. + (INIT_TARGET_FLAG): Move from loongarch.cc. + * config/loongarch/loongarch-opts.h (loongarch_target_option_override): + New prototype. + (loongarch_parse_mrecip_scheme): New prototype. + (loongarch_init_misc_options): New prototype. + (TARGET_ABI_LP64): Simplify with ABI_LP64_P. + * config/loongarch/loongarch.h (TARGET_RECIP_DIV): Simplify. + Do not reference specific CPU architecture (LA664). + (TARGET_RECIP_SQRT): Same. + (TARGET_RECIP_RSQRT): Same. + (TARGET_RECIP_VEC_DIV): Same. + (TARGET_RECIP_VEC_SQRT): Same. + (TARGET_RECIP_VEC_RSQRT): Same. 
+--- + gcc/config/loongarch/genopts/loongarch.opt.in | 8 +- + gcc/config/loongarch/loongarch-def.h | 11 +- + gcc/config/loongarch/loongarch-opts.cc | 253 ++++++++++++++++++ + gcc/config/loongarch/loongarch-opts.h | 27 +- + gcc/config/loongarch/loongarch.cc | 253 +++--------------- + gcc/config/loongarch/loongarch.h | 18 +- + gcc/config/loongarch/loongarch.opt | 8 +- + 7 files changed, 342 insertions(+), 236 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 4d6b1902d..9c6f59bb8 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -197,14 +197,14 @@ mexplicit-relocs + Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). + +-mrecip +-Target RejectNegative Var(la_recip) Save +-Generate approximate reciprocal divide and square root for better throughput. +- + mrecip= + Target RejectNegative Joined Var(la_recip_name) Save + Control generation of reciprocal estimates. + ++mrecip ++Target Alias(mrecip=, all, none) ++Generate approximate reciprocal divide and square root for better throughput. ++ + ; The code model option names for -mcmodel. 
+ Enum + Name(cmodel) Type(int) +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index fdcf43fc7..b1423bcfe 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -90,11 +90,16 @@ extern loongarch_def_array + + #define TO_LP64_ABI_BASE(C) (C) + +-#define ABI_FPU_64(abi_base) \ ++#define ABI_LP64_P(abi_base) \ ++ (abi_base == ABI_BASE_LP64D \ ++ || abi_base == ABI_BASE_LP64F \ ++ || abi_base == ABI_BASE_LP64S) ++ ++#define ABI_FPU64_P(abi_base) \ + (abi_base == ABI_BASE_LP64D) +-#define ABI_FPU_32(abi_base) \ ++#define ABI_FPU32_P(abi_base) \ + (abi_base == ABI_BASE_LP64F) +-#define ABI_FPU_NONE(abi_base) \ ++#define ABI_NOFPU_P(abi_base) \ + (abi_base == ABI_BASE_LP64S) + + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index bdecfaf49..404642a9e 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3. If not see + #include "coretypes.h" + #include "tm.h" + #include "obstack.h" ++#include "opts.h" + #include "diagnostic-core.h" + + #include "loongarch-cpu.h" +@@ -32,8 +33,12 @@ along with GCC; see the file COPYING3. If not see + #include "loongarch-str.h" + #include "loongarch-def.h" + ++/* Target configuration */ + struct loongarch_target la_target; + ++/* RTL cost information */ ++const struct loongarch_rtx_cost_data *loongarch_cost; ++ + /* ABI-related configuration. */ + #define ABI_COUNT (sizeof(abi_priority_list)/sizeof(struct loongarch_abi)) + static const struct loongarch_abi +@@ -795,3 +800,251 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + /* ISA evolution features */ + opts->x_la_isa_evolution = target->isa.evolution; + } ++ ++/* -mrecip= handling */ ++static struct ++ { ++ const char *string; /* option name. */ ++ unsigned int mask; /* mask bits to set. 
*/ ++ } ++const recip_options[] = { ++ { "all", RECIP_MASK_ALL }, ++ { "none", RECIP_MASK_NONE }, ++ { "div", RECIP_MASK_DIV }, ++ { "sqrt", RECIP_MASK_SQRT }, ++ { "rsqrt", RECIP_MASK_RSQRT }, ++ { "vec-div", RECIP_MASK_VEC_DIV }, ++ { "vec-sqrt", RECIP_MASK_VEC_SQRT }, ++ { "vec-rsqrt", RECIP_MASK_VEC_RSQRT }, ++}; ++ ++/* Parser for -mrecip=. */ ++unsigned int ++loongarch_parse_mrecip_scheme (const char *recip_string) ++{ ++ unsigned int result_mask = RECIP_MASK_NONE; ++ ++ if (recip_string) ++ { ++ char *p = ASTRDUP (recip_string); ++ char *q; ++ unsigned int mask, i; ++ bool invert; ++ ++ while ((q = strtok (p, ",")) != NULL) ++ { ++ p = NULL; ++ if (*q == '!') ++ { ++ invert = true; ++ q++; ++ } ++ else ++ invert = false; ++ ++ if (!strcmp (q, "default")) ++ mask = RECIP_MASK_ALL; ++ else ++ { ++ for (i = 0; i < ARRAY_SIZE (recip_options); i++) ++ if (!strcmp (q, recip_options[i].string)) ++ { ++ mask = recip_options[i].mask; ++ break; ++ } ++ ++ if (i == ARRAY_SIZE (recip_options)) ++ { ++ error ("unknown option for %<-mrecip=%s%>", q); ++ invert = false; ++ mask = RECIP_MASK_NONE; ++ } ++ } ++ ++ if (invert) ++ result_mask &= ~mask; ++ else ++ result_mask |= mask; ++ } ++ } ++ return result_mask; ++} ++ ++/* Generate -mrecip= argument based on the mask. 
*/ ++const char* ++loongarch_generate_mrecip_scheme (unsigned int mask) ++{ ++ static char recip_scheme_str[128]; ++ int p = 0, tmp; ++ ++ switch (mask) ++ { ++ case RECIP_MASK_ALL: ++ return "all"; ++ ++ case RECIP_MASK_NONE: ++ return "none"; ++ } ++ ++ for (unsigned long i = 2; i < ARRAY_SIZE (recip_options); i++) ++ { ++ if (mask & recip_options[i].mask) ++ { ++ if ((tmp = strlen (recip_options[i].string) + 1) >= 127 - p) ++ gcc_unreachable (); ++ ++ recip_scheme_str[p] = ','; ++ strcpy (recip_scheme_str + p + 1, recip_options[i].string); ++ p += tmp; ++ } ++ } ++ recip_scheme_str[p] = '\0'; ++ return recip_scheme_str + 1; ++} ++ ++ ++ ++/* Refresh the switches acccording to the resolved loongarch_target struct. */ ++void ++loongarch_target_option_override (struct loongarch_target *target, ++ struct gcc_options *opts, ++ struct gcc_options *opts_set) ++{ ++ loongarch_update_gcc_opt_status (target, opts, opts_set); ++ ++ /* alignments */ ++ if (opts->x_flag_align_functions && !opts->x_str_align_functions) ++ opts->x_str_align_functions ++ = loongarch_cpu_align[target->cpu_tune].function; ++ ++ if (opts->x_flag_align_labels && !opts->x_str_align_labels) ++ opts->x_str_align_labels = loongarch_cpu_align[target->cpu_tune].label; ++ ++ /* Set up parameters to be used in prefetching algorithm. */ ++ int simultaneous_prefetches ++ = loongarch_cpu_cache[target->cpu_tune].simultaneous_prefetches; ++ ++ SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, ++ simultaneous_prefetches); ++ ++ SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_line_size, ++ loongarch_cpu_cache[target->cpu_tune].l1d_line_size); ++ ++ SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_size, ++ loongarch_cpu_cache[target->cpu_tune].l1d_size); ++ ++ SET_OPTION_IF_UNSET (opts, opts_set, param_l2_cache_size, ++ loongarch_cpu_cache[target->cpu_tune].l2d_size); ++ ++ /* Other arch-specific overrides. 
*/ ++ switch (target->cpu_arch) ++ { ++ case CPU_LA664: ++ /* Enable -mrecipe=all for LA664 by default. */ ++ if (!opts_set->x_recip_mask) ++ { ++ opts->x_recip_mask = RECIP_MASK_ALL; ++ opts_set->x_recip_mask = 1; ++ } ++ } ++ ++ /* -mrecip= */ ++ opts->x_la_recip_name ++ = loongarch_generate_mrecip_scheme (opts->x_recip_mask); ++ ++ /* Decide which rtx_costs structure to use. */ ++ if (opts->x_optimize_size) ++ loongarch_cost = &loongarch_rtx_cost_optimize_size; ++ else ++ loongarch_cost = &loongarch_cpu_rtx_cost_data[target->cpu_tune]; ++ ++ /* If the user hasn't specified a branch cost, use the processor's ++ default. */ ++ if (!opts_set->x_la_branch_cost) ++ opts->x_la_branch_cost = loongarch_cost->branch_cost; ++ ++ /* other stuff */ ++ if (ABI_LP64_P (target->abi.base)) ++ opts->x_flag_pcc_struct_return = 0; ++ ++ switch (target->cmodel) ++ { ++ case CMODEL_EXTREME: ++ if (opts->x_flag_plt) ++ { ++ if (opts_set->x_flag_plt) ++ error ("code model %qs is not compatible with %s", ++ "extreme", "-fplt"); ++ opts->x_flag_plt = 0; ++ } ++ break; ++ ++ case CMODEL_TINY_STATIC: ++ case CMODEL_MEDIUM: ++ case CMODEL_NORMAL: ++ case CMODEL_TINY: ++ case CMODEL_LARGE: ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++ ++/* Resolve options that's not covered by la_target. */ ++void ++loongarch_init_misc_options (struct gcc_options *opts, ++ struct gcc_options *opts_set) ++{ ++ if (opts->x_flag_pic) ++ opts->x_g_switch_value = 0; ++ ++ /* -mrecip options. */ ++ opts->x_recip_mask = loongarch_parse_mrecip_scheme (opts->x_la_recip_name); ++ ++#define INIT_TARGET_FLAG(NAME, INIT) \ ++ { \ ++ if (!(opts_set->x_target_flags & MASK_##NAME)) \ ++ { \ ++ if (INIT) \ ++ opts->x_target_flags |= MASK_##NAME; \ ++ else \ ++ opts->x_target_flags &= ~MASK_##NAME; \ ++ } \ ++ } ++ ++ /* Enable conditional moves for int and float by default. */ ++ INIT_TARGET_FLAG (COND_MOVE_INT, 1) ++ INIT_TARGET_FLAG (COND_MOVE_FLOAT, 1) ++ ++ /* Set mrelax default. 
*/ ++ INIT_TARGET_FLAG (LINKER_RELAXATION, ++ HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION) ++ ++#undef INIT_TARGET_FLAG ++ ++ /* Set mexplicit-relocs default. */ ++ if (opts->x_la_opt_explicit_relocs == M_OPT_UNSET) ++ opts->x_la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS ++ ? (TARGET_LINKER_RELAXATION ++ ? EXPLICIT_RELOCS_AUTO ++ : EXPLICIT_RELOCS_ALWAYS) ++ : EXPLICIT_RELOCS_NONE); ++ ++ /* Enable sw prefetching at -O3 and higher. */ ++ if (opts->x_flag_prefetch_loop_arrays < 0 ++ && (opts->x_optimize >= 3 || opts->x_flag_profile_use) ++ && !opts->x_optimize_size) ++ opts->x_flag_prefetch_loop_arrays = 1; ++ ++ if (TARGET_DIRECT_EXTERN_ACCESS_OPTS_P (opts) && opts->x_flag_shlib) ++ error ("%qs cannot be used for compiling a shared library", ++ "-mdirect-extern-access"); ++ ++ /* Enforce that interval is the same size as size so the mid-end does the ++ right thing. */ ++ SET_OPTION_IF_UNSET (opts, opts_set, ++ param_stack_clash_protection_probe_interval, ++ param_stack_clash_protection_guard_size); ++} +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 463812136..177d587da 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -30,6 +30,10 @@ along with GCC; see the file COPYING3. If not see + /* Target configuration */ + extern struct loongarch_target la_target; + ++/* RTL cost information */ ++extern const struct loongarch_rtx_cost_data *loongarch_cost; ++ ++ + /* Initialize loongarch_target from separate option variables. */ + void + loongarch_init_target (struct loongarch_target *target, +@@ -46,11 +50,30 @@ loongarch_config_target (struct loongarch_target *target, + struct loongarch_flags *flags, + int follow_multilib_list_p); + ++ ++/* Refresh the switches acccording to the resolved loongarch_target struct. 
*/ ++void ++loongarch_target_option_override (struct loongarch_target *target, ++ struct gcc_options *opts, ++ struct gcc_options *opts_set); ++ ++ + /* option status feedback for "gcc --help=target -Q" */ + void + loongarch_update_gcc_opt_status (struct loongarch_target *target, + struct gcc_options *opts, + struct gcc_options *opts_set); ++ ++ ++/* Parser for -mrecip=. */ ++unsigned int ++loongarch_parse_mrecip_scheme (const char *recip_string); ++ ++ ++/* Resolve options that's not covered by la_target. */ ++void ++loongarch_init_misc_options (struct gcc_options *opts, ++ struct gcc_options *opts_set); + #endif + + /* Flag status */ +@@ -80,9 +103,7 @@ struct loongarch_flags { + #define TARGET_DOUBLE_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64D) + + #define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64) +-#define TARGET_ABI_LP64 (la_target.abi.base == ABI_BASE_LP64D \ +- || la_target.abi.base == ABI_BASE_LP64F \ +- || la_target.abi.base == ABI_BASE_LP64S) ++#define TARGET_ABI_LP64 ABI_LP64_P(la_target.abi.base) + + #define ISA_HAS_LSX \ + (la_target.isa.simd == ISA_EXT_SIMD_LSX \ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 8d9cda165..c2f3739d0 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -208,9 +208,6 @@ const enum reg_class loongarch_regno_to_class[FIRST_PSEUDO_REGISTER] = { + FRAME_REGS, FRAME_REGS + }; + +-/* Which cost information to use. */ +-static const struct loongarch_rtx_cost_data *loongarch_cost; +- + /* Information about a single argument. */ + struct loongarch_arg_info + { +@@ -5908,17 +5905,6 @@ loongarch_print_operand_punctuation (FILE *file, int ch) + } + } + +-/* Initialize loongarch_print_operand_punct. 
*/ +- +-static void +-loongarch_init_print_operand_punct (void) +-{ +- const char *p; +- +- for (p = ".$"; *p; p++) +- loongarch_print_operand_punct[(unsigned char) *p] = true; +-} +- + /* PRINT_OPERAND prefix LETTER refers to the integer branch instruction + associated with condition CODE. Print the condition part of the + opcode to FILE. */ +@@ -7622,118 +7608,15 @@ loongarch_init_machine_status (void) + } + + static void +-loongarch_cpu_option_override (struct loongarch_target *target, +- struct gcc_options *opts, +- struct gcc_options *opts_set) +-{ +- /* alignments */ +- if (opts->x_flag_align_functions && !opts->x_str_align_functions) +- opts->x_str_align_functions +- = loongarch_cpu_align[target->cpu_tune].function; +- +- if (opts->x_flag_align_labels && !opts->x_str_align_labels) +- opts->x_str_align_labels = loongarch_cpu_align[target->cpu_tune].label; +- +- /* Set up parameters to be used in prefetching algorithm. */ +- int simultaneous_prefetches +- = loongarch_cpu_cache[target->cpu_tune].simultaneous_prefetches; +- +- SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, +- simultaneous_prefetches); +- +- SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_line_size, +- loongarch_cpu_cache[target->cpu_tune].l1d_line_size); +- +- SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_size, +- loongarch_cpu_cache[target->cpu_tune].l1d_size); +- +- SET_OPTION_IF_UNSET (opts, opts_set, param_l2_cache_size, +- loongarch_cpu_cache[target->cpu_tune].l2d_size); +-} +- +-static void +-loongarch_option_override_internal (struct gcc_options *opts, +- struct gcc_options *opts_set) ++loongarch_global_init (void) + { +- int i, regno, mode; +- +- if (flag_pic) +- g_switch_value = 0; +- +- loongarch_init_target (&la_target, +- la_opt_cpu_arch, la_opt_cpu_tune, la_opt_fpu, +- la_opt_simd, la_opt_abi_base, la_opt_abi_ext, +- la_opt_cmodel, opts->x_la_isa_evolution, +- opts_set->x_la_isa_evolution); +- +- /* Handle target-specific options: compute 
defaults/conflicts etc. */ +- loongarch_config_target (&la_target, NULL, 0); +- +- loongarch_update_gcc_opt_status (&la_target, opts, opts_set); +- loongarch_cpu_option_override (&la_target, opts, opts_set); +- +- if (TARGET_ABI_LP64) +- flag_pcc_struct_return = 0; +- +- /* Decide which rtx_costs structure to use. */ +- if (optimize_size) +- loongarch_cost = &loongarch_rtx_cost_optimize_size; +- else +- loongarch_cost = &loongarch_cpu_rtx_cost_data[la_target.cpu_tune]; +- +- /* If the user hasn't specified a branch cost, use the processor's +- default. */ +- if (la_branch_cost == 0) +- la_branch_cost = loongarch_cost->branch_cost; +- +- /* Enable sw prefetching at -O3 and higher. */ +- if (opts->x_flag_prefetch_loop_arrays < 0 +- && (opts->x_optimize >= 3 || opts->x_flag_profile_use) +- && !opts->x_optimize_size) +- opts->x_flag_prefetch_loop_arrays = 1; +- +- if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib) +- error ("%qs cannot be used for compiling a shared library", +- "-mdirect-extern-access"); +- +- switch (la_target.cmodel) +- { +- case CMODEL_EXTREME: +- if (opts->x_flag_plt) +- { +- if (global_options_set.x_flag_plt) +- error ("code model %qs is not compatible with %s", +- "extreme", "-fplt"); +- opts->x_flag_plt = 0; +- } +- break; +- +- case CMODEL_TINY_STATIC: +- case CMODEL_MEDIUM: +- case CMODEL_NORMAL: +- case CMODEL_TINY: +- case CMODEL_LARGE: +- break; +- +- default: +- gcc_unreachable (); +- } +- +- /* Validate the guard size. */ +- int guard_size = param_stack_clash_protection_guard_size; +- +- /* Enforce that interval is the same size as size so the mid-end does the +- right thing. */ +- SET_OPTION_IF_UNSET (opts, &global_options_set, +- param_stack_clash_protection_probe_interval, +- guard_size); +- +- loongarch_init_print_operand_punct (); ++ /* Initialize loongarch_print_operand_punct. 
*/ ++ for (const char *p = ".$"; *p; p++) ++ loongarch_print_operand_punct[(unsigned char) *p] = true; + + /* Set up array to map GCC register number to debug register number. + Ignore the special purpose register numbers. */ +- +- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) ++ for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { + if (GP_REG_P (i) || FP_REG_P (i)) + loongarch_dwarf_regno[i] = i; +@@ -7742,115 +7625,53 @@ loongarch_option_override_internal (struct gcc_options *opts, + } + + /* Set up loongarch_hard_regno_mode_ok. */ +- for (mode = 0; mode < MAX_MACHINE_MODE; mode++) +- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) ++ for (int mode = 0; mode < MAX_MACHINE_MODE; mode++) ++ for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + loongarch_hard_regno_mode_ok_p[mode][regno] + = loongarch_hard_regno_mode_ok_uncached (regno, (machine_mode) mode); + + /* Function to allocate machine-dependent function status. */ + init_machine_status = &loongarch_init_machine_status; ++}; + +- /* -mrecip options. */ +- static struct +- { +- const char *string; /* option name. */ +- unsigned int mask; /* mask bits to set. 
*/ +- } +- const recip_options[] = { +- { "all", RECIP_MASK_ALL }, +- { "none", RECIP_MASK_NONE }, +- { "div", RECIP_MASK_DIV }, +- { "sqrt", RECIP_MASK_SQRT }, +- { "rsqrt", RECIP_MASK_RSQRT }, +- { "vec-div", RECIP_MASK_VEC_DIV }, +- { "vec-sqrt", RECIP_MASK_VEC_SQRT }, +- { "vec-rsqrt", RECIP_MASK_VEC_RSQRT }, +- }; +- +- if (la_recip_name) +- { +- char *p = ASTRDUP (la_recip_name); +- char *q; +- unsigned int mask, i; +- bool invert; +- +- while ((q = strtok (p, ",")) != NULL) +- { +- p = NULL; +- if (*q == '!') +- { +- invert = true; +- q++; +- } +- else +- invert = false; +- +- if (!strcmp (q, "default")) +- mask = RECIP_MASK_ALL; +- else +- { +- for (i = 0; i < ARRAY_SIZE (recip_options); i++) +- if (!strcmp (q, recip_options[i].string)) +- { +- mask = recip_options[i].mask; +- break; +- } +- +- if (i == ARRAY_SIZE (recip_options)) +- { +- error ("unknown option for %<-mrecip=%s%>", q); +- invert = false; +- mask = RECIP_MASK_NONE; +- } +- } +- +- if (invert) +- recip_mask &= ~mask; +- else +- recip_mask |= mask; +- } +- } +- if (la_recip) +- recip_mask |= RECIP_MASK_ALL; +- if (!ISA_HAS_FRECIPE) +- recip_mask = RECIP_MASK_NONE; +- +-#define INIT_TARGET_FLAG(NAME, INIT) \ +- { \ +- if (!(target_flags_explicit & MASK_##NAME)) \ +- { \ +- if (INIT) \ +- target_flags |= MASK_##NAME; \ +- else \ +- target_flags &= ~MASK_##NAME; \ +- } \ +- } +- +- /* Enable conditional moves for int and float by default. */ +- INIT_TARGET_FLAG (COND_MOVE_INT, 1) +- INIT_TARGET_FLAG (COND_MOVE_FLOAT, 1) +- +- /* Set mrelax default. */ +- INIT_TARGET_FLAG (LINKER_RELAXATION, +- HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION) ++static void ++loongarch_option_override_internal (struct loongarch_target *target, ++ struct gcc_options *opts, ++ struct gcc_options *opts_set) ++{ ++ /* Handle options not covered by struct loongarch_target. */ ++ loongarch_init_misc_options (opts, opts_set); ++ ++ /* Resolve the target struct. 
*/ ++ loongarch_init_target (target, ++ opts->x_la_opt_cpu_arch, ++ opts->x_la_opt_cpu_tune, ++ opts->x_la_opt_fpu, ++ opts->x_la_opt_simd, ++ opts->x_la_opt_abi_base, ++ opts->x_la_opt_abi_ext, ++ opts->x_la_opt_cmodel, ++ opts->x_la_isa_evolution, ++ opts_set->x_la_isa_evolution); + +-#undef INIT_TARGET_FLAG ++ loongarch_config_target (target, NULL, 0); + +- if (la_opt_explicit_relocs == M_OPT_UNSET) +- la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS +- ? (TARGET_LINKER_RELAXATION +- ? EXPLICIT_RELOCS_AUTO +- : EXPLICIT_RELOCS_ALWAYS) +- : EXPLICIT_RELOCS_NONE); ++ /* Override some options according to the resolved target. */ ++ loongarch_target_option_override (target, opts, opts_set); + } + +- + /* Implement TARGET_OPTION_OVERRIDE. */ + + static void + loongarch_option_override (void) + { +- loongarch_option_override_internal (&global_options, &global_options_set); ++ /* Setting up the target configuration. */ ++ loongarch_option_override_internal (&la_target, ++ &global_options, ++ &global_options_set); ++ ++ /* Global initializations. */ ++ loongarch_global_init (); + } + + /* Implement TARGET_OPTION_SAVE. 
*/ +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index 698e42aec..221e8b286 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -710,12 +710,18 @@ enum reg_class + | RECIP_MASK_RSQRT | RECIP_MASK_VEC_SQRT \ + | RECIP_MASK_VEC_DIV | RECIP_MASK_VEC_RSQRT) + +-#define TARGET_RECIP_DIV ((recip_mask & RECIP_MASK_DIV) != 0 || TARGET_uARCH_LA664) +-#define TARGET_RECIP_SQRT ((recip_mask & RECIP_MASK_SQRT) != 0 || TARGET_uARCH_LA664) +-#define TARGET_RECIP_RSQRT ((recip_mask & RECIP_MASK_RSQRT) != 0 || TARGET_uARCH_LA664) +-#define TARGET_RECIP_VEC_DIV ((recip_mask & RECIP_MASK_VEC_DIV) != 0 || TARGET_uARCH_LA664) +-#define TARGET_RECIP_VEC_SQRT ((recip_mask & RECIP_MASK_VEC_SQRT) != 0 || TARGET_uARCH_LA664) +-#define TARGET_RECIP_VEC_RSQRT ((recip_mask & RECIP_MASK_VEC_RSQRT) != 0 || TARGET_uARCH_LA664) ++#define TARGET_RECIP_DIV \ ++ ((recip_mask & RECIP_MASK_DIV) != 0 && ISA_HAS_FRECIPE) ++#define TARGET_RECIP_SQRT \ ++ ((recip_mask & RECIP_MASK_SQRT) != 0 && ISA_HAS_FRECIPE) ++#define TARGET_RECIP_RSQRT \ ++ ((recip_mask & RECIP_MASK_RSQRT) != 0 && ISA_HAS_FRECIPE) ++#define TARGET_RECIP_VEC_DIV \ ++ ((recip_mask & RECIP_MASK_VEC_DIV) != 0 && ISA_HAS_FRECIPE) ++#define TARGET_RECIP_VEC_SQRT \ ++ ((recip_mask & RECIP_MASK_VEC_SQRT) != 0 && ISA_HAS_FRECIPE) ++#define TARGET_RECIP_VEC_RSQRT \ ++ ((recip_mask & RECIP_MASK_VEC_RSQRT) != 0 && ISA_HAS_FRECIPE) + + /* 1 if N is a possible register number for function argument passing. + We have no FP argument registers when soft-float. */ +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 75d230067..ea848cd76 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -205,14 +205,14 @@ mexplicit-relocs + Target Alias(mexplicit-relocs=, always, none) + Use %reloc() assembly operators (for backward compatibility). 
+ +-mrecip +-Target RejectNegative Var(la_recip) Save +-Generate approximate reciprocal divide and square root for better throughput. +- + mrecip= + Target RejectNegative Joined Var(la_recip_name) Save + Control generation of reciprocal estimates. + ++mrecip ++Target Alias(mrecip=, all, none) ++Generate approximate reciprocal divide and square root for better throughput. ++ + ; The code model option names for -mcmodel. + Enum + Name(cmodel) Type(int) +-- +2.43.0 + diff --git a/0158-LoongArch-Regenerate-loongarch.opt.urls.patch b/0158-LoongArch-Regenerate-loongarch.opt.urls.patch new file mode 100644 index 0000000..58990ad --- /dev/null +++ b/0158-LoongArch-Regenerate-loongarch.opt.urls.patch @@ -0,0 +1,117 @@ +From 90a0f195830a25e4179127c67e873c80f758f29d Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Fri, 25 Oct 2024 06:25:39 +0000 +Subject: [PATCH 158/188] LoongArch: Regenerate loongarch.opt.urls. + +Fixes: d28ea8e5a704 ("LoongArch: Split loongarch_option_override_internal + into smaller procedures") + +gcc/ChangeLog: + + * config/loongarch/loongarch.opt.urls: Regenerate. 
+--- + gcc/config/loongarch/loongarch.opt.urls | 92 +++++++++++++++++++++++++ + 1 file changed, 92 insertions(+) + create mode 100644 gcc/config/loongarch/loongarch.opt.urls + +diff --git a/gcc/config/loongarch/loongarch.opt.urls b/gcc/config/loongarch/loongarch.opt.urls +new file mode 100644 +index 000000000..571c504e6 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch.opt.urls +@@ -0,0 +1,92 @@ ++; Autogenerated by regenerate-opt-urls.py from gcc/config/loongarch/loongarch.opt and generated HTML ++ ++mfpu= ++UrlSuffix(gcc/LoongArch-Options.html#index-mfpu-2) ++ ++msoft-float ++UrlSuffix(gcc/LoongArch-Options.html#index-msoft-float-5) ++ ++msingle-float ++UrlSuffix(gcc/LoongArch-Options.html#index-msingle-float) ++ ++mdouble-float ++UrlSuffix(gcc/LoongArch-Options.html#index-mdouble-float-1) ++ ++msimd= ++UrlSuffix(gcc/LoongArch-Options.html#index-msimd-1) ++ ++march= ++UrlSuffix(gcc/LoongArch-Options.html#index-march-7) ++ ++mtune= ++UrlSuffix(gcc/LoongArch-Options.html#index-mtune-8) ++ ++mabi= ++UrlSuffix(gcc/LoongArch-Options.html#index-mabi-2) ++ ++mbranch-cost= ++UrlSuffix(gcc/LoongArch-Options.html#index-mbranch-cost-2) ++ ++mcheck-zero-division ++UrlSuffix(gcc/LoongArch-Options.html#index-mcheck-zero-division) ++ ++mcond-move-int ++UrlSuffix(gcc/LoongArch-Options.html#index-mcond-move-int) ++ ++mcond-move-float ++UrlSuffix(gcc/LoongArch-Options.html#index-mcond-move-float) ++ ++mmemcpy ++UrlSuffix(gcc/LoongArch-Options.html#index-mmemcpy) ++ ++mstrict-align ++UrlSuffix(gcc/LoongArch-Options.html#index-mstrict-align-1) ++ ++mmax-inline-memcpy-size= ++UrlSuffix(gcc/LoongArch-Options.html#index-mmax-inline-memcpy-size) ++ ++mexplicit-relocs= ++UrlSuffix(gcc/LoongArch-Options.html#index-mexplicit-relocs-1) ++ ++mexplicit-relocs ++UrlSuffix(gcc/LoongArch-Options.html#index-mexplicit-relocs-1) ++ ++mrecip= ++UrlSuffix(gcc/LoongArch-Options.html#index-mrecip) ++ ++mrecip ++UrlSuffix(gcc/LoongArch-Options.html#index-mrecip) ++ ++mcmodel= 
++UrlSuffix(gcc/LoongArch-Options.html#index-mcmodel_003d-1) ++ ++mdirect-extern-access ++UrlSuffix(gcc/LoongArch-Options.html#index-mdirect-extern-access) ++ ++mrelax ++UrlSuffix(gcc/LoongArch-Options.html#index-mrelax-2) ++ ++mpass-mrelax-to-as ++UrlSuffix(gcc/LoongArch-Options.html#index-mpass-mrelax-to-as) ++ ++mtls-dialect= ++UrlSuffix(gcc/LoongArch-Options.html#index-mtls-dialect-1) ++ ++mannotate-tablejump ++UrlSuffix(gcc/LoongArch-Options.html#index-mannotate-tablejump) ++ ++mfrecipe ++UrlSuffix(gcc/LoongArch-Options.html#index-mfrecipe) ++ ++mdiv32 ++UrlSuffix(gcc/LoongArch-Options.html#index-mdiv32) ++ ++mlam-bh ++UrlSuffix(gcc/LoongArch-Options.html#index-mlam-bh) ++ ++mlamcas ++UrlSuffix(gcc/LoongArch-Options.html#index-mlamcas) ++ ++mld-seq-sa ++UrlSuffix(gcc/LoongArch-Options.html#index-mld-seq-sa) ++ +-- +2.43.0 + diff --git a/0159-LoongArch-Add-support-for-TLS-descriptors.patch b/0159-LoongArch-Add-support-for-TLS-descriptors.patch new file mode 100644 index 0000000..eb8ea00 --- /dev/null +++ b/0159-LoongArch-Add-support-for-TLS-descriptors.patch @@ -0,0 +1,724 @@ +From 0d5ff38a94dbd655bc86e0be262458ac71726ea4 Mon Sep 17 00:00:00 2001 +From: mengqinggang +Date: Tue, 2 Apr 2024 09:57:20 +0800 +Subject: [PATCH 159/188] LoongArch: Add support for TLS descriptors. + +Add support for TLS descriptors on normal code model and extreme +code model. 
+ +Normal code model instruction sequence: + -mno-explicit-relocs: + la.tls.desc $r4, s + add.d $r12, $r4, $r2 + -mexplicit-relocs: + pcalau12i $r4,%desc_pc_hi20(s) + addi.d $r4,$r4,%desc_pc_lo12(s) + ld.d $r1,$r4,%desc_ld(s) + jirl $r1,$r1,%desc_call(s) + add.d $r12, $r4, $r2 + +Extreme code model instruction sequence: + -mno-explicit-relocs: + la.tls.desc $r4, $r12, s + add.d $r12, $r4, $r2 + -mexplicit-relocs: + pcalau12i $r4,%desc_pc_hi20(s) + addi.d $r12,$r0,%desc_pc_lo12(s) + lu32i.d $r12,%desc64_pc_lo20(s) + lu52i.d $r12,$r12,%desc64_pc_hi12(s) + add.d $r4,$r4,$r12 + ld.d $r1,$r4,%desc_ld(s) + jirl $r1,$r1,%desc_call(s) + add.d $r12, $r4, $r2 + +The default is still traditional TLS model, but can be configured with +--with-tls={trad,desc}. The default can change to TLS descriptors once +libc and LLVM support this. + +gcc/ChangeLog: + + * config.gcc: Add --with-tls option to change TLS flavor. + * config/loongarch/genopts/loongarch.opt.in: Add -mtls-dialect to + configure TLS flavor. + * config/loongarch/loongarch-def.h (struct loongarch_target): Add + tls_dialect. + * config/loongarch/loongarch-driver.cc (la_driver_init): Add tls + flavor. + * config/loongarch/loongarch-opts.cc (loongarch_init_target): Add + tls_dialect. + (loongarch_config_target): Ditto. + (loongarch_update_gcc_opt_status): Ditto. + * config/loongarch/loongarch-opts.h (loongarch_init_target): Ditto. + (TARGET_TLS_DESC): New define. + * config/loongarch/loongarch.cc (loongarch_symbol_insns): Add TLS + DESC instructions sequence length. + (loongarch_legitimize_tls_address): New TLS DESC instruction sequence. + (loongarch_option_override_internal): Add la_opt_tls_dialect. + (loongarch_option_restore): Add la_target.tls_dialect. + * config/loongarch/loongarch.md (@got_load_tls_desc): Normal + code model for TLS DESC. + (got_load_tls_desc_off64): Extreme code model for TLS DESC. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch.opt.urls: Ditto. 
+ * doc/invoke.texi: Add a description of the compilation option + '-mtls-dialect={trad,desc}'. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/cmodel-extreme-1.c: Add -mtls-dialect=trad. + * gcc.target/loongarch/cmodel-extreme-2.c: Ditto. + * gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c: Ditto. + * gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c: + Ditto. + * gcc.target/loongarch/func-call-medium-1.c: Ditto. + * gcc.target/loongarch/func-call-medium-2.c: Ditto. + * gcc.target/loongarch/func-call-medium-3.c: Ditto. + * gcc.target/loongarch/func-call-medium-4.c: Ditto. + * gcc.target/loongarch/tls-extreme-macro.c: Ditto. + * gcc.target/loongarch/tls-gd-noplt.c: Ditto. + * gcc.target/loongarch/explicit-relocs-auto-extreme-tls-desc.c: New test. + * gcc.target/loongarch/explicit-relocs-auto-tls-desc.c: New test. + * gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c: New test. + * gcc.target/loongarch/explicit-relocs-tls-desc.c: New test. + +Co-authored-by: Lulu Cheng +Co-authored-by: Xi Ruoyao +--- + gcc/config.gcc | 19 +++++- + gcc/config/loongarch/genopts/loongarch.opt.in | 14 ++++ + gcc/config/loongarch/loongarch-def.h | 7 ++ + gcc/config/loongarch/loongarch-driver.cc | 2 +- + gcc/config/loongarch/loongarch-opts.cc | 12 +++- + gcc/config/loongarch/loongarch-opts.h | 3 + + gcc/config/loongarch/loongarch.cc | 45 ++++++++---- + gcc/config/loongarch/loongarch.md | 68 +++++++++++++++++++ + gcc/config/loongarch/loongarch.opt | 14 ++++ + gcc/doc/invoke.texi | 16 ++++- + .../gcc.target/loongarch/cmodel-extreme-1.c | 2 +- + .../gcc.target/loongarch/cmodel-extreme-2.c | 2 +- + .../explicit-relocs-auto-extreme-tls-desc.c | 10 +++ + .../loongarch/explicit-relocs-auto-tls-desc.c | 10 +++ + .../explicit-relocs-auto-tls-ld-gd.c | 2 +- + .../explicit-relocs-extreme-tls-desc.c | 16 +++++ + ...icit-relocs-medium-call36-auto-tls-ld-gd.c | 2 +- + .../loongarch/explicit-relocs-tls-desc.c | 13 ++++ + .../gcc.target/loongarch/func-call-medium-1.c 
| 2 +- + .../gcc.target/loongarch/func-call-medium-2.c | 2 +- + .../gcc.target/loongarch/func-call-medium-3.c | 2 +- + .../gcc.target/loongarch/func-call-medium-4.c | 2 +- + .../gcc.target/loongarch/tls-extreme-macro.c | 2 +- + .../gcc.target/loongarch/tls-gd-noplt.c | 2 +- + 24 files changed, 243 insertions(+), 26 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-extreme-tls-desc.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-desc.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 499b36b45..1db558d4c 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -4982,7 +4982,7 @@ case "${target}" in + ;; + + loongarch*-*) +- supported_defaults="abi arch tune fpu simd multilib-default strict-align-lib" ++ supported_defaults="abi arch tune fpu simd multilib-default strict-align-lib tls" + + # Local variables + unset \ +@@ -5240,6 +5240,18 @@ case "${target}" in + with_multilib_list="${abi_base}/${abi_ext}" + fi + ++ # Handle --with-tls. ++ case "$with_tls" in ++ "" \ ++ | trad | desc) ++ # OK ++ ;; ++ *) ++ echo "Unknown TLS method used in --with-tls=$with_tls" 1>&2 ++ exit 1 ++ ;; ++ esac ++ + # Check if the configured default ABI combination is included in + # ${with_multilib_list}. 
+ loongarch_multilib_list_sane=no +@@ -5875,6 +5887,11 @@ case ${target} in + lasx) tm_defines="$tm_defines DEFAULT_ISA_EXT_SIMD=ISA_EXT_SIMD_LASX" ;; + esac + ++ case ${with_tls} in ++ "" | trad) tm_defines="$tm_defines DEFAULT_TLS_TYPE=TLS_TRADITIONAL" ;; ++ desc) tm_defines="$tm_defines DEFAULT_TLS_TYPE=TLS_DESCRIPTORS" ;; ++ esac ++ + tmake_file="loongarch/t-loongarch $tmake_file" + ;; + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 9c6f59bb8..f3d53f03c 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -245,6 +245,20 @@ mpass-mrelax-to-as + Driver Var(la_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) + Pass -mrelax or -mno-relax option to the assembler. + ++Enum ++Name(tls_type) Type(int) ++The possible TLS dialects: ++ ++EnumValue ++Enum(tls_type) String(trad) Value(TLS_TRADITIONAL) ++ ++EnumValue ++Enum(tls_type) String(desc) Value(TLS_DESCRIPTORS) ++ ++mtls-dialect= ++Target RejectNegative Joined Enum(tls_type) Var(la_opt_tls_dialect) Init(M_OPT_UNSET) Save ++Specify TLS dialect. ++ + -param=loongarch-vect-unroll-limit= + Target Joined UInteger Var(la_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param + Used to limit unroll factor which indicates how much the autovectorizer may +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index b1423bcfe..2fe44da5a 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -180,6 +180,7 @@ struct loongarch_target + int cpu_arch; /* CPU_ */ + int cpu_tune; /* same */ + int cmodel; /* CMODEL_ */ ++ int tls_dialect; /* TLS_ */ + }; + + /* CPU model */ +@@ -193,6 +194,12 @@ enum { + N_TUNE_TYPES = 5 + }; + ++/* TLS types. 
*/ ++enum { ++ TLS_TRADITIONAL = 0, ++ TLS_DESCRIPTORS = 1 ++}; ++ + /* CPU model properties */ + extern loongarch_def_array + loongarch_cpu_strings; +diff --git a/gcc/config/loongarch/loongarch-driver.cc b/gcc/config/loongarch/loongarch-driver.cc +index b84a6eaf7..8551cf94d 100644 +--- a/gcc/config/loongarch/loongarch-driver.cc ++++ b/gcc/config/loongarch/loongarch-driver.cc +@@ -45,7 +45,7 @@ la_driver_init (int argc ATTRIBUTE_UNUSED, const char **argv ATTRIBUTE_UNUSED) + /* Initialize all fields of la_target. */ + loongarch_init_target (&la_target, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, + M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, M_OPT_UNSET, +- 0, 0); ++ M_OPT_UNSET, 0, 0); + return ""; + } + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 404642a9e..062d430c2 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -144,6 +144,7 @@ void + loongarch_init_target (struct loongarch_target *target, + int cpu_arch, int cpu_tune, int fpu, int simd, + int abi_base, int abi_ext, int cmodel, ++ int tls_dialect, + HOST_WIDE_INT isa_evolution, + HOST_WIDE_INT isa_evolution_set) + { +@@ -158,6 +159,7 @@ loongarch_init_target (struct loongarch_target *target, + target->abi.base = abi_base; + target->abi.ext = abi_ext; + target->cmodel = cmodel; ++ target->tls_dialect = tls_dialect; + } + + +@@ -179,7 +181,8 @@ loongarch_config_target (struct loongarch_target *target, + obstack_init (&msg_obstack); + + struct { +- int arch, tune, fpu, simd, abi_base, abi_ext, cmodel, abi_flt; ++ int arch, tune, fpu, simd, abi_base, abi_ext, cmodel, ++ tls_dialect, abi_flt; + } constrained = { + M_OPT_ABSENT (target->cpu_arch) ? 0 : 1, + M_OPT_ABSENT (target->cpu_tune) ? 0 : 1, +@@ -188,6 +191,7 @@ loongarch_config_target (struct loongarch_target *target, + M_OPT_ABSENT (target->abi.base) ? 0 : 1, + M_OPT_ABSENT (target->abi.ext) ? 0 : 1, + M_OPT_ABSENT (target->cmodel) ? 
0 : 1, ++ M_OPT_ABSENT (target->tls_dialect) ? 0 : 1, + M_OPT_ABSENT (target->abi.base) ? 0 : 1, + }; + +@@ -556,6 +560,9 @@ fallback: + gcc_unreachable (); + } + ++ t.tls_dialect = constrained.tls_dialect ? target->tls_dialect ++ : DEFAULT_TLS_TYPE; ++ + /* Cleanup and return. */ + obstack_free (&msg_obstack, NULL); + *target = t; +@@ -791,6 +798,9 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, + /* status of -mcmodel */ + opts->x_la_opt_cmodel = target->cmodel; + ++ /* status of -mtls-dialect */ ++ opts->x_la_opt_tls_dialect = target->tls_dialect; ++ + /* status of -mfpu */ + opts->x_la_opt_fpu = target->isa.fpu; + +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 177d587da..a3b467f4c 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -39,6 +39,7 @@ void + loongarch_init_target (struct loongarch_target *target, + int cpu_arch, int cpu_tune, int fpu, int simd, + int abi_base, int abi_ext, int cmodel, ++ int tls_dialect, + HOST_WIDE_INT isa_evolutions, + HOST_WIDE_INT isa_evolutions_set); + +@@ -105,6 +106,8 @@ struct loongarch_flags { + #define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64) + #define TARGET_ABI_LP64 ABI_LP64_P(la_target.abi.base) + ++#define TARGET_TLS_DESC (la_target.tls_dialect == TLS_DESCRIPTORS) ++ + #define ISA_HAS_LSX \ + (la_target.isa.simd == ISA_EXT_SIMD_LSX \ + || la_target.isa.simd == ISA_EXT_SIMD_LASX) +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index c2f3739d0..e27335b3c 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -2029,7 +2029,7 @@ loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode) + + case SYMBOL_TLSGD: + case SYMBOL_TLSLDM: +- return 3; ++ return TARGET_TLS_DESC ? 
4 : 3; + + case SYMBOL_PCREL64: + return 5; +@@ -2930,24 +2930,43 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) + static rtx + loongarch_legitimize_tls_address (rtx loc) + { +- rtx dest, tp, tmp, tmp1, tmp2, tmp3; ++ rtx dest, tp, tmp, tmp1, tmp2, tmp3, a0; + enum tls_model model = SYMBOL_REF_TLS_MODEL (loc); + rtx_insn *insn; + + switch (model) + { + case TLS_MODEL_LOCAL_DYNAMIC: +- tmp = gen_rtx_REG (Pmode, GP_RETURN); +- dest = gen_reg_rtx (Pmode); +- insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSLDM, tmp); +- emit_libcall_block (insn, dest, tmp, loc); +- break; +- ++ if (!TARGET_TLS_DESC) ++ { ++ tmp = gen_rtx_REG (Pmode, GP_RETURN); ++ dest = gen_reg_rtx (Pmode); ++ insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSLDM, tmp); ++ emit_libcall_block (insn, dest, tmp, loc); ++ break; ++ } ++ /* Fall through. */ + case TLS_MODEL_GLOBAL_DYNAMIC: +- tmp = gen_rtx_REG (Pmode, GP_RETURN); +- dest = gen_reg_rtx (Pmode); +- insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSGD, tmp); +- emit_libcall_block (insn, dest, tmp, loc); ++ if (TARGET_TLS_DESC) ++ { ++ a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST); ++ dest = gen_reg_rtx (Pmode); ++ tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); ++ ++ if (TARGET_CMODEL_EXTREME) ++ emit_insn (gen_got_load_tls_desc_off64 (loc, gen_reg_rtx (DImode))); ++ else ++ emit_insn (gen_got_load_tls_desc (Pmode, loc)); ++ ++ emit_insn (gen_add3_insn (dest, a0, tp)); ++ } ++ else ++ { ++ tmp = gen_rtx_REG (Pmode, GP_RETURN); ++ dest = gen_reg_rtx (Pmode); ++ insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSGD, tmp); ++ emit_libcall_block (insn, dest, tmp, loc); ++ } + break; + + case TLS_MODEL_INITIAL_EXEC: +@@ -7651,6 +7670,7 @@ loongarch_option_override_internal (struct loongarch_target *target, + opts->x_la_opt_abi_base, + opts->x_la_opt_abi_ext, + opts->x_la_opt_cmodel, ++ opts->x_la_opt_tls_dialect, + opts->x_la_isa_evolution, + opts_set->x_la_isa_evolution); + +@@ -7697,6 +7717,7 @@ 
loongarch_option_restore (struct gcc_options *, + la_target.isa.evolution = ptr->x_la_isa_evolution; + + la_target.cmodel = ptr->x_la_opt_cmodel; ++ la_target.tls_dialect = ptr->x_la_opt_tls_dialect; + } + + /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */ +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 1b3525dde..95beb88fe 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -52,6 +52,8 @@ + + ;; TLS + UNSPEC_TLS ++ UNSPEC_TLS_DESC ++ UNSPEC_TLS_DESC_OFF64 + + ;; Stack tie + UNSPEC_TIE +@@ -127,6 +129,15 @@ + (T1_REGNUM 13) + (S0_REGNUM 23) + ++ (FCC0_REGNUM 64) ++ (FCC1_REGNUM 65) ++ (FCC2_REGNUM 66) ++ (FCC3_REGNUM 67) ++ (FCC4_REGNUM 68) ++ (FCC5_REGNUM 69) ++ (FCC6_REGNUM 70) ++ (FCC7_REGNUM 71) ++ + ;; Return path styles + (NORMAL_RETURN 0) + (SIBCALL_RETURN 1) +@@ -2759,6 +2770,63 @@ + + ;; Thread-Local Storage + ++(define_insn "@got_load_tls_desc" ++ [(set (reg:P 4) ++ (unspec:P ++ [(match_operand:P 0 "symbolic_operand" "")] ++ UNSPEC_TLS_DESC)) ++ (clobber (reg:SI FCC0_REGNUM)) ++ (clobber (reg:SI FCC1_REGNUM)) ++ (clobber (reg:SI FCC2_REGNUM)) ++ (clobber (reg:SI FCC3_REGNUM)) ++ (clobber (reg:SI FCC4_REGNUM)) ++ (clobber (reg:SI FCC5_REGNUM)) ++ (clobber (reg:SI FCC6_REGNUM)) ++ (clobber (reg:SI FCC7_REGNUM)) ++ (clobber (reg:SI RETURN_ADDR_REGNUM))] ++ "TARGET_TLS_DESC" ++{ ++ return TARGET_EXPLICIT_RELOCS ++ ? 
"pcalau12i\t$r4,%%desc_pc_hi20(%0)\n\t" ++ "addi.d\t$r4,$r4,%%desc_pc_lo12(%0)\n\t" ++ "ld.d\t$r1,$r4,%%desc_ld(%0)\n\t" ++ "jirl\t$r1,$r1,%%desc_call(%0)" ++ : "la.tls.desc\t$r4,%0"; ++} ++ [(set_attr "got" "load") ++ (set_attr "mode" "") ++ (set_attr "length" "16")]) ++ ++(define_insn "got_load_tls_desc_off64" ++ [(set (reg:DI 4) ++ (unspec:DI ++ [(match_operand:DI 0 "symbolic_operand" "")] ++ UNSPEC_TLS_DESC_OFF64)) ++ (clobber (reg:SI FCC0_REGNUM)) ++ (clobber (reg:SI FCC1_REGNUM)) ++ (clobber (reg:SI FCC2_REGNUM)) ++ (clobber (reg:SI FCC3_REGNUM)) ++ (clobber (reg:SI FCC4_REGNUM)) ++ (clobber (reg:SI FCC5_REGNUM)) ++ (clobber (reg:SI FCC6_REGNUM)) ++ (clobber (reg:SI FCC7_REGNUM)) ++ (clobber (reg:SI RETURN_ADDR_REGNUM)) ++ (clobber (match_operand:DI 1 "register_operand" "=&r"))] ++ "TARGET_TLS_DESC && TARGET_CMODEL_EXTREME" ++{ ++ return TARGET_EXPLICIT_RELOCS ++ ? "pcalau12i\t$r4,%%desc_pc_hi20(%0)\n\t" ++ "addi.d\t%1,$r0,%%desc_pc_lo12(%0)\n\t" ++ "lu32i.d\t%1,%%desc64_pc_lo20(%0)\n\t" ++ "lu52i.d\t%1,%1,%%desc64_pc_hi12(%0)\n\t" ++ "add.d\t$r4,$r4,%1\n\t" ++ "ld.d\t$r1,$r4,%%desc_ld(%0)\n\t" ++ "jirl\t$r1,$r1,%%desc_call(%0)" ++ : "la.tls.desc\t$r4,%1,%0"; ++} ++ [(set_attr "got" "load") ++ (set_attr "length" "28")]) ++ + (define_insn "@load_tls" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index ea848cd76..6f730d886 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -253,6 +253,20 @@ mpass-mrelax-to-as + Driver Var(la_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION) + Pass -mrelax or -mno-relax option to the assembler. 
+ ++Enum ++Name(tls_type) Type(int) ++The possible TLS dialects: ++ ++EnumValue ++Enum(tls_type) String(trad) Value(TLS_TRADITIONAL) ++ ++EnumValue ++Enum(tls_type) String(desc) Value(TLS_DESCRIPTORS) ++ ++mtls-dialect= ++Target RejectNegative Joined Enum(tls_type) Var(la_opt_tls_dialect) Init(M_OPT_UNSET) Save ++Specify TLS dialect. ++ + -param=loongarch-vect-unroll-limit= + Target Joined UInteger Var(la_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param + Used to limit unroll factor which indicates how much the autovectorizer may +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 7f24fe1e2..c9a1969ad 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1010,7 +1010,8 @@ Objective-C and Objective-C++ Dialects}. + -mdirect-extern-access -mno-direct-extern-access @gol + -mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as @gol + -mrecip -mrecip=@var{opt} -mfrecipe -mno-frecipe -mdiv32 -mno-div32 @gol +--mlam-bh -mno-lam-bh -mlamcas -mno-lamcas -mld-seq-sa -mno-ld-seq-sa} ++-mlam-bh -mno-lam-bh -mlamcas -mno-lamcas -mld-seq-sa -mno-ld-seq-sa @gol ++-mtls-dialect=@var{opt}} + + @emph{M32R/D Options} + @gccoptlist{-m32r2 -m32rx -m32r @gol +@@ -24727,6 +24728,19 @@ Whether a load-load barrier (@code{dbar 0x700}) is needed. When build with + @option{-march=la664}, it is enabled by default. The default is + @option{-mno-ld-seq-sa}, the load-load barrier is needed. + ++@opindex mtls-dialect ++@item -mtls-dialect=@var{opt} ++This option controls which tls dialect may be used for general dynamic and ++local dynamic TLS models. ++ ++@table @samp ++@item trad ++Use traditional TLS. This is the default. ++ ++@item desc ++Use TLS descriptors. ++@end table ++ + @item loongarch-vect-unroll-limit + The vectorizer will use available tuning information to determine whether it + would be beneficial to unroll the main vectorized loop and by how much. 
This +diff --git a/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c +index 564ee4017..6269607e7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=always -fdump-rtl-final" } */ ++/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -mtls-dialect=trad -fno-plt -mexplicit-relocs=always -fdump-rtl-final" } */ + + int a; + extern int b; +diff --git a/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c +index ce834805f..35f6ee0bb 100644 +--- a/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=auto -fdump-rtl-final" } */ ++/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -mtls-dialect=trad -fno-plt -mexplicit-relocs=auto -fdump-rtl-final" } */ + + #include "cmodel-extreme-1.c" + +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-extreme-tls-desc.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-extreme-tls-desc.c +new file mode 100644 +index 000000000..0fc7a1a51 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-extreme-tls-desc.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mcmodel=extreme -mexplicit-relocs=auto -mtls-dialect=desc" } */ ++ ++__thread int a __attribute__((visibility("hidden"))); ++extern __thread int b __attribute__((visibility("default"))); ++ ++int test() { return a + b; } ++ ++/* { dg-final { scan-assembler "la\\.tls\\.desc\t\\\$r4,\\\$r12,\\.LANCHOR0" { target tls_native } } } */ ++/* { dg-final { 
scan-assembler "la\\.tls\\.desc\t\\\$r4,\\\$r12,\\.LANCHOR0" { target tls_native } } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-desc.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-desc.c +new file mode 100644 +index 000000000..37947ecfd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-desc.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mtls-dialect=desc" } */ ++ ++__thread int a __attribute__((visibility("hidden"))); ++extern __thread int b __attribute__((visibility("default"))); ++ ++int test() { return a + b; } ++ ++/* { dg-final { scan-assembler "la\\.tls\\.desc\t\\\$r4,\\.LANCHOR0" { target tls_native } } } */ ++/* { dg-final { scan-assembler "la\\.tls\\.desc\t\\\$r4,\\.LANCHOR0" { target tls_native } } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +index ca55fcfc5..b47e37c82 100644 +--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto" } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mtls-dialect=trad" } */ + + __thread int a __attribute__((visibility("hidden"))); + extern __thread int b __attribute__((visibility("default"))); +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c +new file mode 100644 +index 000000000..3797556e1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -mcmodel=extreme" } */ ++ ++__thread int a __attribute__((visibility("hidden"))); ++extern 
__thread int b __attribute__((visibility("default"))); ++ ++int test() { return a + b; } ++ ++/* { dg-final { scan-assembler "pcalau12i\t\\\$r4,%desc_pc_hi20\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "addi.d\t\\\$r12,\\\$r0,%desc_pc_lo12\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "lu32i.d\t\\\$r12,%desc64_pc_lo20\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "lu52i.d\t\\\$r12,\\\$r12,%desc64_pc_hi12\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "add.d\t\\\$r4,\\\$r4,\\\$r12" { target tls_native } } } */ ++/* { dg-final { scan-assembler "ld.d\t\\\$r1,\\\$r4,%desc_ld\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "jirl\t\\\$r1,\\\$r1,%desc_call\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "add.d\t\\\$r12,\\\$r4,\\\$r2" { target tls_native } } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c +index d1a482083..cfb855323 100644 +--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=medium -fplt" } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mtls-dialect=trad -mcmodel=medium -fplt" } */ + /* { dg-final { scan-assembler "pcaddu18i\t\\\$r1,%call36\\\(__tls_get_addr\\\)" { target { tls_native && loongarch_call36_support } } } } */ + + #include "./explicit-relocs-auto-tls-ld-gd.c" +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c +new file mode 100644 +index 000000000..f66903091 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc" } */ ++ ++__thread int a __attribute__((visibility("hidden"))); ++extern __thread int b __attribute__((visibility("default"))); ++ ++int test() { return a + b; } ++ ++/* { dg-final { scan-assembler "pcalau12i\t\\\$r4,%desc_pc_hi20\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "addi.d\t\\\$r4,\\\$r4,%desc_pc_lo12\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "ld.d\t\\\$r1,\\\$r4,%desc_ld\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "jirl\t\\\$r1,\\\$r1,%desc_call\\\(\\.LANCHOR0\\\)" { target tls_native } } } */ ++/* { dg-final { scan-assembler "add.d\t\\\$r12,\\\$r4,\\\$r2" { target tls_native } } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-1.c +index 6339e832f..5e81df552 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-1.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-1.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mno-explicit-relocs -mcmodel=medium" } */ ++/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mno-explicit-relocs -mtls-dialect=trad -mcmodel=medium" } */ + /* { dg-final { scan-assembler "test:.*la\.global\t.*g\n\tjirl" } } */ + /* { dg-final { scan-assembler "test1:.*la\.global\t.*f\n\tjirl" } } */ + /* { dg-final { scan-assembler "test2:.*la\.local\t.*l\n\tjirl" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-2.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-2.c +index a53e75e0b..d73df2dd8 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-2.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-2.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options 
"-mabi=lp64d -O0 -fno-pic -fplt -mno-explicit-relocs -mcmodel=medium" } */ ++/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mno-explicit-relocs -mtls-dialect=trad -mcmodel=medium" } */ + /* { dg-final { scan-assembler "test:.*la\.global\t.*g\n\tjirl" } } */ + /* { dg-final { scan-assembler "test1:.*la\.local\t.*f\n\tjirl" } } */ + /* { dg-final { scan-assembler "test2:.*la\.local\t.*l\n\tjirl" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-3.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-3.c +index 0da7bf98e..88a667450 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-3.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-3.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mno-explicit-relocs -mcmodel=medium" } */ ++/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mno-explicit-relocs -mtls-dialect=trad -mcmodel=medium" } */ + /* { dg-final { scan-assembler "test:.*la\.global\t.*g\n\tjirl" } } */ + /* { dg-final { scan-assembler "test1:.*la\.global\t.*f\n\tjirl" } } */ + /* { dg-final { scan-assembler "test2:.*la\.local\t.*l\n\tjirl" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-4.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-4.c +index 0219688ae..f9dc12fea 100644 +--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-4.c ++++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-4.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mno-explicit-relocs -mcmodel=medium" } */ ++/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mno-explicit-relocs -mtls-dialect=trad -mcmodel=medium" } */ + /* { dg-final { scan-assembler "test:.*la\.global\t.*g\n\tjirl" } } */ + /* { dg-final { scan-assembler "test1:.*la\.local\t.*f\n\tjirl" } } */ + /* { dg-final { scan-assembler "test2:.*la\.local\t.*l\n\tjirl" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c 
b/gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c +index 4341f8212..4adda4202 100644 +--- a/gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c ++++ b/gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -fno-plt -mexplicit-relocs=none" } */ ++/* { dg-options "-march=loongarch64 -mabi=lp64d -O2 -mcmodel=extreme -mtls-dialect=trad -fno-plt -mexplicit-relocs=none" } */ + /* { dg-final { scan-assembler "test_le:.*la.tls.le\t\\\$r\[0-9\]+,\\\.L" { target tls_native } } } */ + /* { dg-final { scan-assembler "test_ie:.*la.tls.ie\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,\\\.L" { target tls_native } } } */ + /* { dg-final { scan-assembler "test_ld:.*la.tls.ld\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,\\\.L.*la.global\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,__tls_get_addr" { target tls_native } } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c b/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c +index 9432c477e..dfa1bf53c 100644 +--- a/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c ++++ b/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O0 -fno-plt -mcmodel=normal -mexplicit-relocs" } */ ++/* { dg-options "-O0 -fno-plt -mcmodel=normal -mtls-dialect=trad -mexplicit-relocs" } */ + /* { dg-final { scan-assembler "pcalau12i\t.*%got_pc_hi20\\(__tls_get_addr\\)\n\tld\.d.*%got_pc_lo12\\(__tls_get_addr\\)" { target tls_native } } } */ + + __attribute__ ((tls_model ("global-dynamic"))) __thread int a; +-- +2.43.0 + diff --git a/0160-LoongArch-Fix-missing-plugin-header.patch b/0160-LoongArch-Fix-missing-plugin-header.patch new file mode 100644 index 0000000..0fc9a39 --- /dev/null +++ b/0160-LoongArch-Fix-missing-plugin-header.patch @@ -0,0 +1,32 @@ +From bec97638d68c760f6ee4b0a86ce4f9ffe9a691b3 Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Tue, 2 Apr 2024 09:20:32 +0800 +Subject: [PATCH 160/188] 
LoongArch: Fix missing plugin header + +gcc/ChangeLog: + + * config/loongarch/t-loongarch: Add loongarch-def-arrays.h + to OPTION_H_EXTRA. +--- + gcc/config/loongarch/t-loongarch | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index 994f4d19c..488e8cff3 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -18,8 +18,9 @@ + + + GTM_H += loongarch-multilib.h +-OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \ +- $(srcdir)/config/loongarch/loongarch-tune.h \ ++OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \ ++ $(srcdir)/config/loongarch/loongarch-def-array.h \ ++ $(srcdir)/config/loongarch/loongarch-tune.h \ + $(srcdir)/config/loongarch/loongarch-cpucfg-map.h + + # Canonical target triplet from config.gcc +-- +2.43.0 + diff --git a/0161-LoongArch-Remove-unused-code.patch b/0161-LoongArch-Remove-unused-code.patch new file mode 100644 index 0000000..b52ccfe --- /dev/null +++ b/0161-LoongArch-Remove-unused-code.patch @@ -0,0 +1,344 @@ +From 47581dd6da960172bc768435400010748b3f97eb Mon Sep 17 00:00:00 2001 +From: Jiahao Xu +Date: Wed, 3 Apr 2024 09:38:23 +0800 +Subject: [PATCH 161/188] LoongArch: Remove unused code. + +gcc/ChangeLog: + + * config/loongarch/lasx.md: Remove unused code. + * config/loongarch/loongarch-protos.h + (loongarch_split_lsx_copy_d): Remove. + (loongarch_split_lsx_insert_d): Ditto. + (loongarch_split_lsx_fill_d): Ditto. + * config/loongarch/loongarch.cc + (loongarch_split_lsx_copy_d): Ditto. + (loongarch_split_lsx_insert_d): Ditto. + (loongarch_split_lsx_fill_d): Ditto. + * config/loongarch/lsx.md (lsx_vpickve2gr_du): Remove splitter. + (lsx_vpickve2gr_): Ditto. + (abs2): Remove expander. + (vabs2): Rename 2 abs2. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/vector/lsx/lsx-abs.c: New test. 
+--- + gcc/config/loongarch/lasx.md | 12 +-- + gcc/config/loongarch/loongarch-protos.h | 3 - + gcc/config/loongarch/loongarch.cc | 76 ---------------- + gcc/config/loongarch/lsx.md | 89 ++----------------- + .../gcc.target/loongarch/vector/lsx/lsx-abs.c | 26 ++++++ + 5 files changed, 35 insertions(+), 171 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-abs.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 45a0a8cc8..44a7d58ff 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -572,12 +572,7 @@ + (match_operand 3 "const__operand" "")))] + "ISA_HAS_LASX" + { +-#if 0 +- if (!TARGET_64BIT && (mode == V4DImode || mode == V4DFmode)) +- return "#"; +- else +-#endif +- return "xvinsgr2vr.\t%u0,%z1,%y3"; ++ return "xvinsgr2vr.\t%u0,%z1,%y3"; + } + [(set_attr "type" "simd_insert") + (set_attr "mode" "")]) +@@ -1446,10 +1441,7 @@ + if (which_alternative == 1) + return "xvldi.b\t%u0,0" ; + +- if (!TARGET_64BIT && (mode == V2DImode || mode == V2DFmode)) +- return "#"; +- else +- return "xvreplgr2vr.\t%u0,%z1"; ++ return "xvreplgr2vr.\t%u0,%z1"; + } + [(set_attr "type" "simd_fill") + (set_attr "mode" "") +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 8523da8d6..0c31a74b7 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -89,9 +89,6 @@ extern void loongarch_split_128bit_move (rtx, rtx); + extern bool loongarch_split_128bit_move_p (rtx, rtx); + extern void loongarch_split_256bit_move (rtx, rtx); + extern bool loongarch_split_256bit_move_p (rtx, rtx); +-extern void loongarch_split_lsx_copy_d (rtx, rtx, rtx, rtx (*)(rtx, rtx, rtx)); +-extern void loongarch_split_lsx_insert_d (rtx, rtx, rtx, rtx); +-extern void loongarch_split_lsx_fill_d (rtx, rtx); + extern const char *loongarch_output_move (rtx, rtx); + #ifdef RTX_CODE + extern void loongarch_expand_scc (rtx *); +diff 
--git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index e27335b3c..8d8a50b70 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4772,82 +4772,6 @@ loongarch_split_256bit_move (rtx dest, rtx src) + } + } + +- +-/* Split a COPY_S.D with operands DEST, SRC and INDEX. GEN is a function +- used to generate subregs. */ +- +-void +-loongarch_split_lsx_copy_d (rtx dest, rtx src, rtx index, +- rtx (*gen_fn)(rtx, rtx, rtx)) +-{ +- gcc_assert ((GET_MODE (src) == V2DImode && GET_MODE (dest) == DImode) +- || (GET_MODE (src) == V2DFmode && GET_MODE (dest) == DFmode)); +- +- /* Note that low is always from the lower index, and high is always +- from the higher index. */ +- rtx low = loongarch_subword (dest, false); +- rtx high = loongarch_subword (dest, true); +- rtx new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0); +- +- emit_insn (gen_fn (low, new_src, GEN_INT (INTVAL (index) * 2))); +- emit_insn (gen_fn (high, new_src, GEN_INT (INTVAL (index) * 2 + 1))); +-} +- +-/* Split a INSERT.D with operand DEST, SRC1.INDEX and SRC2. */ +- +-void +-loongarch_split_lsx_insert_d (rtx dest, rtx src1, rtx index, rtx src2) +-{ +- int i; +- gcc_assert (GET_MODE (dest) == GET_MODE (src1)); +- gcc_assert ((GET_MODE (dest) == V2DImode +- && (GET_MODE (src2) == DImode || src2 == const0_rtx)) +- || (GET_MODE (dest) == V2DFmode && GET_MODE (src2) == DFmode)); +- +- /* Note that low is always from the lower index, and high is always +- from the higher index. 
*/ +- rtx low = loongarch_subword (src2, false); +- rtx high = loongarch_subword (src2, true); +- rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0); +- rtx new_src1 = simplify_gen_subreg (V4SImode, src1, GET_MODE (src1), 0); +- i = exact_log2 (INTVAL (index)); +- gcc_assert (i != -1); +- +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, low, new_src1, +- GEN_INT (1 << (i * 2)))); +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, +- GEN_INT (1 << (i * 2 + 1)))); +-} +- +-/* Split FILL.D. */ +- +-void +-loongarch_split_lsx_fill_d (rtx dest, rtx src) +-{ +- gcc_assert ((GET_MODE (dest) == V2DImode +- && (GET_MODE (src) == DImode || src == const0_rtx)) +- || (GET_MODE (dest) == V2DFmode && GET_MODE (src) == DFmode)); +- +- /* Note that low is always from the lower index, and high is always +- from the higher index. */ +- rtx low, high; +- if (src == const0_rtx) +- { +- low = src; +- high = src; +- } +- else +- { +- low = loongarch_subword (src, false); +- high = loongarch_subword (src, true); +- } +- rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0); +- emit_insn (gen_lsx_vreplgr2vr_w (new_dest, low)); +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, GEN_INT (1 << 1))); +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, GEN_INT (1 << 3))); +-} +- +- + /* Return the appropriate instructions to move SRC into DEST. Assume + that SRC is operand 1 and DEST is operand 0. 
*/ + +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index dc81093e9..2eac11473 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -582,28 +582,11 @@ + (match_operand 3 "const__operand" "")))] + "ISA_HAS_LSX" + { +- if (!TARGET_64BIT && (mode == V2DImode || mode == V2DFmode)) +- return "#"; +- else +- return "vinsgr2vr.\t%w0,%z1,%y3"; ++ return "vinsgr2vr.\t%w0,%z1,%y3"; + } + [(set_attr "type" "simd_insert") + (set_attr "mode" "")]) + +-(define_split +- [(set (match_operand:LSX_D 0 "register_operand") +- (vec_merge:LSX_D +- (vec_duplicate:LSX_D +- (match_operand: 1 "_operand")) +- (match_operand:LSX_D 2 "register_operand") +- (match_operand 3 "const__operand")))] +- "reload_completed && ISA_HAS_LSX && !TARGET_64BIT" +- [(const_int 0)] +-{ +- loongarch_split_lsx_insert_d (operands[0], operands[2], operands[3], operands[1]); +- DONE; +-}) +- + (define_insn "lsx_vextrins__internal" + [(set (match_operand:LSX 0 "register_operand" "=f") + (vec_merge:LSX +@@ -653,70 +636,26 @@ + [(set_attr "type" "simd_copy") + (set_attr "mode" "")]) + +-(define_insn_and_split "lsx_vpickve2gr_du" ++(define_insn "lsx_vpickve2gr_du" + [(set (match_operand:DI 0 "register_operand" "=r") + (vec_select:DI + (match_operand:V2DI 1 "register_operand" "f") + (parallel [(match_operand 2 "const_0_or_1_operand" "")])))] + "ISA_HAS_LSX" +-{ +- if (TARGET_64BIT) +- return "vpickve2gr.du\t%0,%w1,%2"; +- else +- return "#"; +-} +- "reload_completed && ISA_HAS_LSX && !TARGET_64BIT" +- [(const_int 0)] +-{ +- loongarch_split_lsx_copy_d (operands[0], operands[1], operands[2], +- gen_lsx_vpickve2gr_wu); +- DONE; +-} ++ "vpickve2gr.du\t%0,%w1,%2" + [(set_attr "type" "simd_copy") + (set_attr "mode" "V2DI")]) + +-(define_insn_and_split "lsx_vpickve2gr_" ++(define_insn "lsx_vpickve2gr_" + [(set (match_operand: 0 "register_operand" "=r") + (vec_select: + (match_operand:LSX_D 1 "register_operand" "f") + (parallel [(match_operand 2 "const__operand" "")])))] + 
"ISA_HAS_LSX" +-{ +- if (TARGET_64BIT) +- return "vpickve2gr.\t%0,%w1,%2"; +- else +- return "#"; +-} +- "reload_completed && ISA_HAS_LSX && !TARGET_64BIT" +- [(const_int 0)] +-{ +- loongarch_split_lsx_copy_d (operands[0], operands[1], operands[2], +- gen_lsx_vpickve2gr_w); +- DONE; +-} ++ "vpickve2gr.\t%0,%w1,%2" + [(set_attr "type" "simd_copy") + (set_attr "mode" "")]) + +- +-(define_expand "abs2" +- [(match_operand:ILSX 0 "register_operand" "=f") +- (abs:ILSX (match_operand:ILSX 1 "register_operand" "f"))] +- "ISA_HAS_LSX" +-{ +- if (ISA_HAS_LSX) +- { +- emit_insn (gen_vabs2 (operands[0], operands[1])); +- DONE; +- } +- else +- { +- rtx reg = gen_reg_rtx (mode); +- emit_move_insn (reg, CONST0_RTX (mode)); +- emit_insn (gen_lsx_vadda_ (operands[0], operands[1], reg)); +- DONE; +- } +-}) +- + (define_expand "neg2" + [(set (match_operand:ILSX 0 "register_operand") + (neg:ILSX (match_operand:ILSX 1 "register_operand")))] +@@ -1369,25 +1308,11 @@ + if (which_alternative == 1) + return "vldi.\t%w0,0"; + +- if (!TARGET_64BIT && (mode == V2DImode || mode == V2DFmode)) +- return "#"; +- else +- return "vreplgr2vr.\t%w0,%z1"; ++ return "vreplgr2vr.\t%w0,%z1"; + } + [(set_attr "type" "simd_fill") + (set_attr "mode" "")]) + +-(define_split +- [(set (match_operand:LSX_D 0 "register_operand") +- (vec_duplicate:LSX_D +- (match_operand: 1 "register_operand")))] +- "reload_completed && ISA_HAS_LSX && !TARGET_64BIT" +- [(const_int 0)] +-{ +- loongarch_split_lsx_fill_d (operands[0], operands[1]); +- DONE; +-}) +- + (define_insn "logb2" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] +@@ -2428,7 +2353,7 @@ + [(set_attr "type" "simd_logic") + (set_attr "mode" "")]) + +-(define_insn "vabs2" ++(define_insn "abs2" + [(set (match_operand:ILSX 0 "register_operand" "=f") + (abs:ILSX (match_operand:ILSX 1 "register_operand" "f")))] + "ISA_HAS_LSX" +diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-abs.c 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-abs.c +new file mode 100644 +index 000000000..cf971badb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-abs.c +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx" } */ ++/* { dg-final { scan-assembler-times "vsigncov.w" 1 } } */ ++/* { dg-final { scan-assembler-times "vsigncov.d" 1 } } */ ++ ++int a[4], b[4]; ++ ++extern int abs (int); ++ ++void ++foo1 (void) ++{ ++ for (int i = 0; i < 4; i++) ++ a[i] = abs (b[i]); ++} ++ ++long la[2], lb[2]; ++ ++extern long labs (long); ++ ++void ++foo2 (void) ++{ ++ for (int i = 0; i < 2; i++) ++ la[i] = labs (lb[i]); ++} +-- +2.43.0 + diff --git a/0162-LoongArch-Set-default-alignment-for-functions-jumps-.patch b/0162-LoongArch-Set-default-alignment-for-functions-jumps-.patch new file mode 100644 index 0000000..42277b0 --- /dev/null +++ b/0162-LoongArch-Set-default-alignment-for-functions-jumps-.patch @@ -0,0 +1,135 @@ +From 7dff9d3f7fefe074e78cd7ff6529d7c1ea6cc3b1 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Tue, 2 Apr 2024 14:29:08 +0800 +Subject: [PATCH 162/188] LoongArch: Set default alignment for functions jumps + and loops [PR112919]. + +Xi Ruoyao set the alignment rules under LA464 in commit r14-1839, +but the macro ASM_OUTPUT_ALIGN_WITH_NOP was removed in R14-4674, +which affected the alignment rules. + +So I set different aligns on LA464 and LA664 again to test the +performance of spec2006, and modify the alignment based on the test +results. + +gcc/ChangeLog: + + PR target/112919 + * config/loongarch/loongarch-def.cc (la664_align): Newly defined + function that sets alignment rules under the LA664 microarchitecture. + * config/loongarch/loongarch-opts.cc + (loongarch_target_option_override): If not optimizing for size, set + the default alignment to what the target wants. + * config/loongarch/loongarch-tune.h (struct loongarch_align): Add + new member variables jump and loop. 
+--- + gcc/config/loongarch/loongarch-def.cc | 11 ++++++++--- + gcc/config/loongarch/loongarch-opts.cc | 19 +++++++++++++------ + gcc/config/loongarch/loongarch-tune.h | 22 +++++++++++++++------- + 3 files changed, 36 insertions(+), 16 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index 533dd0af2..a48050c5f 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -81,14 +81,19 @@ array_tune loongarch_cpu_cache = + + static inline loongarch_align la464_align () + { +- return loongarch_align ().function_ ("32").label_ ("16"); ++ return loongarch_align ().function_ ("32").loop_ ("16").jump_ ("16"); ++} ++ ++static inline loongarch_align la664_align () ++{ ++ return loongarch_align ().function_ ("8").loop_ ("8").jump_ ("32"); + } + + array_tune loongarch_cpu_align = + array_tune () +- .set (CPU_LOONGARCH64, la464_align ()) ++ .set (CPU_LOONGARCH64, la664_align ()) + .set (CPU_LA464, la464_align ()) +- .set (CPU_LA664, la464_align ()); ++ .set (CPU_LA664, la664_align ()); + + /* Default RTX cost initializer. */ + loongarch_rtx_cost_data::loongarch_rtx_cost_data () +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 062d430c2..c455c5e32 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -922,13 +922,20 @@ loongarch_target_option_override (struct loongarch_target *target, + { + loongarch_update_gcc_opt_status (target, opts, opts_set); + +- /* alignments */ +- if (opts->x_flag_align_functions && !opts->x_str_align_functions) +- opts->x_str_align_functions +- = loongarch_cpu_align[target->cpu_tune].function; ++ /* If not optimizing for size, set the default ++ alignment to what the target wants. 
*/ ++ if (!opts->x_optimize_size) ++ { ++ if (opts->x_flag_align_functions && !opts->x_str_align_functions) ++ opts->x_str_align_functions ++ = loongarch_cpu_align[target->cpu_tune].function; ++ ++ if (opts->x_flag_align_loops && !opts->x_str_align_loops) ++ opts->x_str_align_loops = loongarch_cpu_align[target->cpu_tune].loop; + +- if (opts->x_flag_align_labels && !opts->x_str_align_labels) +- opts->x_str_align_labels = loongarch_cpu_align[target->cpu_tune].label; ++ if (opts->x_flag_align_jumps && !opts->x_str_align_jumps) ++ opts->x_str_align_jumps = loongarch_cpu_align[target->cpu_tune].jump; ++ } + + /* Set up parameters to be used in prefetching algorithm. */ + int simultaneous_prefetches +diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h +index 26f163f0a..d286eee0b 100644 +--- a/gcc/config/loongarch/loongarch-tune.h ++++ b/gcc/config/loongarch/loongarch-tune.h +@@ -162,14 +162,16 @@ struct loongarch_cache { + } + }; + +-/* Alignment for functions and labels for best performance. For new uarchs +- the value should be measured via benchmarking. See the documentation for +- -falign-functions and -falign-labels in invoke.texi for the format. */ ++/* Alignment for functions loops and jumps for best performance. For new ++ uarchs the value should be measured via benchmarking. See the ++ documentation for -falign-functions, -falign-loops, and -falign-jumps in ++ invoke.texi for the format. 
*/ + struct loongarch_align { + const char *function; /* default value for -falign-functions */ +- const char *label; /* default value for -falign-labels */ ++ const char *loop; /* default value for -falign-loops */ ++ const char *jump; /* default value for -falign-jumps */ + +- loongarch_align () : function (nullptr), label (nullptr) {} ++ loongarch_align () : function (nullptr), loop (nullptr), jump (nullptr) {} + + loongarch_align function_ (const char *_function) + { +@@ -177,9 +179,15 @@ struct loongarch_align { + return *this; + } + +- loongarch_align label_ (const char *_label) ++ loongarch_align loop_ (const char *_loop) + { +- label = _label; ++ loop = _loop; ++ return *this; ++ } ++ ++ loongarch_align jump_ (const char *_jump) ++ { ++ jump = _jump; + return *this; + } + }; +-- +2.43.0 + diff --git a/0163-LoongArch-Enable-switchable-target.patch b/0163-LoongArch-Enable-switchable-target.patch new file mode 100644 index 0000000..465ff3b --- /dev/null +++ b/0163-LoongArch-Enable-switchable-target.patch @@ -0,0 +1,281 @@ +From 427d5f10951435241d883a13557f862683046ddd Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Mon, 8 Apr 2024 16:45:13 +0800 +Subject: [PATCH 163/188] LoongArch: Enable switchable target + +This patch fixes the back-end context switching in cases where functions +should be built with their own target contexts instead of the +global one, such as LTO linking and functions with target attributes (TBD). + + PR target/113233 + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_reg_init): + Reinitialize the loongarch_regno_mode_ok cache. + (loongarch_option_override): Same. + (loongarch_save_restore_target_globals): Restore target globals. + (loongarch_set_current_function): Restore the target contexts + for functions. + (TARGET_SET_CURRENT_FUNCTION): Define. + * config/loongarch/loongarch.h (SWITCHABLE_TARGET): Enable + switchable target context. 
+ * config/loongarch/loongarch-builtins.cc (loongarch_init_builtins): + Initialize all builtin functions at startup. + (loongarch_expand_builtin): Turn assertion of builtin availability + into a test. + +gcc/testsuite/ChangeLog: + + * lib/target-supports.exp: Define condition loongarch_sx_as. + * gcc.dg/lto/pr113233_0.c: New test. +--- + gcc/config/loongarch/loongarch-builtins.cc | 25 +++--- + gcc/config/loongarch/loongarch.cc | 91 ++++++++++++++++++++-- + gcc/config/loongarch/loongarch.h | 2 + + gcc/testsuite/gcc.dg/lto/pr113233_0.c | 14 ++++ + gcc/testsuite/lib/target-supports.exp | 12 +++ + 5 files changed, 127 insertions(+), 17 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/lto/pr113233_0.c + +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index e3b4dbc52..51abba007 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -2507,14 +2507,11 @@ loongarch_init_builtins (void) + for (i = 0; i < ARRAY_SIZE (loongarch_builtins); i++) + { + d = &loongarch_builtins[i]; +- if (d->avail ()) +- { +- type = loongarch_build_function_type (d->function_type); +- loongarch_builtin_decls[i] +- = add_builtin_function (d->name, type, i, BUILT_IN_MD, NULL, +- NULL); +- loongarch_get_builtin_decl_index[d->icode] = i; +- } ++ type = loongarch_build_function_type (d->function_type); ++ loongarch_builtin_decls[i] ++ = add_builtin_function (d->name, type, i, BUILT_IN_MD, NULL, ++ NULL); ++ loongarch_get_builtin_decl_index[d->icode] = i; + } + } + +@@ -3100,15 +3097,21 @@ loongarch_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) + { + tree fndecl; +- unsigned int fcode, avail; ++ unsigned int fcode; + const struct loongarch_builtin_description *d; + + fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + fcode = DECL_MD_FUNCTION_CODE (fndecl); + gcc_assert (fcode < ARRAY_SIZE (loongarch_builtins)); + d = 
&loongarch_builtins[fcode]; +- avail = d->avail (); +- gcc_assert (avail != 0); ++ ++ if (!d->avail ()) ++ { ++ error_at (EXPR_LOCATION (exp), ++ "built-in function %qD is not enabled", fndecl); ++ return target; ++ } ++ + switch (d->builtin_type) + { + case LARCH_BUILTIN_DIRECT: +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 8d8a50b70..50ab6a82a 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -7567,15 +7567,19 @@ loongarch_global_init (void) + loongarch_dwarf_regno[i] = INVALID_REGNUM; + } + ++ /* Function to allocate machine-dependent function status. */ ++ init_machine_status = &loongarch_init_machine_status; ++}; ++ ++static void ++loongarch_reg_init (void) ++{ + /* Set up loongarch_hard_regno_mode_ok. */ + for (int mode = 0; mode < MAX_MACHINE_MODE; mode++) + for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + loongarch_hard_regno_mode_ok_p[mode][regno] + = loongarch_hard_regno_mode_ok_uncached (regno, (machine_mode) mode); +- +- /* Function to allocate machine-dependent function status. */ +- init_machine_status = &loongarch_init_machine_status; +-}; ++} + + static void + loongarch_option_override_internal (struct loongarch_target *target, +@@ -7602,20 +7606,92 @@ loongarch_option_override_internal (struct loongarch_target *target, + + /* Override some options according to the resolved target. */ + loongarch_target_option_override (target, opts, opts_set); ++ ++ target_option_default_node = target_option_current_node ++ = build_target_option_node (opts, opts_set); ++ ++ loongarch_reg_init (); ++} ++ ++/* Remember the last target of loongarch_set_current_function. */ ++ ++static GTY(()) tree loongarch_previous_fndecl; ++ ++/* Restore or save the TREE_TARGET_GLOBALS from or to new_tree. ++ Used by loongarch_set_current_function to ++ make sure optab availability predicates are recomputed when necessary. 
*/ ++ ++static void ++loongarch_save_restore_target_globals (tree new_tree) ++{ ++ if (TREE_TARGET_GLOBALS (new_tree)) ++ restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); ++ else if (new_tree == target_option_default_node) ++ restore_target_globals (&default_target_globals); ++ else ++ TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); ++} ++ ++/* Implement TARGET_SET_CURRENT_FUNCTION. */ ++ ++static void ++loongarch_set_current_function (tree fndecl) ++{ ++ if (fndecl == loongarch_previous_fndecl) ++ return; ++ ++ tree old_tree; ++ if (loongarch_previous_fndecl == NULL_TREE) ++ old_tree = target_option_current_node; ++ else if (DECL_FUNCTION_SPECIFIC_TARGET (loongarch_previous_fndecl)) ++ old_tree = DECL_FUNCTION_SPECIFIC_TARGET (loongarch_previous_fndecl); ++ else ++ old_tree = target_option_default_node; ++ ++ if (fndecl == NULL_TREE) ++ { ++ if (old_tree != target_option_current_node) ++ { ++ loongarch_previous_fndecl = NULL_TREE; ++ cl_target_option_restore (&global_options, &global_options_set, ++ TREE_TARGET_OPTION ++ (target_option_current_node)); ++ } ++ return; ++ } ++ ++ tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); ++ if (new_tree == NULL_TREE) ++ new_tree = target_option_default_node; ++ ++ loongarch_previous_fndecl = fndecl; ++ ++ if (new_tree == old_tree) ++ return; ++ ++ cl_target_option_restore (&global_options, &global_options_set, ++ TREE_TARGET_OPTION (new_tree)); ++ ++ loongarch_reg_init (); ++ ++ loongarch_save_restore_target_globals (new_tree); + } + ++ ++ + /* Implement TARGET_OPTION_OVERRIDE. */ + + static void + loongarch_option_override (void) + { ++ /* Global initializations. */ ++ loongarch_global_init (); ++ + /* Setting up the target configuration. */ + loongarch_option_override_internal (&la_target, + &global_options, + &global_options_set); + +- /* Global initializations. */ +- loongarch_global_init (); + } + + /* Implement TARGET_OPTION_SAVE. 
*/ +@@ -10931,6 +11007,9 @@ loongarch_asm_code_end (void) + #undef TARGET_OPTION_RESTORE + #define TARGET_OPTION_RESTORE loongarch_option_restore + ++#undef TARGET_SET_CURRENT_FUNCTION ++#define TARGET_SET_CURRENT_FUNCTION loongarch_set_current_function ++ + #undef TARGET_LEGITIMIZE_ADDRESS + #define TARGET_LEGITIMIZE_ADDRESS loongarch_legitimize_address + +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index 221e8b286..089206605 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -23,6 +23,8 @@ along with GCC; see the file COPYING3. If not see + + #include "config/loongarch/loongarch-opts.h" + ++#define SWITCHABLE_TARGET 1 ++ + #define TARGET_SUPPORTS_WIDE_INT 1 + + /* Macros to silence warnings about numbers being signed in traditional +diff --git a/gcc/testsuite/gcc.dg/lto/pr113233_0.c b/gcc/testsuite/gcc.dg/lto/pr113233_0.c +new file mode 100644 +index 000000000..0a045c519 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/lto/pr113233_0.c +@@ -0,0 +1,14 @@ ++/* { dg-require-effective-target loongarch_sx_as } */ ++/* { dg-lto-do link } */ ++/* { dg-skip-if "" { ! 
{ loongarch*-linux-* } } } */ ++/* { dg-lto-options { {-mlsx } } } */ ++/* { dg-suppress-ld-options { -mlsx } } */ ++ ++#include ++ ++int main (void) ++{ ++ __m128i a, b, c; ++ c = __lsx_vand_v (a, b); ++ return 0; ++} +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index 20fbd43ee..b673c92b5 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -10549,6 +10549,18 @@ proc check_vect_support_and_set_flags { } { + return 1 + } + ++proc check_effective_target_loongarch_sx_as { } { ++ return [check_no_compiler_messages loongarch_sx_as object { ++ #include ++ int main (void) ++ { ++ __m128i a, b, c; ++ c = __lsx_vand_v (a, b); ++ return 0; ++ } ++ } "-mlsx"] ++} ++ + proc check_effective_target_loongarch_sx_hw { } { + return [check_runtime loongarch_sx_hw { + #include +-- +2.43.0 + diff --git a/0164-LoongArch-Define-ISA-versions.patch b/0164-LoongArch-Define-ISA-versions.patch new file mode 100644 index 0000000..8994f2d --- /dev/null +++ b/0164-LoongArch-Define-ISA-versions.patch @@ -0,0 +1,1016 @@ +From 66c8369ff9e5987c14786692cf6fd945a94273a1 Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Tue, 23 Apr 2024 10:42:47 +0800 +Subject: [PATCH 164/188] LoongArch: Define ISA versions + +These ISA versions are defined as -march= parameters and +are recommended for building binaries for distribution. + +Detailed description of these definitions can be found at +https://github.com/loongson/la-toolchain-conventions, which +the LoongArch GCC port aims to conform to. + +gcc/ChangeLog: + + * config.gcc: Make la64v1.0 the default ISA preset of the lp64d ABI. + * config/loongarch/genopts/loongarch-strings: Define la64v1.0, la64v1.1. + * config/loongarch/genopts/loongarch.opt.in: Likewise. + * config/loongarch/loongarch-c.cc (LARCH_CPP_SET_PROCESSOR): Likewise. + (loongarch_cpu_cpp_builtins): Likewise. + * config/loongarch/loongarch-cpu.cc (get_native_prid): Likewise. 
+ (fill_native_cpu_config): Likewise. + * config/loongarch/loongarch-def.cc (array_tune): Likewise. + * config/loongarch/loongarch-def.h: Likewise. + * config/loongarch/loongarch-driver.cc (driver_set_m_parm): Likewise. + (driver_get_normalized_m_opts): Likewise. + * config/loongarch/loongarch-opts.cc (default_tune_for_arch): Likewise. + (TUNE_FOR_ARCH): Likewise. + (arch_str): Likewise. + (loongarch_target_option_override): Likewise. + * config/loongarch/loongarch-opts.h (TARGET_uARCH_LA464): Likewise. + (TARGET_uARCH_LA664): Likewise. + * config/loongarch/loongarch-str.h (STR_CPU_ABI_DEFAULT): Likewise. + (STR_ARCH_ABI_DEFAULT): Likewise. + (STR_TUNE_GENERIC): Likewise. + (STR_ARCH_LA64V1_0): Likewise. + (STR_ARCH_LA64V1_1): Likewise. + * config/loongarch/loongarch.cc (loongarch_cpu_sched_reassociation_width): Likewise. + (loongarch_asm_code_end): Likewise. + * config/loongarch/loongarch.opt: Likewise. + * doc/invoke.texi: Likewise. +--- + gcc/config.gcc | 34 ++++---- + .../loongarch/genopts/loongarch-strings | 5 +- + gcc/config/loongarch/genopts/loongarch.opt.in | 43 ++++++++-- + gcc/config/loongarch/loongarch-c.cc | 37 +++------ + gcc/config/loongarch/loongarch-cpu.cc | 35 ++++---- + gcc/config/loongarch/loongarch-def.cc | 83 +++++++++++++------ + gcc/config/loongarch/loongarch-def.h | 37 ++++++--- + gcc/config/loongarch/loongarch-driver.cc | 8 +- + gcc/config/loongarch/loongarch-opts.cc | 66 +++++++++++---- + gcc/config/loongarch/loongarch-opts.h | 4 +- + gcc/config/loongarch/loongarch-str.h | 5 +- + gcc/config/loongarch/loongarch.cc | 11 +-- + gcc/config/loongarch/loongarch.opt | 43 ++++++++-- + gcc/doc/invoke.texi | 57 ++++++++----- + 14 files changed, 300 insertions(+), 168 deletions(-) + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 1db558d4c..c6820d0f1 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -5035,7 +5035,7 @@ case "${target}" in + + # Perform initial sanity checks on --with-* options. 
+ case ${with_arch} in +- "" | abi-default | loongarch64 | la[46]64) ;; # OK, append here. ++ "" | la64v1.[01] | abi-default | loongarch64 | la[46]64) ;; # OK, append here. + native) + if test x${host} != x${target}; then + echo "--with-arch=native is illegal for cross-compiler." 1>&2 +@@ -5082,10 +5082,18 @@ case "${target}" in + + # Infer ISA-related default options from the ABI: pass 1 + case ${abi_base}/${abi_ext} in +- lp64*/base) ++ lp64d/base) + # architectures that support lp64* ABI +- arch_pattern="native|abi-default|loongarch64|la[46]64" +- # default architecture for lp64* ABI ++ arch_pattern="native|abi-default|la64v1.[01]|loongarch64|la[46]64" ++ ++ # default architecture for lp64d ABI ++ arch_default="la64v1.0" ++ ;; ++ lp64[fs]/base) ++ # architectures that support lp64* ABI ++ arch_pattern="native|abi-default|la64v1.[01]|loongarch64|la[46]64" ++ ++ # default architecture for lp64[fs] ABI + arch_default="abi-default" + ;; + *) +@@ -5157,15 +5165,7 @@ case "${target}" in + + + # Check default with_tune configuration using with_arch. +- case ${with_arch} in +- loongarch64) +- tune_pattern="native|abi-default|loongarch64|la[46]64" +- ;; +- *) +- # By default, $with_tune == $with_arch +- tune_pattern="*" +- ;; +- esac ++ tune_pattern="native|generic|loongarch64|la[46]64" + + case ${with_tune} in + "") ;; # OK +@@ -5215,7 +5215,7 @@ case "${target}" in + # Fixed: use the default gcc configuration for all multilib + # builds by default. + with_multilib_default="" ;; +- arch,native|arch,loongarch64|arch,la[46]64) # OK, append here. ++ arch,native|arch,la64v1.[01]|arch,loongarch64|arch,la[46]64) # OK, append here. + with_multilib_default="/march=${component}" ;; + arch,*) + with_multilib_default="/march=abi-default" +@@ -5315,7 +5315,7 @@ case "${target}" in + if test x${parse_state} = x"arch"; then + # -march option + case ${component} in +- native | abi-default | loongarch64 | la[46]64) # OK, append here. 
++ native | abi-default | la64v1.[01] | loongarch64 | la[46]64) # OK, append here. + # Append -march spec for each multilib variant. + loongarch_multilib_list_make="${loongarch_multilib_list_make}/march=${component}" + parse_state="opts" +@@ -5858,7 +5858,7 @@ case ${target} in + # See macro definitions from loongarch-opts.h and loongarch-cpu.h. + + # Architecture +- tm_defines="${tm_defines} DEFAULT_CPU_ARCH=CPU_$(echo ${with_arch} | tr a-z- A-Z_)" ++ tm_defines="${tm_defines} DEFAULT_CPU_ARCH=ARCH_$(echo ${with_arch} | tr a-z.- A-Z__)" + + # Base ABI type + tm_defines="${tm_defines} DEFAULT_ABI_BASE=ABI_BASE_$(echo ${abi_base} | tr a-z- A-Z_)" +@@ -5870,7 +5870,7 @@ case ${target} in + + # Microarchitecture + if test x${with_tune} != x; then +- tm_defines="${tm_defines} DEFAULT_CPU_TUNE=CPU_$(echo ${with_tune} | tr a-z- A-Z_)" ++ tm_defines="${tm_defines} DEFAULT_CPU_TUNE=TUNE_$(echo ${with_tune} | tr a-z.- A-Z__)" + fi + + # FPU adjustment +diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings +index 99fd4e7cd..fd2f9b4f3 100644 +--- a/gcc/config/loongarch/genopts/loongarch-strings ++++ b/gcc/config/loongarch/genopts/loongarch-strings +@@ -23,10 +23,13 @@ OPTSTR_ARCH arch + OPTSTR_TUNE tune + + STR_CPU_NATIVE native +-STR_CPU_ABI_DEFAULT abi-default ++STR_ARCH_ABI_DEFAULT abi-default ++STR_TUNE_GENERIC generic + STR_CPU_LOONGARCH64 loongarch64 + STR_CPU_LA464 la464 + STR_CPU_LA664 la664 ++STR_ARCH_LA64V1_0 la64v1.0 ++STR_ARCH_LA64V1_1 la64v1.1 + + # Base architecture + STR_ISA_BASE_LA64 la64 +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index f3d53f03c..0ecd10922 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -95,30 +95,55 @@ Enable LoongArch Advanced SIMD Extension (LASX, 256-bit). 
+ + ;; Base target models (implies ISA & tune parameters) + Enum +-Name(cpu_type) Type(int) +-LoongArch CPU types: ++Name(arch_type) Type(int) ++LoongArch ARCH presets: + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_NATIVE@@) Value(CPU_NATIVE) ++Enum(arch_type) String(@@STR_CPU_NATIVE@@) Value(ARCH_NATIVE) + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_ABI_DEFAULT@@) Value(CPU_ABI_DEFAULT) ++Enum(arch_type) String(@@STR_ARCH_ABI_DEFAULT@@) Value(ARCH_ABI_DEFAULT) + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_LOONGARCH64@@) Value(CPU_LOONGARCH64) ++Enum(arch_type) String(@@STR_CPU_LOONGARCH64@@) Value(ARCH_LOONGARCH64) + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_LA464@@) Value(CPU_LA464) ++Enum(arch_type) String(@@STR_CPU_LA464@@) Value(ARCH_LA464) + + EnumValue +-Enum(cpu_type) String(@@STR_CPU_LA664@@) Value(CPU_LA664) ++Enum(arch_type) String(@@STR_CPU_LA664@@) Value(ARCH_LA664) ++ ++EnumValue ++Enum(arch_type) String(@@STR_ARCH_LA64V1_0@@) Value(ARCH_LA64V1_0) ++ ++EnumValue ++Enum(arch_type) String(@@STR_ARCH_LA64V1_1@@) Value(ARCH_LA64V1_1) + + m@@OPTSTR_ARCH@@= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) Save ++Target RejectNegative Joined Enum(arch_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) Save + -m@@OPTSTR_ARCH@@=PROCESSOR Generate code for the given PROCESSOR ISA. 
+ ++Enum ++Name(tune_type) Type(int) ++LoongArch TUNE presets: ++ ++EnumValue ++Enum(tune_type) String(@@STR_CPU_NATIVE@@) Value(TUNE_NATIVE) ++ ++EnumValue ++Enum(tune_type) String(@@STR_TUNE_GENERIC@@) Value(TUNE_GENERIC) ++ ++EnumValue ++Enum(tune_type) String(@@STR_CPU_LOONGARCH64@@) Value(TUNE_LOONGARCH64) ++ ++EnumValue ++Enum(tune_type) String(@@STR_CPU_LA464@@) Value(TUNE_LA464) ++ ++EnumValue ++Enum(tune_type) String(@@STR_CPU_LA664@@) Value(TUNE_LA664) ++ + m@@OPTSTR_TUNE@@= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) Save ++Target RejectNegative Joined Enum(tune_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) Save + -m@@OPTSTR_TUNE@@=PROCESSOR Generate optimized code for PROCESSOR. + + +diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc +index df2a482ad..153db75b0 100644 +--- a/gcc/config/loongarch/loongarch-c.cc ++++ b/gcc/config/loongarch/loongarch-c.cc +@@ -31,29 +31,6 @@ along with GCC; see the file COPYING3. If not see + #define builtin_define(TXT) cpp_define (pfile, TXT) + #define builtin_assert(TXT) cpp_assert (pfile, TXT) + +-/* Define preprocessor macros for the -march and -mtune options. +- PREFIX is either _LOONGARCH_ARCH or _LOONGARCH_TUNE, INFO is +- the selected processor. If INFO's canonical name is "foo", +- define PREFIX to be "foo", and define an additional macro +- PREFIX_FOO. 
*/ +-#define LARCH_CPP_SET_PROCESSOR(PREFIX, CPU_TYPE) \ +- do \ +- { \ +- char *macro, *p; \ +- int cpu_type = (CPU_TYPE); \ +- \ +- macro = concat ((PREFIX), "_", \ +- loongarch_cpu_strings[cpu_type], NULL); \ +- for (p = macro; *p != 0; p++) \ +- *p = TOUPPER (*p); \ +- \ +- builtin_define (macro); \ +- builtin_define_with_value ((PREFIX), \ +- loongarch_cpu_strings[cpu_type], 1); \ +- free (macro); \ +- } \ +- while (0) +- + void + loongarch_cpu_cpp_builtins (cpp_reader *pfile) + { +@@ -61,11 +38,17 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + builtin_assert ("cpu=loongarch"); + builtin_define ("__loongarch__"); + +- LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_ARCH", la_target.cpu_arch); +- LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_TUNE", la_target.cpu_tune); ++ builtin_define_with_value ("__loongarch_arch", ++ loongarch_arch_strings[la_target.cpu_arch], 1); ++ ++ builtin_define_with_value ("__loongarch_tune", ++ loongarch_tune_strings[la_target.cpu_tune], 1); ++ ++ builtin_define_with_value ("_LOONGARCH_ARCH", ++ loongarch_arch_strings[la_target.cpu_arch], 1); + +- LARCH_CPP_SET_PROCESSOR ("__loongarch_arch", la_target.cpu_arch); +- LARCH_CPP_SET_PROCESSOR ("__loongarch_tune", la_target.cpu_tune); ++ builtin_define_with_value ("_LOONGARCH_TUNE", ++ loongarch_tune_strings[la_target.cpu_tune], 1); + + /* Base architecture / ABI. 
*/ + if (TARGET_64BIT) +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index 551d4f72c..eb1eb8011 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -62,7 +62,7 @@ cache_cpucfg (void) + uint32_t + get_native_prid (void) + { +- /* Fill loongarch_cpu_default_config[CPU_NATIVE] with cpucfg data, ++ /* Fill loongarch_cpu_default_config[ARCH_NATIVE] with cpucfg data, + see "Loongson Architecture Reference Manual" + (Volume 1, Section 2.2.10.5) */ + return cpucfg_cache[0]; +@@ -76,13 +76,14 @@ get_native_prid_str (void) + return (const char*) prid_str; + } + +-/* Fill property tables for CPU_NATIVE. */ ++/* Fill property tables for ARCH_NATIVE / TUNE_NATIVE. */ + void + fill_native_cpu_config (struct loongarch_target *tgt) + { +- int arch_native_p = tgt->cpu_arch == CPU_NATIVE; +- int tune_native_p = tgt->cpu_tune == CPU_NATIVE; +- int native_cpu_type = CPU_NATIVE; ++ int arch_native_p = tgt->cpu_arch == ARCH_NATIVE; ++ int tune_native_p = tgt->cpu_tune == TUNE_NATIVE; ++ int native_cpu_arch = ARCH_NATIVE; ++ int native_cpu_tune = TUNE_NATIVE; + + /* Nothing needs to be done unless "-march/tune=native" + is given or implied. 
*/ +@@ -99,11 +100,13 @@ fill_native_cpu_config (struct loongarch_target *tgt) + switch (cpucfg_cache[0] & 0x00ffff00) + { + case 0x0014c000: /* LA464 */ +- native_cpu_type = CPU_LA464; ++ native_cpu_arch = ARCH_LA464; ++ native_cpu_tune = TUNE_LA464; + break; + + case 0x0014d000: /* LA664 */ +- native_cpu_type = CPU_LA664; ++ native_cpu_arch = ARCH_LA664; ++ native_cpu_tune = TUNE_LA664; + break; + + default: +@@ -119,7 +122,7 @@ fill_native_cpu_config (struct loongarch_target *tgt) + if (arch_native_p) + { + int tmp; +- tgt->cpu_arch = native_cpu_type; ++ tgt->cpu_arch = native_cpu_arch; + + auto &preset = loongarch_cpu_default_isa[tgt->cpu_arch]; + +@@ -127,8 +130,8 @@ fill_native_cpu_config (struct loongarch_target *tgt) + With: base architecture (ARCH) + At: cpucfg_words[1][1:0] */ + +- if (native_cpu_type != CPU_NATIVE) +- tmp = loongarch_cpu_default_isa[native_cpu_type].base; ++ if (native_cpu_arch != ARCH_NATIVE) ++ tmp = loongarch_cpu_default_isa[native_cpu_arch].base; + else + switch (cpucfg_cache[1] & 0x3) + { +@@ -173,7 +176,7 @@ fill_native_cpu_config (struct loongarch_target *tgt) + } + + /* Check consistency with PRID presets. */ +- if (native_cpu_type != CPU_NATIVE && tmp != preset.fpu) ++ if (native_cpu_arch != ARCH_NATIVE && tmp != preset.fpu) + warning (0, "floating-point unit %qs differs from PRID preset %qs", + loongarch_isa_ext_strings[tmp], + loongarch_isa_ext_strings[preset.fpu]); +@@ -182,7 +185,7 @@ fill_native_cpu_config (struct loongarch_target *tgt) + preset.fpu = tmp; + + +- /* Fill: loongarch_cpu_default_isa[CPU_NATIVE].simd ++ /* Fill: loongarch_cpu_default_isa[ARCH_NATIVE].simd + With: SIMD extension type (LSX, LASX) + At: cpucfg_words[2][7:6] */ + +@@ -212,7 +215,7 @@ fill_native_cpu_config (struct loongarch_target *tgt) + /* Check consistency with PRID presets. 
*/ + + /* +- if (native_cpu_type != CPU_NATIVE && tmp != preset.simd) ++ if (native_cpu_arch != ARCH_NATIVE && tmp != preset.simd) + warning (0, "SIMD extension %qs differs from PRID preset %qs", + loongarch_isa_ext_strings[tmp], + loongarch_isa_ext_strings[preset.simd]); +@@ -229,10 +232,10 @@ fill_native_cpu_config (struct loongarch_target *tgt) + if (cpucfg_cache[entry.cpucfg_word] & entry.cpucfg_bit) + hw_isa_evolution |= entry.isa_evolution_bit; + +- if (native_cpu_type != CPU_NATIVE) ++ if (native_cpu_arch != ARCH_NATIVE) + { + /* Check if the local CPU really supports the features of the base +- ISA of probed native_cpu_type. If any feature is not detected, ++ ISA of probed native_cpu_arch. If any feature is not detected, + either GCC or the hardware is buggy. */ + if ((preset.evolution & hw_isa_evolution) != hw_isa_evolution) + warning (0, +@@ -247,7 +250,7 @@ fill_native_cpu_config (struct loongarch_target *tgt) + + if (tune_native_p) + { +- tgt->cpu_tune = native_cpu_type; ++ tgt->cpu_tune = native_cpu_tune; + + /* Fill: loongarch_cpu_cache[tgt->cpu_tune] + With: cache size info +diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc +index a48050c5f..c3f9fc6de 100644 +--- a/gcc/config/loongarch/loongarch-def.cc ++++ b/gcc/config/loongarch/loongarch-def.cc +@@ -31,39 +31,64 @@ template + using array = loongarch_def_array; + + template +-using array_tune = array; ++using array_arch = array; + + template +-using array_arch = array; ++using array_tune = array; + +-/* CPU property tables. 
*/ +-array_tune loongarch_cpu_strings = array_tune () +- .set (CPU_NATIVE, STR_CPU_NATIVE) +- .set (CPU_ABI_DEFAULT, STR_CPU_ABI_DEFAULT) +- .set (CPU_LOONGARCH64, STR_CPU_LOONGARCH64) +- .set (CPU_LA464, STR_CPU_LA464) +- .set (CPU_LA664, STR_CPU_LA664); ++array_arch loongarch_arch_strings = array_arch () ++ .set (ARCH_NATIVE, STR_CPU_NATIVE) ++ .set (ARCH_ABI_DEFAULT, STR_ARCH_ABI_DEFAULT) ++ .set (ARCH_LOONGARCH64, STR_CPU_LOONGARCH64) ++ .set (ARCH_LA464, STR_CPU_LA464) ++ .set (ARCH_LA664, STR_CPU_LA664) ++ .set (ARCH_LA64V1_0, STR_ARCH_LA64V1_0) ++ .set (ARCH_LA64V1_1, STR_ARCH_LA64V1_1); ++ ++array_tune loongarch_tune_strings = array_tune () ++ .set (TUNE_NATIVE, STR_CPU_NATIVE) ++ .set (TUNE_GENERIC, STR_TUNE_GENERIC) ++ .set (TUNE_LOONGARCH64, STR_CPU_LOONGARCH64) ++ .set (TUNE_LA464, STR_CPU_LA464) ++ .set (TUNE_LA664, STR_CPU_LA664); + + array_arch loongarch_cpu_default_isa = + array_arch () +- .set (CPU_LOONGARCH64, ++ .set (ARCH_LOONGARCH64, + loongarch_isa () + .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64)) +- .set (CPU_LA464, ++ ++ .set (ARCH_LA464, + loongarch_isa () + .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64) + .simd_ (ISA_EXT_SIMD_LASX)) +- .set (CPU_LA664, ++ ++ .set (ARCH_LA664, + loongarch_isa () + .base_ (ISA_BASE_LA64) + .fpu_ (ISA_EXT_FPU64) + .simd_ (ISA_EXT_SIMD_LASX) ++ .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA ++ | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS ++ | OPTION_MASK_ISA_FRECIPE)) ++ .set (ARCH_LA64V1_0, ++ loongarch_isa () ++ .base_ (ISA_BASE_LA64) ++ .fpu_ (ISA_EXT_FPU64) ++ .simd_ (ISA_EXT_SIMD_LSX)) ++ ++ .set (ARCH_LA64V1_1, ++ loongarch_isa () ++ .base_ (ISA_BASE_LA64) ++ .fpu_ (ISA_EXT_FPU64) ++ .simd_ (ISA_EXT_SIMD_LSX) + .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA + | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS + | OPTION_MASK_ISA_FRECIPE)); + ++ + static inline loongarch_cache la464_cache () + { + return loongarch_cache () +@@ -75,9 +100,10 @@ static inline 
loongarch_cache la464_cache () + + array_tune loongarch_cpu_cache = + array_tune () +- .set (CPU_LOONGARCH64, la464_cache ()) +- .set (CPU_LA464, la464_cache ()) +- .set (CPU_LA664, la464_cache ()); ++ .set (TUNE_GENERIC, la464_cache ()) ++ .set (TUNE_LOONGARCH64, la464_cache ()) ++ .set (TUNE_LA464, la464_cache ()) ++ .set (TUNE_LA664, la464_cache ()); + + static inline loongarch_align la464_align () + { +@@ -91,9 +117,10 @@ static inline loongarch_align la664_align () + + array_tune loongarch_cpu_align = + array_tune () +- .set (CPU_LOONGARCH64, la664_align ()) +- .set (CPU_LA464, la464_align ()) +- .set (CPU_LA664, la664_align ()); ++ .set (TUNE_GENERIC, la664_align ()) ++ .set (TUNE_LOONGARCH64, la664_align ()) ++ .set (TUNE_LA464, la464_align ()) ++ .set (TUNE_LA664, la664_align ()); + + /* Default RTX cost initializer. */ + loongarch_rtx_cost_data::loongarch_rtx_cost_data () +@@ -117,7 +144,7 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data () + any known "-mtune" type). */ + array_tune loongarch_cpu_rtx_cost_data = + array_tune () +- .set (CPU_LA664, ++ .set (TUNE_LA664, + loongarch_rtx_cost_data () + .movcf2gr_ (COSTS_N_INSNS (1)) + .movgr2cf_ (COSTS_N_INSNS (1))); +@@ -140,16 +167,18 @@ const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = + .movcf2gr_ (COST_COMPLEX_INSN); + + array_tune loongarch_cpu_issue_rate = array_tune () +- .set (CPU_NATIVE, 4) +- .set (CPU_LOONGARCH64, 4) +- .set (CPU_LA464, 4) +- .set (CPU_LA664, 6); ++ .set (TUNE_NATIVE, 4) ++ .set (TUNE_GENERIC, 4) ++ .set (TUNE_LOONGARCH64, 4) ++ .set (TUNE_LA464, 4) ++ .set (TUNE_LA664, 6); + + array_tune loongarch_cpu_multipass_dfa_lookahead = array_tune () +- .set (CPU_NATIVE, 4) +- .set (CPU_LOONGARCH64, 4) +- .set (CPU_LA464, 4) +- .set (CPU_LA664, 6); ++ .set (TUNE_NATIVE, 4) ++ .set (TUNE_GENERIC, 4) ++ .set (TUNE_LOONGARCH64, 4) ++ .set (TUNE_LA464, 4) ++ .set (TUNE_LA664, 6); + + /* Wiring string definitions from loongarch-str.h to global arrays + with standard index 
values from loongarch-opts.h, so we can +diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h +index 2fe44da5a..10b5f9ddc 100644 +--- a/gcc/config/loongarch/loongarch-def.h ++++ b/gcc/config/loongarch/loongarch-def.h +@@ -177,21 +177,32 @@ struct loongarch_target + { + struct loongarch_isa isa; + struct loongarch_abi abi; +- int cpu_arch; /* CPU_ */ +- int cpu_tune; /* same */ ++ int cpu_arch; /* ARCH_ */ ++ int cpu_tune; /* TUNE_ */ + int cmodel; /* CMODEL_ */ + int tls_dialect; /* TLS_ */ + }; + +-/* CPU model */ ++/* ISA target presets (-march=*) */ + enum { +- CPU_NATIVE = 0, +- CPU_ABI_DEFAULT = 1, +- CPU_LOONGARCH64 = 2, +- CPU_LA464 = 3, +- CPU_LA664 = 4, +- N_ARCH_TYPES = 5, +- N_TUNE_TYPES = 5 ++ ARCH_NATIVE = 0, ++ ARCH_ABI_DEFAULT = 1, ++ ARCH_LOONGARCH64 = 2, ++ ARCH_LA464 = 3, ++ ARCH_LA664 = 4, ++ ARCH_LA64V1_0 = 5, ++ ARCH_LA64V1_1 = 6, ++ N_ARCH_TYPES = 7, ++}; ++ ++/* Tune target presets (-mtune=*) */ ++enum { ++ TUNE_NATIVE = 0, ++ TUNE_GENERIC = 1, ++ TUNE_LOONGARCH64 = 2, ++ TUNE_LA464 = 3, ++ TUNE_LA664 = 4, ++ N_TUNE_TYPES = 5, + }; + + /* TLS types. 
*/ +@@ -200,9 +211,11 @@ enum { + TLS_DESCRIPTORS = 1 + }; + +-/* CPU model properties */ ++/* Target preset properties */ + extern loongarch_def_array +- loongarch_cpu_strings; ++ loongarch_arch_strings; ++extern loongarch_def_array ++ loongarch_tune_strings; + extern loongarch_def_array + loongarch_cpu_default_isa; + extern loongarch_def_array +diff --git a/gcc/config/loongarch/loongarch-driver.cc b/gcc/config/loongarch/loongarch-driver.cc +index 8551cf94d..9e0b79994 100644 +--- a/gcc/config/loongarch/loongarch-driver.cc ++++ b/gcc/config/loongarch/loongarch-driver.cc +@@ -85,10 +85,10 @@ driver_set_m_parm (int argc, const char **argv) + loongarch_isa_ext_strings, 0, N_ISA_EXT_TYPES) + + LARCH_DRIVER_PARSE_PARM (la_target.cpu_arch, ARCH, \ +- loongarch_cpu_strings, 0, N_ARCH_TYPES) ++ loongarch_arch_strings, 0, N_ARCH_TYPES) + + LARCH_DRIVER_PARSE_PARM (la_target.cpu_tune, TUNE, \ +- loongarch_cpu_strings, 0, N_TUNE_TYPES) ++ loongarch_tune_strings, 0, N_TUNE_TYPES) + + LARCH_DRIVER_PARSE_PARM (la_target.cmodel, CMODEL, \ + loongarch_cmodel_strings, 0, N_CMODEL_TYPES) +@@ -190,7 +190,7 @@ driver_get_normalized_m_opts (int argc, const char **argv ATTRIBUTE_UNUSED) + APPEND_VAL (loongarch_abi_base_strings[la_target.abi.base]); + + APPEND_OPT (ARCH); +- APPEND_VAL (loongarch_cpu_strings[la_target.cpu_arch]); ++ APPEND_VAL (loongarch_arch_strings[la_target.cpu_arch]); + + APPEND_OPT (ISA_EXT_FPU); + APPEND_VAL (loongarch_isa_ext_strings[la_target.isa.fpu]); +@@ -202,7 +202,7 @@ driver_get_normalized_m_opts (int argc, const char **argv ATTRIBUTE_UNUSED) + APPEND_VAL (loongarch_cmodel_strings[la_target.cmodel]); + + APPEND_OPT (TUNE); +- APPEND_VAL (loongarch_cpu_strings[la_target.cpu_tune]); ++ APPEND_VAL (loongarch_tune_strings[la_target.cpu_tune]); + + obstack_1grow (&opts_obstack, '\0'); + +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index c455c5e32..735daeb7c 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc 
++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -101,6 +101,7 @@ static int abi_compat_p (const struct loongarch_isa *isa, + struct loongarch_abi abi); + static int abi_default_cpu_arch (struct loongarch_abi abi, + struct loongarch_isa *isa); ++static int default_tune_for_arch (int arch, int fallback); + + /* Mandatory configure-time defaults. */ + #ifndef DEFAULT_ABI_BASE +@@ -259,35 +260,35 @@ loongarch_config_target (struct loongarch_target *target, + /* If cpu_tune is not set using neither -mtune nor --with-tune, + the current cpu_arch is used as its default. */ + t.cpu_tune = constrained.tune ? target->cpu_tune +- : (constrained.arch ? target->cpu_arch : +- (with_default_tune ? DEFAULT_CPU_TUNE : DEFAULT_CPU_ARCH)); ++ : (constrained.arch ++ ? default_tune_for_arch (target->cpu_arch, with_default_tune ++ ? DEFAULT_CPU_TUNE : TUNE_GENERIC) ++ : (with_default_tune ? DEFAULT_CPU_TUNE ++ : default_tune_for_arch (DEFAULT_CPU_ARCH, TUNE_GENERIC))); + + + /* Handle -march/tune=native */ + #ifdef __loongarch__ + /* For native compilers, gather local CPU information +- and fill the "CPU_NATIVE" index of arrays defined in +- loongarch-cpu.c. */ ++ and fill the "ARCH_NATIVE/TUNE_NATIVE" index of arrays ++ defined in loongarch-cpu.c. 
*/ + + fill_native_cpu_config (&t); + + #else +- if (t.cpu_arch == CPU_NATIVE) ++ if (t.cpu_arch == ARCH_NATIVE) + fatal_error (UNKNOWN_LOCATION, + "%qs does not work on a cross compiler", + "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE); + +- else if (t.cpu_tune == CPU_NATIVE) ++ else if (t.cpu_tune == TUNE_NATIVE) + fatal_error (UNKNOWN_LOCATION, + "%qs does not work on a cross compiler", + "-m" OPTSTR_TUNE "=" STR_CPU_NATIVE); + #endif + +- /* Handle -march/tune=abi-default */ +- if (t.cpu_tune == CPU_ABI_DEFAULT) +- t.cpu_tune = abi_default_cpu_arch (t.abi, NULL); +- +- if (t.cpu_arch == CPU_ABI_DEFAULT) ++ /* Handle -march=abi-default */ ++ if (t.cpu_arch == ARCH_ABI_DEFAULT) + { + t.cpu_arch = abi_default_cpu_arch (t.abi, &(t.isa)); + loongarch_cpu_default_isa[t.cpu_arch] = t.isa; +@@ -438,16 +439,16 @@ config_target_isa: + so we adjust that first if it is not constrained. */ + int fallback_arch = abi_default_cpu_arch (t.abi, NULL); + +- if (t.cpu_arch == CPU_NATIVE) ++ if (t.cpu_arch == ARCH_NATIVE) + warning (0, "your native CPU architecture (%qs) " + "does not support %qs ABI, falling back to %<-m%s=%s%>", + arch_str (&t), abi_str (t.abi), OPTSTR_ARCH, +- loongarch_cpu_strings[fallback_arch]); ++ loongarch_arch_strings[fallback_arch]); + else + warning (0, "default CPU architecture (%qs) " + "does not support %qs ABI, falling back to %<-m%s=%s%>", + arch_str (&t), abi_str (t.abi), OPTSTR_ARCH, +- loongarch_cpu_strings[fallback_arch]); ++ loongarch_arch_strings[fallback_arch]); + + t.cpu_arch = fallback_arch; + constrained.arch = 1; +@@ -664,11 +665,40 @@ abi_default_cpu_arch (struct loongarch_abi abi, + case ABI_BASE_LP64F: + case ABI_BASE_LP64S: + *isa = isa_required (abi); +- return CPU_LOONGARCH64; ++ return ARCH_LOONGARCH64; + } + gcc_unreachable (); + } + ++static inline int ++default_tune_for_arch (int arch, int fallback) ++{ ++ int ret; ++ switch (arch) ++ { ++ ++#define TUNE_FOR_ARCH(NAME) \ ++ case ARCH_##NAME: \ ++ ret = TUNE_##NAME; \ ++ break; ++ ++ 
TUNE_FOR_ARCH(NATIVE) ++ TUNE_FOR_ARCH(LOONGARCH64) ++ TUNE_FOR_ARCH(LA464) ++ TUNE_FOR_ARCH(LA664) ++ ++#undef TUNE_FOR_ARCH ++ ++ case ARCH_ABI_DEFAULT: ++ case ARCH_LA64V1_0: ++ case ARCH_LA64V1_1: ++ ret = fallback; ++ } ++ ++ gcc_assert (0 <= ret && ret < N_TUNE_TYPES); ++ return ret; ++} ++ + static const char* + abi_str (struct loongarch_abi abi) + { +@@ -731,7 +761,7 @@ isa_str (const struct loongarch_isa *isa, char separator) + static const char* + arch_str (const struct loongarch_target *target) + { +- if (target->cpu_arch == CPU_NATIVE) ++ if (target->cpu_arch == ARCH_NATIVE) + { + /* Describe a native CPU with unknown PRID. */ + const char* isa_string = isa_str (&target->isa, ','); +@@ -741,7 +771,7 @@ arch_str (const struct loongarch_target *target) + APPEND_STRING (isa_string) + } + else +- APPEND_STRING (loongarch_cpu_strings[target->cpu_arch]); ++ APPEND_STRING (loongarch_arch_strings[target->cpu_arch]); + + APPEND1 ('\0') + return XOBFINISH (&msg_obstack, const char *); +@@ -956,7 +986,7 @@ loongarch_target_option_override (struct loongarch_target *target, + /* Other arch-specific overrides. */ + switch (target->cpu_arch) + { +- case CPU_LA664: ++ case ARCH_LA664: + /* Enable -mrecipe=all for LA664 by default. 
*/ + if (!opts_set->x_recip_mask) + { +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index a3b467f4c..325c1e29c 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -127,8 +127,8 @@ struct loongarch_flags { + (la_target.isa.evolution & OPTION_MASK_ISA_LD_SEQ_SA) + + /* TARGET_ macros for use in *.md template conditionals */ +-#define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464) +-#define TARGET_uARCH_LA664 (la_target.cpu_tune == CPU_LA664) ++#define TARGET_uARCH_LA464 (la_target.cpu_tune == TUNE_LA464) ++#define TARGET_uARCH_LA664 (la_target.cpu_tune == TUNE_LA664) + + /* Note: optimize_size may vary across functions, + while -m[no]-memcpy imposes a global constraint. */ +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index cacae38c0..3cbe12f7b 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -27,10 +27,13 @@ along with GCC; see the file COPYING3. If not see + #define OPTSTR_TUNE "tune" + + #define STR_CPU_NATIVE "native" +-#define STR_CPU_ABI_DEFAULT "abi-default" ++#define STR_ARCH_ABI_DEFAULT "abi-default" ++#define STR_TUNE_GENERIC "generic" + #define STR_CPU_LOONGARCH64 "loongarch64" + #define STR_CPU_LA464 "la464" + #define STR_CPU_LA664 "la664" ++#define STR_ARCH_LA64V1_0 "la64v1.0" ++#define STR_ARCH_LA64V1_1 "la64v1.1" + + #define STR_ISA_BASE_LA64 "la64" + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 50ab6a82a..c86a0856b 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -9605,9 +9605,10 @@ loongarch_cpu_sched_reassociation_width (struct loongarch_target *target, + + switch (target->cpu_tune) + { +- case CPU_LOONGARCH64: +- case CPU_LA464: +- case CPU_LA664: ++ case TUNE_GENERIC: ++ case TUNE_LOONGARCH64: ++ case TUNE_LA464: ++ case TUNE_LA664: + /* Vector part. 
*/ + if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode)) + { +@@ -10976,9 +10977,9 @@ loongarch_asm_code_end (void) + if (flag_verbose_asm) + { + fprintf (asm_out_file, "\n%s CPU: %s\n", ASM_COMMENT_START, +- loongarch_cpu_strings [la_target.cpu_arch]); ++ loongarch_arch_strings[la_target.cpu_arch]); + fprintf (asm_out_file, "%s Tune: %s\n", ASM_COMMENT_START, +- loongarch_cpu_strings [la_target.cpu_tune]); ++ loongarch_tune_strings[la_target.cpu_tune]); + fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START, + loongarch_isa_base_strings [la_target.isa.base]); + DUMP_FEATURE (ISA_HAS_FRECIPE); +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 6f730d886..69b3b965c 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -103,30 +103,55 @@ Enable LoongArch Advanced SIMD Extension (LASX, 256-bit). + + ;; Base target models (implies ISA & tune parameters) + Enum +-Name(cpu_type) Type(int) +-LoongArch CPU types: ++Name(arch_type) Type(int) ++LoongArch ARCH presets: + + EnumValue +-Enum(cpu_type) String(native) Value(CPU_NATIVE) ++Enum(arch_type) String(native) Value(ARCH_NATIVE) + + EnumValue +-Enum(cpu_type) String(abi-default) Value(CPU_ABI_DEFAULT) ++Enum(arch_type) String(abi-default) Value(ARCH_ABI_DEFAULT) + + EnumValue +-Enum(cpu_type) String(loongarch64) Value(CPU_LOONGARCH64) ++Enum(arch_type) String(loongarch64) Value(ARCH_LOONGARCH64) + + EnumValue +-Enum(cpu_type) String(la464) Value(CPU_LA464) ++Enum(arch_type) String(la464) Value(ARCH_LA464) + + EnumValue +-Enum(cpu_type) String(la664) Value(CPU_LA664) ++Enum(arch_type) String(la664) Value(ARCH_LA664) ++ ++EnumValue ++Enum(arch_type) String(la64v1.0) Value(ARCH_LA64V1_0) ++ ++EnumValue ++Enum(arch_type) String(la64v1.1) Value(ARCH_LA64V1_1) + + march= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET) Save ++Target RejectNegative Joined Enum(arch_type) 
Var(la_opt_cpu_arch) Init(M_OPT_UNSET) Save + -march=PROCESSOR Generate code for the given PROCESSOR ISA. + ++Enum ++Name(tune_type) Type(int) ++LoongArch TUNE presets: ++ ++EnumValue ++Enum(tune_type) String(native) Value(TUNE_NATIVE) ++ ++EnumValue ++Enum(tune_type) String(generic) Value(TUNE_GENERIC) ++ ++EnumValue ++Enum(tune_type) String(loongarch64) Value(TUNE_LOONGARCH64) ++ ++EnumValue ++Enum(tune_type) String(la464) Value(TUNE_LA464) ++ ++EnumValue ++Enum(tune_type) String(la664) Value(TUNE_LA664) ++ + mtune= +-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) Save ++Target RejectNegative Joined Enum(tune_type) Var(la_opt_cpu_tune) Init(M_OPT_UNSET) Save + -mtune=PROCESSOR Generate optimized code for PROCESSOR. + + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index c9a1969ad..f6d59317b 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -999,7 +999,7 @@ Objective-C and Objective-C++ Dialects}. + -msign-extend-enabled -muser-enabled} + + @emph{LoongArch Options} +-@gccoptlist{-march=@var{cpu-type} -mtune=@var{cpu-type} -mabi=@var{base-abi-type} @gol ++@gccoptlist{-march=@var{arch-type} -mtune=@var{tune-type} -mabi=@var{base-abi-type} @gol + -mfpu=@var{fpu-type} -msoft-float -msingle-float -mdouble-float @gol + -mbranch-cost=@var{n} -mcheck-zero-division -mno-check-zero-division @gol + -mcond-move-int -mno-cond-move-int @gol +@@ -24455,35 +24455,52 @@ Enable user-defined instructions. + These command-line options are defined for LoongArch targets: + + @table @gcctabopt +-@item -march=@var{cpu-type} +-@opindex -march +-Generate instructions for the machine type @var{cpu-type}. In contrast to +-@option{-mtune=@var{cpu-type}}, which merely tunes the generated code +-for the specified @var{cpu-type}, @option{-march=@var{cpu-type}} allows GCC +-to generate code that may not run at all on processors other than the one +-indicated. 
Specifying @option{-march=@var{cpu-type}} implies +-@option{-mtune=@var{cpu-type}}, except where noted otherwise. ++@opindex march ++@item -march=@var{arch-type} ++Generate instructions for the machine type @var{arch-type}. ++@option{-march=@var{arch-type}} allows GCC to generate code that ++may not run at all on processors other than the one indicated. + +-The choices for @var{cpu-type} are: ++The choices for @var{arch-type} are: + + @table @samp + @item native +-This selects the CPU to generate code for at compilation time by determining +-the processor type of the compiling machine. Using @option{-march=native} +-enables all instruction subsets supported by the local machine (hence +-the result might not run on different machines). Using @option{-mtune=native} +-produces code optimized for the local machine under the constraints +-of the selected instruction set. ++Local processor type detected by the native compiler. + @item loongarch64 +-A generic CPU with 64-bit extensions. ++Generic LoongArch 64-bit processor. + @item la464 +-LoongArch LA464 CPU with LBT, LSX, LASX, LVZ. ++LoongArch LA464-based processor with LSX, LASX. ++@item la664 ++LoongArch LA664-based processor with LSX, LASX ++and all LoongArch v1.1 instructions. ++@item la64v1.0 ++LoongArch64 ISA version 1.0. ++@item la64v1.1 ++LoongArch64 ISA version 1.1. + @end table + ++More information about LoongArch ISA versions can be found at ++@uref{https://github.com/loongson/la-toolchain-conventions}. ++ + @item -mtune=@var{cpu-type} + @opindex mtune +-Optimize the output for the given processor, specified by microarchitecture +-name. ++@item -mtune=@var{tune-type} ++Optimize the generated code for the given processor target. ++ ++The choices for @var{tune-type} are: ++ ++@table @samp ++@item native ++Local processor type detected by the native compiler. ++@item generic ++Generic LoongArch processor. ++@item loongarch64 ++Generic LoongArch 64-bit processor. ++@item la464 ++LoongArch LA464 core. 
++@item la664 ++LoongArch LA664 core. ++@end table + + @item -mabi=@var{base-abi-type} + @opindex mabi +-- +2.43.0 + diff --git a/0165-LoongArch-Define-builtin-macros-for-ISA-evolutions.patch b/0165-LoongArch-Define-builtin-macros-for-ISA-evolutions.patch new file mode 100644 index 0000000..f79b712 --- /dev/null +++ b/0165-LoongArch-Define-builtin-macros-for-ISA-evolutions.patch @@ -0,0 +1,678 @@ +From 9af73fb7213d5c10b3683465e6682ad20f5abe64 Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Tue, 23 Apr 2024 10:42:48 +0800 +Subject: [PATCH 165/188] LoongArch: Define builtin macros for ISA evolutions + +Detailed description of these definitions can be found at +https://github.com/loongson/la-toolchain-conventions, which +the LoongArch GCC port aims to conform to. + +gcc/ChangeLog: + + * config.gcc: Add loongarch-evolution.o. + * config/loongarch/genopts/genstr.sh: Enable generation of + loongarch-evolution.[cc,h]. + * config/loongarch/t-loongarch: Likewise. + * config/loongarch/genopts/gen-evolution.awk: New file. + * config/loongarch/genopts/isa-evolution.in: Mark ISA version + of introduction for each ISA evolution feature. + * config/loongarch/loongarch-c.cc (loongarch_cpu_cpp_builtins): + Define builtin macros for enabled ISA evolutions and the ISA + version. + * config/loongarch/loongarch-cpu.cc: Use loongarch-evolution.h. + * config/loongarch/loongarch.h: Likewise. + * config/loongarch/loongarch-cpucfg-map.h: Delete. + * config/loongarch/loongarch-evolution.cc: New file. + * config/loongarch/loongarch-evolution.h: New file. + * config/loongarch/loongarch-opts.h (ISA_HAS_FRECIPE): Define. + (ISA_HAS_DIV32): Likewise. + (ISA_HAS_LAM_BH): Likewise. + (ISA_HAS_LAMCAS): Likewise. + (ISA_HAS_LD_SEQ_SA): Likewise. 
+--- + gcc/config.gcc | 2 +- + .../loongarch/genopts/gen-evolution.awk | 230 ++++++++++++++++++ + gcc/config/loongarch/genopts/genstr.sh | 82 ++----- + gcc/config/loongarch/genopts/isa-evolution.in | 10 +- + gcc/config/loongarch/loongarch-c.cc | 23 ++ + gcc/config/loongarch/loongarch-cpu.cc | 2 +- + gcc/config/loongarch/loongarch-evolution.cc | 60 +++++ + ...rch-cpucfg-map.h => loongarch-evolution.h} | 46 +++- + gcc/config/loongarch/loongarch-opts.h | 11 - + gcc/config/loongarch/loongarch.h | 1 + + gcc/config/loongarch/t-loongarch | 26 +- + 11 files changed, 398 insertions(+), 95 deletions(-) + create mode 100644 gcc/config/loongarch/genopts/gen-evolution.awk + create mode 100644 gcc/config/loongarch/loongarch-evolution.cc + rename gcc/config/loongarch/{loongarch-cpucfg-map.h => loongarch-evolution.h} (52%) + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index c6820d0f1..a405e6d2e 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -458,7 +458,7 @@ loongarch*-*-*) + cpu_type=loongarch + d_target_objs="loongarch-d.o" + extra_headers="larchintrin.h lsxintrin.h lasxintrin.h" +- extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" ++ extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o loongarch-evolution.o" + extra_gcc_objs="loongarch-driver.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" + extra_options="${extra_options} g.opt fused-madd.opt" + ;; +diff --git a/gcc/config/loongarch/genopts/gen-evolution.awk b/gcc/config/loongarch/genopts/gen-evolution.awk +new file mode 100644 +index 000000000..4d105afa9 +--- /dev/null ++++ b/gcc/config/loongarch/genopts/gen-evolution.awk +@@ -0,0 +1,230 @@ ++#!/usr/bin/gawk ++# ++# A simple script that generates loongarch-evolution.h ++# from genopts/isa-evolution.in ++# ++# Copyright (C) 2021-2024 Free Software Foundation, Inc. ++# ++# This file is part of GCC. 
++# ++# GCC is free software; you can redistribute it and/or modify it under ++# the terms of the GNU General Public License as published by the Free ++# Software Foundation; either version 3, or (at your option) any later ++# version. ++# ++# GCC is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++# License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . ++ ++BEGIN { ++ # isa_version_major[] ++ # isa_version_minor[] ++ # cpucfg_word[] ++ # cpucfg_bit_in_word[] ++ # name_capitalized[] ++ # comment[] ++} ++ ++{ ++ cpucfg_word[NR] = $1 ++ cpucfg_bit_in_word[NR] = $2 ++ name[NR] = gensub(/-/, "_", "g", $3) ++ name_capitalized[NR] = toupper(name[NR]) ++ isa_version_major[NR] = gensub(/^([1-9][0-9]*)\.([0-9]+)$/, "\\1", 1, $4) ++ isa_version_minor[NR] = gensub(/^([1-9][0-9]*)\.([0-9]+)$/, "\\2", 1, $4) ++ ++ $1 = $2 = $3 = $4 = "" ++ sub (/^\s*/, "") ++ comment[NR] = $0 ++} ++ ++function copyright_header(from_year,to_year) ++{ ++ print " Copyright (C) " from_year "-" to_year \ ++ " Free Software Foundation, Inc." ++ print "" ++ print "This file is part of GCC." ++ print "" ++ print "GCC is free software; you can redistribute it and/or modify" ++ print "it under the terms of the GNU General Public License as published by" ++ print "the Free Software Foundation; either version 3, or (at your option)" ++ print "any later version." ++ print "" ++ print "GCC is distributed in the hope that it will be useful," ++ print "but WITHOUT ANY WARRANTY; without even the implied warranty of" ++ print "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the" ++ print "GNU General Public License for more details." 
++ print "" ++ print "You should have received a copy of the GNU General Public License" ++ print "along with GCC; see the file COPYING3. If not see" ++ print "." ++} ++ ++function gen_cpucfg_map() ++{ ++ print "static constexpr struct {" ++ print " int cpucfg_word;" ++ print " unsigned int cpucfg_bit;" ++ print " HOST_WIDE_INT isa_evolution_bit;" ++ print "} cpucfg_map[] = {" ++ ++ for (i = 1; i <= NR; i++) ++ printf (" { %d, 1u << %d, OPTION_MASK_ISA_%s },\n", ++ cpucfg_word[i], cpucfg_bit_in_word[i], name_capitalized[i]) ++ ++ print "};" ++} ++ ++function gen_cpucfg_useful_idx() ++{ ++ split("0 1 2 16 17 18 19", init_useful_idx) ++ ++ delete idx_bucket ++ ++ for (i in init_useful_idx) ++ idx_bucket[init_useful_idx[i]] = 1 ++ delete init_useful_idx ++ ++ for (i in cpucfg_word) ++ idx_bucket[cpucfg_word[i]] = 1 ++ ++ delete idx_list ++ for (i in idx_bucket) ++ idx_list[length(idx_list)-1] = i+0 ++ delete idx_bucket ++ ++ asort (idx_list) ++ ++ print "static constexpr int cpucfg_useful_idx[] = {" ++ for (i in idx_list) ++ printf(" %d,\n", idx_list[i]) ++ print "};" ++ ++ print "" ++ ++ printf ("static constexpr int N_CPUCFG_WORDS = %d;\n", ++ idx_list[length(idx_list)] + 1) ++ ++ delete idx_list ++} ++ ++function gen_evolution_decl() ++{ ++ print "/* ISA evolution features */" ++ print "enum {" ++ ++ for (i = 1; i <= NR; i++) ++ print " EVO_" name_capitalized[i] " = " i - 1 "," ++ ++ print " N_EVO_FEATURES = " NR ++ print "};" ++ print "" ++ ++ print "/* Condition macros */" ++ for (i = 1; i <= NR; i++) ++ printf ("#define ISA_HAS_%s \\\n" \ ++ " (la_target.isa.evolution & OPTION_MASK_ISA_%s)\n", ++ name_capitalized[i], name_capitalized[i]) ++ print "" ++ ++ print "/* Bitmasks on la_target.isa.evolution. */" ++ print "extern int la_evo_feature_masks[N_EVO_FEATURES];" ++ print "" ++ print "/* Builtin macro names for the evolution features. 
*/" ++ print "extern const char* la_evo_macro_name[N_EVO_FEATURES];" ++ print "" ++ print "/* The ISA version where a specific feature is introduced. */" ++ print "extern int la_evo_version_major[N_EVO_FEATURES];" ++ print "extern int la_evo_version_minor[N_EVO_FEATURES];" ++} ++ ++function gen_full_header() ++{ ++ print "/* Generated automatically by \"genstr\" from \"isa-evolution.in\"." ++ print " Please do not edit this file directly." ++ print "" ++ ++ copyright_header(2023, 2024) ++ ++ print "*/" ++ print "" ++ ++ print "#ifndef LOONGARCH_EVOLUTION_H" ++ print "#define LOONGARCH_EVOLUTION_H" ++ print "" ++ print "#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)" ++ print "" ++ print "#include \"options.h\"" ++ print "" ++ ++ gen_cpucfg_map() ++ ++ print "" ++ ++ gen_cpucfg_useful_idx() ++ ++ print "" ++ ++ gen_evolution_decl() ++ ++ print "" ++ print "#endif" ++ print "" ++ print "#endif /* LOONGARCH_EVOLUTION_H */" ++} ++ ++ ++function gen_full_source() ++{ ++ print "/* Generated automatically by \"genstr\" from \"isa-evolution.in\"." ++ print " Please do not edit this file directly." 
++ print "" ++ ++ copyright_header(2023, 2024) ++ ++ print "*/" ++ print "" ++ print "#include \"config.h\"" ++ print "#include \"system.h\"" ++ print "#include \"coretypes.h\"" ++ print "#include \"options.h\"" ++ print "" ++ print "#include \"loongarch-evolution.h\"" ++ print "" ++ ++ print "int la_evo_feature_masks[] = {"; ++ for (i = 1; i <= NR; i++) ++ print " OPTION_MASK_ISA_" name_capitalized[i] "," ++ print "};" ++ print "" ++ ++ print "const char* la_evo_macro_name[] = {"; ++ for (i = 1; i <= NR; i++) ++ print " \"__loongarch_" name[i] "\"," ++ print "};" ++ print "" ++ ++ ++ print "int la_evo_version_major[] = {" ++ for (i = 1; i <= NR; i++) ++ print " " isa_version_major[i] ", /* " name_capitalized[i] " */" ++ print "};" ++ print "" ++ ++ print "int la_evo_version_minor[] = {" ++ for (i = 1; i <= NR; i++) ++ print " " isa_version_minor[i] ", /* " name_capitalized[i] " */" ++ print "};" ++} ++ ++END { ++ if (header_p) ++ gen_full_header() ++ else ++ gen_full_source() ++} +diff --git a/gcc/config/loongarch/genopts/genstr.sh b/gcc/config/loongarch/genopts/genstr.sh +index 391eca121..3e86c8152 100755 +--- a/gcc/config/loongarch/genopts/genstr.sh ++++ b/gcc/config/loongarch/genopts/genstr.sh +@@ -108,78 +108,30 @@ EOF + print("m"$3) + gsub(/-/, "_", $3) + print("Target Mask(ISA_"toupper($3)") Var(la_isa_evolution)") +- $1=""; $2=""; $3="" ++ $1=""; $2=""; $3=""; $4="" + sub(/^ */, "", $0) + print($0) + }' isa-evolution.in + } + +-gen_cpucfg_map() { +- cat <. */ +- +-#ifndef LOONGARCH_CPUCFG_MAP_H +-#define LOONGARCH_CPUCFG_MAP_H +- +-#include "options.h" +- +-static constexpr struct { +- int cpucfg_word; +- unsigned int cpucfg_bit; +- HOST_WIDE_INT isa_evolution_bit; +-} cpucfg_map[] = { +-EOF +- +- # Generate the strings from isa-evolution.in. 
+- awk '{ +- gsub(/-/, "_", $3) +- print(" { "$1", 1u << "$2", OPTION_MASK_ISA_"toupper($3)" },") +- }' isa-evolution.in +- +- echo "};" +- echo +- echo "static constexpr int cpucfg_useful_idx[] = {" +- +- awk 'BEGIN { print(" 0,\n 1,\n 2,\n 16,\n 17,\n 18,\n 19,") } +- {if ($1+0 > max+0) max=$1; print(" "$1",")}' \ +- isa-evolution.in | sort -n | uniq +- +- echo "};" +- echo "" +- +- awk 'BEGIN { max=19 } +- { if ($1+0 > max+0) max=$1 } +- END { print "static constexpr int N_CPUCFG_WORDS = "1+max";" }' \ +- isa-evolution.in +- +- echo "#endif /* LOONGARCH_CPUCFG_MAP_H */" +-} +- + main() { + case "$1" in +- cpucfg-map) gen_cpucfg_map;; +- header) gen_defines;; +- opt) gen_options;; +- *) echo "Unknown Command: \"$1\". Available: cpucfg-map, header, opt"; exit 1;; ++ evolution_h) ++ awk -v header_p=1 -f gen-evolution.awk isa-evolution.in ++ ;; ++ evolution_c) ++ awk -v header_p=0 -f gen-evolution.awk isa-evolution.in ++ ;; ++ header) ++ gen_defines ++ ;; ++ opt) ++ gen_options ++ ;; ++ *) ++ echo "Unknown Command: \"$1\". Available: header, opt, evolution_h, evolution_c" ++ exit 1 ++ ;; + esac + } + +diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in +index 11a198b64..50f72d5a0 100644 +--- a/gcc/config/loongarch/genopts/isa-evolution.in ++++ b/gcc/config/loongarch/genopts/isa-evolution.in +@@ -1,5 +1,5 @@ +-2 25 frecipe Support frecipe.{s/d} and frsqrte.{s/d} instructions. +-2 26 div32 Support div.w[u] and mod.w[u] instructions with inputs not sign-extended. +-2 27 lam-bh Support am{swap/add}[_db].{b/h} instructions. +-2 28 lamcas Support amcas[_db].{b/h/w/d} instructions. +-3 23 ld-seq-sa Do not need load-load barriers (dbar 0x700). ++2 25 frecipe 1.1 Support frecipe.{s/d} and frsqrte.{s/d} instructions. ++2 26 div32 1.1 Support div.w[u] and mod.w[u] instructions with inputs not sign-extended. ++2 27 lam-bh 1.1 Support am{swap/add}[_db].{b/h} instructions. 
++2 28 lamcas 1.1 Support amcas[_db].{b/h/w/d} instructions. ++3 23 ld-seq-sa 1.1 Do not need load-load barriers (dbar 0x700). +diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc +index 153db75b0..4ecea6a45 100644 +--- a/gcc/config/loongarch/loongarch-c.cc ++++ b/gcc/config/loongarch/loongarch-c.cc +@@ -103,6 +103,29 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) + builtin_define ("__loongarch_simd_width=256"); + } + ++ /* ISA evolution features */ ++ int max_v_major = 1, max_v_minor = 0; ++ ++ for (int i = 0; i < N_EVO_FEATURES; i++) ++ if (la_target.isa.evolution & la_evo_feature_masks[i]) ++ { ++ builtin_define (la_evo_macro_name[i]); ++ ++ int major = la_evo_version_major[i], ++ minor = la_evo_version_minor[i]; ++ ++ max_v_major = major > max_v_major ? major : max_v_major; ++ max_v_minor = major == max_v_major ++ ? (minor > max_v_minor ? minor : max_v_minor): max_v_minor; ++ } ++ ++ /* Find the minimum ISA version required to run the target program. */ ++ if (!(max_v_major == 1 && max_v_minor <= 1 && ISA_HAS_LASX)) ++ { ++ builtin_define_with_int_value ("__loongarch_version_major", max_v_major); ++ builtin_define_with_int_value ("__loongarch_version_minor", max_v_minor); ++ } ++ + /* Native Data Sizes. */ + builtin_define_with_int_value ("_LOONGARCH_SZINT", INT_TYPE_SIZE); + builtin_define_with_int_value ("_LOONGARCH_SZLONG", LONG_TYPE_SIZE); +diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc +index eb1eb8011..49107f2ae 100644 +--- a/gcc/config/loongarch/loongarch-cpu.cc ++++ b/gcc/config/loongarch/loongarch-cpu.cc +@@ -28,8 +28,8 @@ along with GCC; see the file COPYING3. 
If not see + #include "loongarch-def.h" + #include "loongarch-opts.h" + #include "loongarch-cpu.h" +-#include "loongarch-cpucfg-map.h" + #include "loongarch-str.h" ++#include "loongarch-evolution.h" + + + /* Native CPU detection with "cpucfg" */ +diff --git a/gcc/config/loongarch/loongarch-evolution.cc b/gcc/config/loongarch/loongarch-evolution.cc +new file mode 100644 +index 000000000..1fb4e3b01 +--- /dev/null ++++ b/gcc/config/loongarch/loongarch-evolution.cc +@@ -0,0 +1,60 @@ ++/* Generated automatically by "genstr" from "isa-evolution.in". ++ Please do not edit this file directly. ++ ++ Copyright (C) 2023-2024 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. 
++*/ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "options.h" ++ ++#include "loongarch-evolution.h" ++ ++int la_evo_feature_masks[] = { ++ OPTION_MASK_ISA_FRECIPE, ++ OPTION_MASK_ISA_DIV32, ++ OPTION_MASK_ISA_LAM_BH, ++ OPTION_MASK_ISA_LAMCAS, ++ OPTION_MASK_ISA_LD_SEQ_SA, ++}; ++ ++const char* la_evo_macro_name[] = { ++ "__loongarch_frecipe", ++ "__loongarch_div32", ++ "__loongarch_lam_bh", ++ "__loongarch_lamcas", ++ "__loongarch_ld_seq_sa", ++}; ++ ++int la_evo_version_major[] = { ++ 1, /* FRECIPE */ ++ 1, /* DIV32 */ ++ 1, /* LAM_BH */ ++ 1, /* LAMCAS */ ++ 1, /* LD_SEQ_SA */ ++}; ++ ++int la_evo_version_minor[] = { ++ 1, /* FRECIPE */ ++ 1, /* DIV32 */ ++ 1, /* LAM_BH */ ++ 1, /* LAMCAS */ ++ 1, /* LD_SEQ_SA */ ++}; +diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-evolution.h +similarity index 52% +rename from gcc/config/loongarch/loongarch-cpucfg-map.h +rename to gcc/config/loongarch/loongarch-evolution.h +index 148333c24..d64996481 100644 +--- a/gcc/config/loongarch/loongarch-cpucfg-map.h ++++ b/gcc/config/loongarch/loongarch-evolution.h +@@ -17,10 +17,13 @@ GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see +-. */ ++. 
++*/ + +-#ifndef LOONGARCH_CPUCFG_MAP_H +-#define LOONGARCH_CPUCFG_MAP_H ++#ifndef LOONGARCH_EVOLUTION_H ++#define LOONGARCH_EVOLUTION_H ++ ++#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) + + #include "options.h" + +@@ -48,4 +51,39 @@ static constexpr int cpucfg_useful_idx[] = { + }; + + static constexpr int N_CPUCFG_WORDS = 20; +-#endif /* LOONGARCH_CPUCFG_MAP_H */ ++ ++/* ISA evolution features */ ++enum { ++ EVO_FRECIPE = 0, ++ EVO_DIV32 = 1, ++ EVO_LAM_BH = 2, ++ EVO_LAMCAS = 3, ++ EVO_LD_SEQ_SA = 4, ++ N_EVO_FEATURES = 5 ++}; ++ ++/* Condition macros */ ++#define ISA_HAS_FRECIPE \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_FRECIPE) ++#define ISA_HAS_DIV32 \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_DIV32) ++#define ISA_HAS_LAM_BH \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_LAM_BH) ++#define ISA_HAS_LAMCAS \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_LAMCAS) ++#define ISA_HAS_LD_SEQ_SA \ ++ (la_target.isa.evolution & OPTION_MASK_ISA_LD_SEQ_SA) ++ ++/* Bitmasks on la_target.isa.evolution. */ ++extern int la_evo_feature_masks[N_EVO_FEATURES]; ++ ++/* Builtin macro names for the evolution features. */ ++extern const char* la_evo_macro_name[N_EVO_FEATURES]; ++ ++/* The ISA version where a specific feature is introduced. 
*/ ++extern int la_evo_version_major[N_EVO_FEATURES]; ++extern int la_evo_version_minor[N_EVO_FEATURES]; ++ ++#endif ++ ++#endif /* LOONGARCH_EVOLUTION_H */ +diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h +index 325c1e29c..19bae5a0b 100644 +--- a/gcc/config/loongarch/loongarch-opts.h ++++ b/gcc/config/loongarch/loongarch-opts.h +@@ -115,17 +115,6 @@ struct loongarch_flags { + #define ISA_HAS_LASX \ + (la_target.isa.simd == ISA_EXT_SIMD_LASX) + +-#define ISA_HAS_FRECIPE \ +- (la_target.isa.evolution & OPTION_MASK_ISA_FRECIPE) +-#define ISA_HAS_DIV32 \ +- (la_target.isa.evolution & OPTION_MASK_ISA_DIV32) +-#define ISA_HAS_LAM_BH \ +- (la_target.isa.evolution & OPTION_MASK_ISA_LAM_BH) +-#define ISA_HAS_LAMCAS \ +- (la_target.isa.evolution & OPTION_MASK_ISA_LAMCAS) +-#define ISA_HAS_LD_SEQ_SA \ +- (la_target.isa.evolution & OPTION_MASK_ISA_LD_SEQ_SA) +- + /* TARGET_ macros for use in *.md template conditionals */ + #define TARGET_uARCH_LA464 (la_target.cpu_tune == TUNE_LA464) + #define TARGET_uARCH_LA664 (la_target.cpu_tune == TUNE_LA664) +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index 089206605..6743d2684 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see + /* LoongArch external variables defined in loongarch.cc. 
*/ + + #include "config/loongarch/loongarch-opts.h" ++#include "config/loongarch/loongarch-evolution.h" + + #define SWITCHABLE_TARGET 1 + +diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch +index 488e8cff3..53dde9ce6 100644 +--- a/gcc/config/loongarch/t-loongarch ++++ b/gcc/config/loongarch/t-loongarch +@@ -21,7 +21,7 @@ GTM_H += loongarch-multilib.h + OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \ + $(srcdir)/config/loongarch/loongarch-def-array.h \ + $(srcdir)/config/loongarch/loongarch-tune.h \ +- $(srcdir)/config/loongarch/loongarch-cpucfg-map.h ++ $(srcdir)/config/loongarch/loongarch-evolution.h + + # Canonical target triplet from config.gcc + LA_MULTIARCH_TRIPLET = $(patsubst LA_MULTIARCH_TRIPLET=%,%,$\ +@@ -62,7 +62,11 @@ loongarch-opts.o: $(srcdir)/config/loongarch/loongarch-opts.cc $(LA_STR_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + + loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) \ +- $(srcdir)/config/loongarch/loongarch-cpucfg-map.h ++ $(srcdir)/config/loongarch/loongarch-evolution.h ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< ++ ++loongarch-evolution.o: $(srcdir)/config/loongarch/loongarch-evolution.cc $(LA_STR_H) \ ++ $(srcdir)/config/loongarch/loongarch-evolution.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + + loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.cc $(LA_STR_H) +@@ -84,11 +88,17 @@ s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \ + $(srcdir)/config/loongarch/loongarch.opt + $(STAMP) s-loongarch-opt + +-$(srcdir)/config/loongarch/loongarch-cpucfg-map.h: s-loongarch-cpucfg-map ++$(srcdir)/config/loongarch/loongarch-evolution.h: s-loongarch-evolution + @true +-s-loongarch-cpucfg-map: $(srcdir)/config/loongarch/genopts/genstr.sh \ +- $(srcdir)/config/loongarch/genopts/isa-evolution.in +- $(SHELL) $< cpucfg-map > tmp-cpucfg.h +- $(SHELL) $(srcdir)/../move-if-change 
tmp-cpucfg.h \ +- $(srcdir)/config/loongarch/loongarch-cpucfg-map.h ++$(srcdir)/config/loongarch/loongarch-evolution.cc: s-loongarch-evolution ++ @true ++s-loongarch-evolution: $(srcdir)/config/loongarch/genopts/genstr.sh \ ++ $(srcdir)/config/loongarch/genopts/isa-evolution.in \ ++ $(srcdir)/config/loongarch/genopts/gen-evolution.awk ++ $(SHELL) $< evolution_h > tmp-isa-evo.h ++ $(SHELL) $< evolution_c > tmp-isa-evo.cc ++ $(SHELL) $(srcdir)/../move-if-change tmp-isa-evo.h \ ++ $(srcdir)/config/loongarch/loongarch-evolution.h ++ $(SHELL) $(srcdir)/../move-if-change tmp-isa-evo.cc \ ++ $(srcdir)/config/loongarch/loongarch-evolution.cc + $(STAMP) $@ +-- +2.43.0 + diff --git a/0166-LoongArch-Add-constraints-for-bit-string-operation-d.patch b/0166-LoongArch-Add-constraints-for-bit-string-operation-d.patch new file mode 100644 index 0000000..aa4dd2f --- /dev/null +++ b/0166-LoongArch-Add-constraints-for-bit-string-operation-d.patch @@ -0,0 +1,120 @@ +From 3bb46830b0f92f54d1ef529796348c0a86504065 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Fri, 26 Apr 2024 15:59:11 +0800 +Subject: [PATCH 166/188] LoongArch: Add constraints for bit string operation + define_insn_and_split's [PR114861] + +Without the constrants, the compiler attempts to use a stack slot as the +target, causing an ICE building the kernel with -Os: + + drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c:3144:1: + error: could not split insn + (insn:TI 1764 67 1745 + (set (mem/c:DI (reg/f:DI 3 $r3) [707 %sfp+-80 S8 A64]) + (and:DI (reg/v:DI 28 $r28 [orig:422 raster_config ] [422]) + (const_int -50331649 [0xfffffffffcffffff]))) + "drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c":1386:21 111 + {*bstrins_di_for_mask} + (nil)) + +Add these constrants to fix the issue. + +gcc/ChangeLog: + + PR target/114861 + * config/loongarch/loongarch.md (bstrins__for_mask): Add + constraints for operands. + (bstrins__for_ior_mask): Likewise. + +gcc/testsuite/ChangeLog: + + PR target/114861 + * gcc.target/loongarch/pr114861.c: New test. 
+--- + gcc/config/loongarch/loongarch.md | 16 ++++---- + gcc/testsuite/gcc.target/loongarch/pr114861.c | 39 +++++++++++++++++++ + 2 files changed, 47 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr114861.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 95beb88fe..20494ce8a 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1543,9 +1543,9 @@ + (set_attr "mode" "")]) + + (define_insn_and_split "*bstrins__for_mask" +- [(set (match_operand:GPR 0 "register_operand") +- (and:GPR (match_operand:GPR 1 "register_operand") +- (match_operand:GPR 2 "ins_zero_bitmask_operand")))] ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (and:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand:GPR 2 "ins_zero_bitmask_operand" "i")))] + "" + "#" + "" +@@ -1563,11 +1563,11 @@ + }) + + (define_insn_and_split "*bstrins__for_ior_mask" +- [(set (match_operand:GPR 0 "register_operand") +- (ior:GPR (and:GPR (match_operand:GPR 1 "register_operand") +- (match_operand:GPR 2 "const_int_operand")) +- (and:GPR (match_operand:GPR 3 "register_operand") +- (match_operand:GPR 4 "const_int_operand"))))] ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (ior:GPR (and:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand:GPR 2 "const_int_operand" "i")) ++ (and:GPR (match_operand:GPR 3 "register_operand" "r") ++ (match_operand:GPR 4 "const_int_operand" "i"))))] + "loongarch_pre_reload_split () + && loongarch_use_bstrins_for_ior_with_mask (mode, operands)" + "#" +diff --git a/gcc/testsuite/gcc.target/loongarch/pr114861.c b/gcc/testsuite/gcc.target/loongarch/pr114861.c +new file mode 100644 +index 000000000..e6507c406 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr114861.c +@@ -0,0 +1,39 @@ ++/* PR114861: ICE building the kernel with -Os ++ Reduced from linux/fs/ntfs3/attrib.c at revision c942a0cd3603. 
*/ ++/* { dg-do compile } */ ++/* { dg-options "-Os -march=loongarch64 -msoft-float -mabi=lp64s" } */ ++ ++long evcn, attr_collapse_range_vbo, attr_collapse_range_bytes; ++unsigned short flags; ++int attr_collapse_range_ni_0_0; ++int *attr_collapse_range_mi; ++unsigned attr_collapse_range_svcn, attr_collapse_range_vcn1; ++void ni_insert_nonresident (unsigned, unsigned short, int **); ++int mi_pack_runs (int); ++int ++attr_collapse_range (void) ++{ ++ _Bool __trans_tmp_1; ++ int run = attr_collapse_range_ni_0_0; ++ unsigned evcn1, vcn, end; ++ short a_flags = flags; ++ __trans_tmp_1 = flags & (32768 | 1); ++ if (__trans_tmp_1) ++ return 2; ++ vcn = attr_collapse_range_vbo; ++ end = attr_collapse_range_bytes; ++ evcn1 = evcn; ++ for (;;) ++ if (attr_collapse_range_svcn >= end) ++ { ++ unsigned eat, next_svcn = mi_pack_runs (42); ++ attr_collapse_range_vcn1 = (vcn ? vcn : attr_collapse_range_svcn); ++ eat = (0 < end) - attr_collapse_range_vcn1; ++ mi_pack_runs (run - eat); ++ if (next_svcn + eat) ++ ni_insert_nonresident (evcn1 - eat - next_svcn, a_flags, ++ &attr_collapse_range_mi); ++ } ++ else ++ return 42; ++} +-- +2.43.0 + diff --git a/0167-LoongArch-Guard-REGNO-with-REG_P-in-loongarch_expand.patch b/0167-LoongArch-Guard-REGNO-with-REG_P-in-loongarch_expand.patch new file mode 100644 index 0000000..bc867d9 --- /dev/null +++ b/0167-LoongArch-Guard-REGNO-with-REG_P-in-loongarch_expand.patch @@ -0,0 +1,67 @@ +From be1397b598a436d562e6a35a13ed2ae695531255 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Wed, 22 May 2024 09:29:43 +0800 +Subject: [PATCH 167/188] LoongArch: Guard REGNO with REG_P in + loongarch_expand_conditional_move [PR115169] + +gcc/ChangeLog: + + PR target/115169 + * config/loongarch/loongarch.cc + (loongarch_expand_conditional_move): Guard REGNO with REG_P. 
+--- + gcc/config/loongarch/loongarch.cc | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index c86a0856b..0c2c38f6f 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5341,6 +5341,7 @@ loongarch_expand_conditional_move (rtx *operands) + rtx op1_extend = op1; + + /* Record whether operands[2] and operands[3] modes are promoted to word_mode. */ ++ bool promote_op[2] = {false, false}; + bool promote_p = false; + machine_mode mode = GET_MODE (operands[0]); + +@@ -5348,9 +5349,15 @@ loongarch_expand_conditional_move (rtx *operands) + loongarch_emit_float_compare (&code, &op0, &op1); + else + { +- if ((REGNO (op0) == REGNO (operands[2]) +- || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx))) +- && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode)) ++ if (GET_MODE_SIZE (GET_MODE (op0)) < word_mode) ++ { ++ promote_op[0] = (REG_P (op0) && REG_P (operands[2]) && ++ REGNO (op0) == REGNO (operands[2])); ++ promote_op[1] = (REG_P (op1) && REG_P (operands[3]) && ++ REGNO (op1) == REGNO (operands[3])); ++ } ++ ++ if (promote_op[0] || promote_op[1]) + { + mode = word_mode; + promote_p = true; +@@ -5392,7 +5399,7 @@ loongarch_expand_conditional_move (rtx *operands) + + if (promote_p) + { +- if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2])) ++ if (promote_op[0]) + op2 = op0_extend; + else + { +@@ -5400,7 +5407,7 @@ loongarch_expand_conditional_move (rtx *operands) + op2 = force_reg (mode, op2); + } + +- if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3])) ++ if (promote_op[1]) + op3 = op1_extend; + else + { +-- +2.43.0 + diff --git a/0168-LoongArch-Fix-mode-size-comparision-in-loongarch_exp.patch b/0168-LoongArch-Fix-mode-size-comparision-in-loongarch_exp.patch new file mode 100644 index 0000000..f6986bb --- /dev/null +++ b/0168-LoongArch-Fix-mode-size-comparision-in-loongarch_exp.patch @@ -0,0 +1,36 @@ +From 
7675f45536691eeca7d8163020c9bfb127d5ee4f Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Wed, 12 Jun 2024 11:01:53 +0800 +Subject: [PATCH 168/188] LoongArch: Fix mode size comparision in + loongarch_expand_conditional_move + +We were comparing a mode size with word_mode, but word_mode is an enum +value thus this does not really make any sense. (Un)luckily E_DImode +happens to be 8 so this seemed to work, but let's make it correct so it +won't blow up when we add LA32 support or add another machine mode... + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_expand_conditional_move): Compare mode size with + UNITS_PER_WORD instead of word_mode. +--- + gcc/config/loongarch/loongarch.cc | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 0c2c38f6f..77f83ab9e 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -5349,7 +5349,7 @@ loongarch_expand_conditional_move (rtx *operands) + loongarch_emit_float_compare (&code, &op0, &op1); + else + { +- if (GET_MODE_SIZE (GET_MODE (op0)) < word_mode) ++ if (GET_MODE_SIZE (GET_MODE (op0)) < UNITS_PER_WORD) + { + promote_op[0] = (REG_P (op0) && REG_P (operands[2]) && + REGNO (op0) == REGNO (operands[2])); +-- +2.43.0 + diff --git a/0169-LoongArch-Use-bstrins-for-value-1u-const.patch b/0169-LoongArch-Use-bstrins-for-value-1u-const.patch new file mode 100644 index 0000000..11190c3 --- /dev/null +++ b/0169-LoongArch-Use-bstrins-for-value-1u-const.patch @@ -0,0 +1,135 @@ +From 7e34bede110bfa7b2f91dc657c41ed0e7b4b11f7 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 9 Jun 2024 14:43:48 +0800 +Subject: [PATCH 169/188] LoongArch: Use bstrins for "value & (-1u << const)" + +A move/bstrins pair is as fast as a (addi.w|lu12i.w|lu32i.d|lu52i.d)/and +pair, and twice fast as a srli/slli pair. 
When the src reg and the dst +reg happen to be the same, the move instruction can be optimized away. + +gcc/ChangeLog: + + * config/loongarch/predicates.md (high_bitmask_operand): New + predicate. + * config/loongarch/constraints.md (Yy): New constraint. + * config/loongarch/loongarch.md (and3_align): New + define_insn_and_split. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/bstrins-1.c: New test. + * gcc.target/loongarch/bstrins-2.c: New test. +--- + gcc/config/loongarch/constraints.md | 5 +++++ + gcc/config/loongarch/loongarch.md | 17 +++++++++++++++++ + gcc/config/loongarch/predicates.md | 4 ++++ + gcc/testsuite/gcc.target/loongarch/bstrins-1.c | 9 +++++++++ + gcc/testsuite/gcc.target/loongarch/bstrins-2.c | 14 ++++++++++++++ + 5 files changed, 49 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-1.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-2.c + +diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md +index cec5d8857..f3f5776da 100644 +--- a/gcc/config/loongarch/constraints.md ++++ b/gcc/config/loongarch/constraints.md +@@ -94,6 +94,7 @@ + ;; "A constant @code{move_operand} that can be safely loaded using + ;; @code{la}."
+ ;; "Yx" ++;; "Yy" + ;; "Z" - + ;; "ZC" + ;; "A memory operand whose address is formed by a base register and offset +@@ -291,6 +292,10 @@ + "@internal" + (match_operand 0 "low_bitmask_operand")) + ++(define_constraint "Yy" ++ "@internal" ++ (match_operand 0 "high_bitmask_operand")) ++ + (define_constraint "YI" + "@internal + A replicated vector const in which the replicated value is in the range +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 20494ce8a..55a759850 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1542,6 +1542,23 @@ + [(set_attr "move_type" "pick_ins") + (set_attr "mode" "")]) + ++(define_insn_and_split "and3_align" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (and:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand:GPR 2 "high_bitmask_operand" "Yy")))] ++ "" ++ "#" ++ "" ++ [(set (match_dup 0) (match_dup 1)) ++ (set (zero_extract:GPR (match_dup 0) (match_dup 2) (const_int 0)) ++ (const_int 0))] ++{ ++ int len; ++ ++ len = low_bitmask_len (mode, ~INTVAL (operands[2])); ++ operands[2] = GEN_INT (len); ++}) ++ + (define_insn_and_split "*bstrins__for_mask" + [(set (match_operand:GPR 0 "register_operand" "=r") + (and:GPR (match_operand:GPR 1 "register_operand" "r") +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 1d9a30695..95be8a4fe 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -293,6 +293,10 @@ + (and (match_code "const_int") + (match_test "low_bitmask_len (mode, INTVAL (op)) > 12"))) + ++(define_predicate "high_bitmask_operand" ++ (and (match_code "const_int") ++ (match_test "low_bitmask_len (mode, ~INTVAL (op)) > 0"))) ++ + (define_predicate "d_operand" + (and (match_code "reg") + (match_test "GP_REG_P (REGNO (op))"))) +diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-1.c b/gcc/testsuite/gcc.target/loongarch/bstrins-1.c +new file mode 
100644 +index 000000000..7cb3a9523 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-1.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r4,\\\$r0,4,0" } } */ ++ ++long ++x (long a) ++{ ++ return a & -32; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-2.c b/gcc/testsuite/gcc.target/loongarch/bstrins-2.c +new file mode 100644 +index 000000000..9777f502e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-2.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r\[0-9\]+,\\\$r0,4,0" } } */ ++ ++struct aligned_buffer { ++ _Alignas(32) char x[1024]; ++}; ++ ++extern int f(char *); ++int g(void) ++{ ++ struct aligned_buffer buf; ++ return f(buf.x); ++} +-- +2.43.0 + diff --git a/0170-LoongArch-Tweak-IOR-rtx_cost-for-bstrins.patch b/0170-LoongArch-Tweak-IOR-rtx_cost-for-bstrins.patch new file mode 100644 index 0000000..32bb89c --- /dev/null +++ b/0170-LoongArch-Tweak-IOR-rtx_cost-for-bstrins.patch @@ -0,0 +1,158 @@ +From bdc189d43ef38ea53823120de8008f39ead0618d Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sat, 15 Jun 2024 18:29:43 +0800 +Subject: [PATCH 170/188] LoongArch: Tweak IOR rtx_cost for bstrins + +Consider + + c &= 0xfff; + a &= ~0xfff; + b &= ~0xfff; + a |= c; + b |= c; + +This can be done with 2 bstrins instructions. But we need to recognize +it in loongarch_rtx_costs or the compiler will not propagate "c & 0xfff" +forward. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc: + (loongarch_use_bstrins_for_ior_with_mask): Split the main logic + into ... + (loongarch_use_bstrins_for_ior_with_mask_1): ... here. + (loongarch_rtx_costs): Special case for IOR those can be + implemented with bstrins. + +gcc/testsuite/ChangeLog; + + * gcc.target/loongarch/bstrins-3.c: New test. 
+--- + gcc/config/loongarch/loongarch.cc | 73 ++++++++++++++----- + .../gcc.target/loongarch/bstrins-3.c | 16 ++++ + 2 files changed, 72 insertions(+), 17 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-3.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 77f83ab9e..cd9fa98dc 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -3678,6 +3678,27 @@ loongarch_set_reg_reg_piece_cost (machine_mode mode, unsigned int units) + return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units); + } + ++static int ++loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode mode, ++ unsigned HOST_WIDE_INT mask1, ++ unsigned HOST_WIDE_INT mask2) ++{ ++ if (mask1 != ~mask2 || !mask1 || !mask2) ++ return 0; ++ ++ /* Try to avoid a right-shift. */ ++ if (low_bitmask_len (mode, mask1) != -1) ++ return -1; ++ ++ if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1) ++ return 1; ++ ++ if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1) ++ return -1; ++ ++ return 0; ++} ++ + /* Return the cost of moving between two registers of mode MODE. */ + + static int +@@ -3809,6 +3830,38 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + /* Fall through. 
*/ + + case IOR: ++ { ++ rtx op[2] = {XEXP (x, 0), XEXP (x, 1)}; ++ if (GET_CODE (op[0]) == AND && GET_CODE (op[1]) == AND ++ && (mode == SImode || (TARGET_64BIT && mode == DImode))) ++ { ++ rtx rtx_mask0 = XEXP (op[0], 1), rtx_mask1 = XEXP (op[1], 1); ++ if (CONST_INT_P (rtx_mask0) && CONST_INT_P (rtx_mask1)) ++ { ++ unsigned HOST_WIDE_INT mask0 = UINTVAL (rtx_mask0); ++ unsigned HOST_WIDE_INT mask1 = UINTVAL (rtx_mask1); ++ if (loongarch_use_bstrins_for_ior_with_mask_1 (mode, ++ mask0, ++ mask1)) ++ { ++ /* A bstrins instruction */ ++ *total = COSTS_N_INSNS (1); ++ ++ /* A srai instruction */ ++ if (low_bitmask_len (mode, mask0) == -1 ++ && low_bitmask_len (mode, mask1) == -1) ++ *total += COSTS_N_INSNS (1); ++ ++ for (int i = 0; i < 2; i++) ++ *total += set_src_cost (XEXP (op[i], 0), mode, speed); ++ ++ return true; ++ } ++ } ++ } ++ } ++ ++ /* Fall through. */ + case XOR: + /* Double-word operations use two single-word operations. */ + *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2), +@@ -5793,23 +5846,9 @@ bool loongarch_pre_reload_split (void) + int + loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op) + { +- unsigned HOST_WIDE_INT mask1 = UINTVAL (op[2]); +- unsigned HOST_WIDE_INT mask2 = UINTVAL (op[4]); +- +- if (mask1 != ~mask2 || !mask1 || !mask2) +- return 0; +- +- /* Try to avoid a right-shift. 
*/ +- if (low_bitmask_len (mode, mask1) != -1) +- return -1; +- +- if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1) +- return 1; +- +- if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1) +- return -1; +- +- return 0; ++ return loongarch_use_bstrins_for_ior_with_mask_1 (mode, ++ UINTVAL (op[2]), ++ UINTVAL (op[4])); + } + + /* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto +diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-3.c b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c +new file mode 100644 +index 000000000..13762bdef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-rtl-final" } */ ++/* { dg-final { scan-rtl-dump-times "insv\[sd\]i" 2 "final" } } */ ++ ++struct X { ++ long a, b; ++}; ++ ++struct X ++test (long a, long b, long c) ++{ ++ c &= 0xfff; ++ a &= ~0xfff; ++ b &= ~0xfff; ++ return (struct X){.a = a | c, .b = b | c}; ++} +-- +2.43.0 + diff --git a/0171-LoongArch-NFC-Dedup-and-sort-the-comment-in-loongarc.patch b/0171-LoongArch-NFC-Dedup-and-sort-the-comment-in-loongarc.patch new file mode 100644 index 0000000..4fdc898 --- /dev/null +++ b/0171-LoongArch-NFC-Dedup-and-sort-the-comment-in-loongarc.patch @@ -0,0 +1,44 @@ +From 51c20768fde58093794ff0281c698b6738346313 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 16 Jun 2024 12:22:40 +0800 +Subject: [PATCH 171/188] LoongArch: NFC: Dedup and sort the comment in + loongarch_print_operand_reloc + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_print_operand_reloc): + Dedup and sort the comment describing modifiers. 
+--- + gcc/config/loongarch/loongarch.cc | 10 +--------- + 1 file changed, 1 insertion(+), 9 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index cd9fa98dc..35524b5da 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6129,21 +6129,13 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, + 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...), + 'z' for (eq:?I ...), 'n' for (ne:?I ...). + 't' Like 'T', but with the EQ/NE cases reversed +- 'F' Print the FPU branch condition for comparison OP. +- 'W' Print the inverse of the FPU branch condition for comparison OP. +- 'w' Print a LSX register. + 'u' Print a LASX register. +- 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...), +- 'z' for (eq:?I ...), 'n' for (ne:?I ...). +- 't' Like 'T', but with the EQ/NE cases reversed +- 'Y' Print loongarch_fp_conditions[INTVAL (OP)] +- 'Z' Print OP and a comma for 8CC, otherwise print nothing. +- 'z' Print $0 if OP is zero, otherwise print OP normally. + 'v' Print the insn size suffix b, h, w or d for vector modes V16QI, V8HI, + V4SI, V2SI, and w, d for vector modes V4SF, V2DF respectively. + 'V' Print exact log2 of CONST_INT OP element 0 of a replicated + CONST_VECTOR in decimal. + 'W' Print the inverse of the FPU branch condition for comparison OP. ++ 'w' Print a LSX register. + 'X' Print CONST_INT OP in hexadecimal format. + 'x' Print the low 16 bits of CONST_INT OP in hexadecimal format. 
+ 'Y' Print loongarch_fp_conditions[INTVAL (OP)] +-- +2.43.0 + diff --git a/0172-LoongArch-Fix-explicit-relocs-extreme-tls-desc.c-tes.patch b/0172-LoongArch-Fix-explicit-relocs-extreme-tls-desc.c-tes.patch new file mode 100644 index 0000000..3a59718 --- /dev/null +++ b/0172-LoongArch-Fix-explicit-relocs-extreme-tls-desc.c-tes.patch @@ -0,0 +1,45 @@ +From 9503e64bf304d44947791d9ff17d65a6905e59ce Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Fri, 28 Jun 2024 15:04:26 +0800 +Subject: [PATCH 172/188] LoongArch: Fix explicit-relocs-{extreme-,}tls-desc.c + tests. + +After r15-1579, ADD and LD/ST pairs will be merged into LDX/STX. +Cause these two tests to fail. To guarantee that these two tests pass, +add the compilation option '-fno-late-combine-instructions'. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c: + Add compilation options '-fno-late-combine-instructions'. + * gcc.target/loongarch/explicit-relocs-tls-desc.c: Likewise. +--- + .../gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c | 2 +- + gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c +index 3797556e1..e9eb0d6f7 100644 +--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -mcmodel=extreme" } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -mcmodel=extreme -fno-late-combine-instructions" } */ + + __thread int a __attribute__((visibility("hidden"))); + extern __thread int b __attribute__((visibility("default"))); +diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c +index f66903091..fed478458 100644 +--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c ++++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc" } */ ++/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -fno-late-combine-instructions" } */ + + __thread int a __attribute__((visibility("hidden"))); + extern __thread int b __attribute__((visibility("default"))); +-- +2.43.0 + diff --git a/0173-LoongArch-Define-loongarch_insn_cost-and-set-the-cos.patch b/0173-LoongArch-Define-loongarch_insn_cost-and-set-the-cos.patch new file mode 100644 index 0000000..46b78b8 --- /dev/null +++ b/0173-LoongArch-Define-loongarch_insn_cost-and-set-the-cos.patch @@ -0,0 +1,70 @@ +From 727b1a2cff9cecd904545895bbf39a89fbf1ea4f Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Fri, 28 Jun 2024 15:09:48 +0800 +Subject: [PATCH 173/188] LoongArch: Define loongarch_insn_cost and set the + cost of movcf2gr and movgr2cf. + +The following two FAIL items have been fixed: + +FAIL: gcc.target/loongarch/movcf2gr-via-fr.c scan-assembler movcf2fr\\t\\\\\$f[0-9]+,\\\\\$fcc +FAIL: gcc.target/loongarch/movcf2gr-via-fr.c scan-assembler movfr2gr\\\\.s\\t\\\\\$r4 + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc (loongarch_insn_cost): + New function. + (TARGET_INSN_COST): New macro. +--- + gcc/config/loongarch/loongarch.cc | 29 +++++++++++++++++++++++++++++ + 1 file changed, 29 insertions(+) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 35524b5da..958e82b86 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4369,6 +4369,33 @@ loongarch_address_cost (rtx addr, machine_mode mode, + return loongarch_address_insns (addr, mode, false); + } + ++/* Implement TARGET_INSN_COST. 
*/ ++ ++static int ++loongarch_insn_cost (rtx_insn *insn, bool speed) ++{ ++ rtx x = PATTERN (insn); ++ int cost = pattern_cost (x, speed); ++ ++ /* On LA464, prevent movcf2fr and movfr2gr from merging into movcf2gr. */ ++ if (GET_CODE (x) == SET ++ && GET_MODE (XEXP (x, 0)) == FCCmode) ++ { ++ rtx dest, src; ++ dest = XEXP (x, 0); ++ src = XEXP (x, 1); ++ ++ if (REG_P (dest) && REG_P (src)) ++ { ++ if (GP_REG_P (REGNO (dest)) && FCC_REG_P (REGNO (src))) ++ cost = loongarch_cost->movcf2gr; ++ else if (FCC_REG_P (REGNO (dest)) && GP_REG_P (REGNO (src))) ++ cost = loongarch_cost->movgr2cf; ++ } ++ } ++ return cost; ++} ++ + /* Return one word of double-word value OP, taking into account the fixed + endianness of certain registers. HIGH_P is true to select the high part, + false to select the low part. */ +@@ -11089,6 +11116,8 @@ loongarch_asm_code_end (void) + #define TARGET_RTX_COSTS loongarch_rtx_costs + #undef TARGET_ADDRESS_COST + #define TARGET_ADDRESS_COST loongarch_address_cost ++#undef TARGET_INSN_COST ++#define TARGET_INSN_COST loongarch_insn_cost + #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST + #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + loongarch_builtin_vectorization_cost +-- +2.43.0 + diff --git a/0174-LoongArch-TFmode-is-not-allowed-to-be-stored-in-the-.patch b/0174-LoongArch-TFmode-is-not-allowed-to-be-stored-in-the-.patch new file mode 100644 index 0000000..6f728f9 --- /dev/null +++ b/0174-LoongArch-TFmode-is-not-allowed-to-be-stored-in-the-.patch @@ -0,0 +1,73 @@ +From b6b917847a25afbaba9983e80b62b69ed3ce3983 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Thu, 4 Jul 2024 10:37:26 +0800 +Subject: [PATCH 174/188] LoongArch: TFmode is not allowed to be stored in the + float register. + + PR target/115752 + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_hard_regno_mode_ok_uncached): Replace + UNITS_PER_FPVALUE with UNITS_PER_HWFPVALUE. + * config/loongarch/loongarch.h (UNITS_PER_FPVALUE): Delete. 
+ +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/pr115752.c: New test. +--- + gcc/config/loongarch/loongarch.cc | 2 +- + gcc/config/loongarch/loongarch.h | 7 ------- + gcc/testsuite/gcc.target/loongarch/pr115752.c | 8 ++++++++ + 3 files changed, 9 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/pr115752.c + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 958e82b86..b78512e0e 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -6760,7 +6760,7 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) + if (mclass == MODE_FLOAT + || mclass == MODE_COMPLEX_FLOAT + || mclass == MODE_VECTOR_FLOAT) +- return size <= UNITS_PER_FPVALUE; ++ return size <= UNITS_PER_HWFPVALUE; + + /* Allow integer modes that fit into a single register. We need + to put integers into FPRs when using instructions like CVT +diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h +index 6743d2684..a23dabde1 100644 +--- a/gcc/config/loongarch/loongarch.h ++++ b/gcc/config/loongarch/loongarch.h +@@ -146,13 +146,6 @@ along with GCC; see the file COPYING3. If not see + #define UNITS_PER_HWFPVALUE \ + (TARGET_SOFT_FLOAT ? 0 : UNITS_PER_FP_REG) + +-/* The largest size of value that can be held in floating-point +- registers. */ +-#define UNITS_PER_FPVALUE \ +- (TARGET_SOFT_FLOAT ? 0 \ +- : TARGET_SINGLE_FLOAT ? UNITS_PER_FP_REG \ +- : LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT) +- + /* The number of bytes in a double. 
*/ + #define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT) + +diff --git a/gcc/testsuite/gcc.target/loongarch/pr115752.c b/gcc/testsuite/gcc.target/loongarch/pr115752.c +new file mode 100644 +index 000000000..df4bae524 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/pr115752.c +@@ -0,0 +1,8 @@ ++/* { dg-do compile } */ ++ ++long double ++test (long double xx) ++{ ++ __asm ("" :: "f"(xx)); /* { dg-error "inconsistent operand constraints in an 'asm'" } */ ++ return xx + 1; ++} +-- +2.43.0 + diff --git a/0175-LoongArch-Remove-unreachable-codes.patch b/0175-LoongArch-Remove-unreachable-codes.patch new file mode 100644 index 0000000..c4a674a --- /dev/null +++ b/0175-LoongArch-Remove-unreachable-codes.patch @@ -0,0 +1,249 @@ +From a75609d90f506709e4af26701aa4fb6adce00700 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Thu, 4 Jul 2024 15:00:40 +0800 +Subject: [PATCH 175/188] LoongArch: Remove unreachable codes. + +gcc/ChangeLog: + + * config/loongarch/loongarch.cc + (loongarch_split_move): Delete. + (loongarch_hard_regno_mode_ok_uncached): Likewise. + * config/loongarch/loongarch.md + (move_doubleword_fpr): Likewise. + (load_low): Likewise. + (load_high): Likewise. + (store_word): Likewise. + (movgr2frh): Likewise. + (movfrh2gr): Likewise. 
+--- + gcc/config/loongarch/loongarch.cc | 47 +++---------- + gcc/config/loongarch/loongarch.md | 109 ------------------------------ + 2 files changed, 8 insertions(+), 148 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index b78512e0e..260dd7b5f 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4459,42 +4459,13 @@ loongarch_split_move_p (rtx dest, rtx src) + void + loongarch_split_move (rtx dest, rtx src) + { +- rtx low_dest; +- + gcc_checking_assert (loongarch_split_move_p (dest, src)); + if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))) + loongarch_split_128bit_move (dest, src); + else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest))) + loongarch_split_256bit_move (dest, src); +- else if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src)) +- { +- if (!TARGET_64BIT && GET_MODE (dest) == DImode) +- emit_insn (gen_move_doubleword_fprdi (dest, src)); +- else if (!TARGET_64BIT && GET_MODE (dest) == DFmode) +- emit_insn (gen_move_doubleword_fprdf (dest, src)); +- else if (TARGET_64BIT && GET_MODE (dest) == TFmode) +- emit_insn (gen_move_doubleword_fprtf (dest, src)); +- else +- gcc_unreachable (); +- } + else +- { +- /* The operation can be split into two normal moves. Decide in +- which order to do them. 
*/ +- low_dest = loongarch_subword (dest, false); +- if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src)) +- { +- loongarch_emit_move (loongarch_subword (dest, true), +- loongarch_subword (src, true)); +- loongarch_emit_move (low_dest, loongarch_subword (src, false)); +- } +- else +- { +- loongarch_emit_move (low_dest, loongarch_subword (src, false)); +- loongarch_emit_move (loongarch_subword (dest, true), +- loongarch_subword (src, true)); +- } +- } ++ gcc_unreachable (); + } + + /* Check if adding an integer constant value for a specific mode can be +@@ -6743,20 +6714,18 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) + size = GET_MODE_SIZE (mode); + mclass = GET_MODE_CLASS (mode); + +- if (GP_REG_P (regno) && !LSX_SUPPORTED_MODE_P (mode) ++ if (GP_REG_P (regno) ++ && !LSX_SUPPORTED_MODE_P (mode) + && !LASX_SUPPORTED_MODE_P (mode)) + return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD; + +- /* For LSX, allow TImode and 128-bit vector modes in all FPR. */ +- if (FP_REG_P (regno) && LSX_SUPPORTED_MODE_P (mode)) +- return true; +- +- /* FIXED ME: For LASX, allow TImode and 256-bit vector modes in all FPR. */ +- if (FP_REG_P (regno) && LASX_SUPPORTED_MODE_P (mode)) +- return true; +- + if (FP_REG_P (regno)) + { ++ /* Allow 128-bit or 256-bit vector modes in all FPR. */ ++ if (LSX_SUPPORTED_MODE_P (mode) ++ || LASX_SUPPORTED_MODE_P (mode)) ++ return true; ++ + if (mclass == MODE_FLOAT + || mclass == MODE_COMPLEX_FLOAT + || mclass == MODE_VECTOR_FLOAT) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 55a759850..16f9f37c8 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -400,9 +400,6 @@ + ;; 64-bit modes for which we provide move patterns. + (define_mode_iterator MOVE64 [DI DF]) + +-;; 128-bit modes for which we provide move patterns on 64-bit targets. 
+-(define_mode_iterator MOVE128 [TI TF]) +- + ;; Iterator for sub-32-bit integer modes. + (define_mode_iterator SHORT [QI HI]) + +@@ -421,12 +418,6 @@ + (define_mode_iterator ANYFI [(SI "TARGET_HARD_FLOAT") + (DI "TARGET_DOUBLE_FLOAT")]) + +-;; A mode for which moves involving FPRs may need to be split. +-(define_mode_iterator SPLITF +- [(DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") +- (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") +- (TF "TARGET_64BIT && TARGET_DOUBLE_FLOAT")]) +- + ;; A mode for anything with 32 bits or more, and able to be loaded with + ;; the same addressing mode as ld.w. + (define_mode_iterator LD_AT_LEAST_32_BIT [GPR ANYF]) +@@ -2421,41 +2412,6 @@ + [(set_attr "move_type" "move,load,store") + (set_attr "mode" "DF")]) + +-;; Emit a doubleword move in which exactly one of the operands is +-;; a floating-point register. We can't just emit two normal moves +-;; because of the constraints imposed by the FPU register model; +-;; see loongarch_can_change_mode_class for details. Instead, we keep +-;; the FPR whole and use special patterns to refer to each word of +-;; the other operand. 
+- +-(define_expand "move_doubleword_fpr" +- [(set (match_operand:SPLITF 0) +- (match_operand:SPLITF 1))] +- "" +-{ +- if (FP_REG_RTX_P (operands[0])) +- { +- rtx low = loongarch_subword (operands[1], 0); +- rtx high = loongarch_subword (operands[1], 1); +- emit_insn (gen_load_low (operands[0], low)); +- if (!TARGET_64BIT) +- emit_insn (gen_movgr2frh (operands[0], high, operands[0])); +- else +- emit_insn (gen_load_high (operands[0], high, operands[0])); +- } +- else +- { +- rtx low = loongarch_subword (operands[0], 0); +- rtx high = loongarch_subword (operands[0], 1); +- emit_insn (gen_store_word (low, operands[1], const0_rtx)); +- if (!TARGET_64BIT) +- emit_insn (gen_movfrh2gr (high, operands[1])); +- else +- emit_insn (gen_store_word (high, operands[1], const1_rtx)); +- } +- DONE; +-}) +- + ;; Clear one FCC register + + (define_expand "movfcc" +@@ -2742,49 +2698,6 @@ + [(set_attr "type" "fcvt") + (set_attr "mode" "")]) + +-;; Load the low word of operand 0 with operand 1. +-(define_insn "load_low" +- [(set (match_operand:SPLITF 0 "register_operand" "=f,f") +- (unspec:SPLITF [(match_operand: 1 "general_operand" "rJ,m")] +- UNSPEC_LOAD_LOW))] +- "TARGET_HARD_FLOAT" +-{ +- operands[0] = loongarch_subword (operands[0], 0); +- return loongarch_output_move (operands[0], operands[1]); +-} +- [(set_attr "move_type" "mgtf,fpload") +- (set_attr "mode" "")]) +- +-;; Load the high word of operand 0 from operand 1, preserving the value +-;; in the low word. +-(define_insn "load_high" +- [(set (match_operand:SPLITF 0 "register_operand" "=f,f") +- (unspec:SPLITF [(match_operand: 1 "general_operand" "rJ,m") +- (match_operand:SPLITF 2 "register_operand" "0,0")] +- UNSPEC_LOAD_HIGH))] +- "TARGET_HARD_FLOAT" +-{ +- operands[0] = loongarch_subword (operands[0], 1); +- return loongarch_output_move (operands[0], operands[1]); +-} +- [(set_attr "move_type" "mgtf,fpload") +- (set_attr "mode" "")]) +- +-;; Store one word of operand 1 in operand 0. 
Operand 2 is 1 to store the +-;; high word and 0 to store the low word. +-(define_insn "store_word" +- [(set (match_operand: 0 "nonimmediate_operand" "=r,m") +- (unspec: [(match_operand:SPLITF 1 "register_operand" "f,f") +- (match_operand 2 "const_int_operand")] +- UNSPEC_STORE_WORD))] +- "TARGET_HARD_FLOAT" +-{ +- operands[1] = loongarch_subword (operands[1], INTVAL (operands[2])); +- return loongarch_output_move (operands[0], operands[1]); +-} +- [(set_attr "move_type" "mftg,fpstore") +- (set_attr "mode" "")]) +- + ;; Thread-Local Storage + + (define_insn "@got_load_tls_desc" +@@ -2876,28 +2789,6 @@ + (const_int 4) + (const_int 2)))]) + +-;; Move operand 1 to the high word of operand 0 using movgr2frh.w, preserving the +-;; value in the low word. +-(define_insn "movgr2frh" +- [(set (match_operand:SPLITF 0 "register_operand" "=f") +- (unspec:SPLITF [(match_operand: 1 "reg_or_0_operand" "rJ") +- (match_operand:SPLITF 2 "register_operand" "0")] +- UNSPEC_MOVGR2FRH))] +- "TARGET_DOUBLE_FLOAT" +- "movgr2frh.w\t%z1,%0" +- [(set_attr "move_type" "mgtf") +- (set_attr "mode" "")]) +- +-;; Move high word of operand 1 to operand 0 using movfrh2gr.s. +-(define_insn "movfrh2gr" +- [(set (match_operand: 0 "register_operand" "=r") +- (unspec: [(match_operand:SPLITF 1 "register_operand" "f")] +- UNSPEC_MOVFRH2GR))] +- "TARGET_DOUBLE_FLOAT" +- "movfrh2gr.s\t%0,%1" +- [(set_attr "move_type" "mftg") +- (set_attr "mode" "")]) +- + + ;; Expand in-line code to clear the instruction cache between operand[0] and + ;; operand[1]. 
+-- +2.43.0 + diff --git a/0176-LoongArch-Organize-the-code-related-to-split-move-an.patch b/0176-LoongArch-Organize-the-code-related-to-split-move-an.patch new file mode 100644 index 0000000..a34163b --- /dev/null +++ b/0176-LoongArch-Organize-the-code-related-to-split-move-an.patch @@ -0,0 +1,413 @@ +From 95089699271d235efc29ae48b78f8c7f1b6386c4 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Fri, 12 Jul 2024 09:57:40 +0800 +Subject: [PATCH 176/188] LoongArch: Organize the code related to split move + and merge the same functions. + +gcc/ChangeLog: + + * config/loongarch/loongarch-protos.h + (loongarch_split_128bit_move): Delete. + (loongarch_split_128bit_move_p): Delete. + (loongarch_split_256bit_move): Delete. + (loongarch_split_256bit_move_p): Delete. + (loongarch_split_vector_move): Add a function declaration. + * config/loongarch/loongarch.cc + (loongarch_vector_costs::finish_cost): Adjust the code + formatting. + (loongarch_split_vector_move_p): Merge + loongarch_split_128bit_move_p and loongarch_split_256bit_move_p. + (loongarch_split_move_p): Merge code. + (loongarch_split_move): Likewise. + (loongarch_split_128bit_move_p): Delete. + (loongarch_split_256bit_move_p): Delete. + (loongarch_split_128bit_move): Delete. + (loongarch_split_vector_move): Merge loongarch_split_128bit_move + and loongarch_split_256bit_move. + (loongarch_split_256bit_move): Delete. + (loongarch_global_init): Remove the extra semicolon at the + end of the function. + * config/loongarch/loongarch.md (*movdf_softfloat): Added a new + condition TARGET_64BIT. 
+--- + gcc/config/loongarch/loongarch-protos.h | 5 +- + gcc/config/loongarch/loongarch.cc | 221 ++++++------------------ + gcc/config/loongarch/loongarch.md | 1 + + 3 files changed, 58 insertions(+), 169 deletions(-) + +diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h +index 0c31a74b7..abf1a0893 100644 +--- a/gcc/config/loongarch/loongarch-protos.h ++++ b/gcc/config/loongarch/loongarch-protos.h +@@ -85,10 +85,7 @@ extern bool loongarch_split_move_p (rtx, rtx); + extern void loongarch_split_move (rtx, rtx); + extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode); + extern void loongarch_split_plus_constant (rtx *, machine_mode); +-extern void loongarch_split_128bit_move (rtx, rtx); +-extern bool loongarch_split_128bit_move_p (rtx, rtx); +-extern void loongarch_split_256bit_move (rtx, rtx); +-extern bool loongarch_split_256bit_move_p (rtx, rtx); ++extern void loongarch_split_vector_move (rtx, rtx); + extern const char *loongarch_output_move (rtx, rtx); + #ifdef RTX_CODE + extern void loongarch_expand_scc (rtx *); +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 260dd7b5f..53bd8d7ec 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -4351,10 +4351,10 @@ void + loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs) + { + loop_vec_info loop_vinfo = dyn_cast (m_vinfo); ++ + if (loop_vinfo) +- { +- m_suggested_unroll_factor = determine_suggested_unroll_factor (loop_vinfo); +- } ++ m_suggested_unroll_factor ++ = determine_suggested_unroll_factor (loop_vinfo); + + vector_costs::finish_cost (scalar_costs); + } +@@ -4420,6 +4420,7 @@ loongarch_subword (rtx op, bool high_p) + return simplify_gen_subreg (word_mode, op, mode, byte); + } + ++static bool loongarch_split_vector_move_p (rtx dest, rtx src); + /* Return true if a move from SRC to DEST should be split into two. + SPLIT_TYPE describes the split condition. 
*/ + +@@ -4441,13 +4442,11 @@ loongarch_split_move_p (rtx dest, rtx src) + return false; + } + +- /* Check if LSX moves need splitting. */ +- if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))) +- return loongarch_split_128bit_move_p (dest, src); + +- /* Check if LASX moves need splitting. */ +- if (LASX_SUPPORTED_MODE_P (GET_MODE (dest))) +- return loongarch_split_256bit_move_p (dest, src); ++ /* Check if vector moves need splitting. */ ++ if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)) ++ || LASX_SUPPORTED_MODE_P (GET_MODE (dest))) ++ return loongarch_split_vector_move_p (dest, src); + + /* Otherwise split all multiword moves. */ + return size > UNITS_PER_WORD; +@@ -4460,10 +4459,9 @@ void + loongarch_split_move (rtx dest, rtx src) + { + gcc_checking_assert (loongarch_split_move_p (dest, src)); +- if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))) +- loongarch_split_128bit_move (dest, src); +- else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest))) +- loongarch_split_256bit_move (dest, src); ++ if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)) ++ || LASX_SUPPORTED_MODE_P (GET_MODE (dest))) ++ loongarch_split_vector_move (dest, src); + else + gcc_unreachable (); + } +@@ -4585,224 +4583,117 @@ loongarch_output_move_index_float (rtx x, machine_mode mode, bool ldr) + + return insn[ldr][index-2]; + } +-/* Return true if a 128-bit move from SRC to DEST should be split. */ +- +-bool +-loongarch_split_128bit_move_p (rtx dest, rtx src) +-{ +- /* LSX-to-LSX moves can be done in a single instruction. */ +- if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest)) +- return false; +- +- /* Check for LSX loads and stores. */ +- if (FP_REG_RTX_P (dest) && MEM_P (src)) +- return false; +- if (FP_REG_RTX_P (src) && MEM_P (dest)) +- return false; +- +- /* Check for LSX set to an immediate const vector with valid replicated +- element. */ +- if (FP_REG_RTX_P (dest) +- && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511)) +- return false; +- +- /* Check for LSX load zero immediate. 
*/ +- if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src))) +- return false; +- +- return true; +-} +- +-/* Return true if a 256-bit move from SRC to DEST should be split. */ ++/* Return true if a vector move from SRC to DEST should be split. */ + +-bool +-loongarch_split_256bit_move_p (rtx dest, rtx src) ++static bool ++loongarch_split_vector_move_p (rtx dest, rtx src) + { +- /* LSX-to-LSX moves can be done in a single instruction. */ ++ /* Vector moves can be done in a single instruction. */ + if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest)) + return false; + +- /* Check for LSX loads and stores. */ ++ /* Check for vector loads and stores. */ + if (FP_REG_RTX_P (dest) && MEM_P (src)) + return false; + if (FP_REG_RTX_P (src) && MEM_P (dest)) + return false; + +- /* Check for LSX set to an immediate const vector with valid replicated ++ /* Check for vector set to an immediate const vector with valid replicated + element. */ + if (FP_REG_RTX_P (dest) + && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511)) + return false; + +- /* Check for LSX load zero immediate. */ ++ /* Check for vector load zero immediate. */ + if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src))) + return false; + + return true; + } + +-/* Split a 128-bit move from SRC to DEST. */ ++/* Split a vector move from SRC to DEST. 
*/ + + void +-loongarch_split_128bit_move (rtx dest, rtx src) ++loongarch_split_vector_move (rtx dest, rtx src) + { + int byte, index; +- rtx low_dest, low_src, d, s; ++ rtx s, d; ++ machine_mode mode = GET_MODE (dest); ++ bool lsx_p = LSX_SUPPORTED_MODE_P (mode); + + if (FP_REG_RTX_P (dest)) + { + gcc_assert (!MEM_P (src)); + +- rtx new_dest = dest; +- if (!TARGET_64BIT) +- { +- if (GET_MODE (dest) != V4SImode) +- new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0); +- } +- else +- { +- if (GET_MODE (dest) != V2DImode) +- new_dest = simplify_gen_subreg (V2DImode, dest, GET_MODE (dest), 0); +- } +- +- for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode); +- byte += UNITS_PER_WORD, index++) +- { +- s = loongarch_subword_at_byte (src, byte); +- if (!TARGET_64BIT) +- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, s, new_dest, +- GEN_INT (1 << index))); +- else +- emit_insn (gen_lsx_vinsgr2vr_d (new_dest, s, new_dest, +- GEN_INT (1 << index))); +- } +- } +- else if (FP_REG_RTX_P (src)) +- { +- gcc_assert (!MEM_P (dest)); +- +- rtx new_src = src; +- if (!TARGET_64BIT) +- { +- if (GET_MODE (src) != V4SImode) +- new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0); +- } +- else +- { +- if (GET_MODE (src) != V2DImode) +- new_src = simplify_gen_subreg (V2DImode, src, GET_MODE (src), 0); +- } ++ rtx (*gen_vinsgr2vr_d) (rtx, rtx, rtx, rtx); + +- for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode); +- byte += UNITS_PER_WORD, index++) +- { +- d = loongarch_subword_at_byte (dest, byte); +- if (!TARGET_64BIT) +- emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index))); +- else +- emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index))); +- } +- } +- else +- { +- low_dest = loongarch_subword_at_byte (dest, 0); +- low_src = loongarch_subword_at_byte (src, 0); +- gcc_assert (REG_P (low_dest) && REG_P (low_src)); +- /* Make sure the source register is not written before reading. 
*/ +- if (REGNO (low_dest) <= REGNO (low_src)) ++ if (lsx_p) + { +- for (byte = 0; byte < GET_MODE_SIZE (TImode); +- byte += UNITS_PER_WORD) +- { +- d = loongarch_subword_at_byte (dest, byte); +- s = loongarch_subword_at_byte (src, byte); +- loongarch_emit_move (d, s); +- } ++ mode = V2DImode; ++ gen_vinsgr2vr_d = gen_lsx_vinsgr2vr_d; + } + else + { +- for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0; +- byte -= UNITS_PER_WORD) +- { +- d = loongarch_subword_at_byte (dest, byte); +- s = loongarch_subword_at_byte (src, byte); +- loongarch_emit_move (d, s); +- } ++ mode = V4DImode; ++ gen_vinsgr2vr_d = gen_lasx_xvinsgr2vr_d; + } +- } +-} +- +-/* Split a 256-bit move from SRC to DEST. */ +- +-void +-loongarch_split_256bit_move (rtx dest, rtx src) +-{ +- int byte, index; +- rtx low_dest, low_src, d, s; +- +- if (FP_REG_RTX_P (dest)) +- { +- gcc_assert (!MEM_P (src)); + + rtx new_dest = dest; +- if (!TARGET_64BIT) +- { +- if (GET_MODE (dest) != V8SImode) +- new_dest = simplify_gen_subreg (V8SImode, dest, GET_MODE (dest), 0); +- } +- else +- { +- if (GET_MODE (dest) != V4DImode) +- new_dest = simplify_gen_subreg (V4DImode, dest, GET_MODE (dest), 0); +- } ++ ++ if (GET_MODE (dest) != mode) ++ new_dest = simplify_gen_subreg (mode, dest, GET_MODE (dest), 0); + + for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (dest)); + byte += UNITS_PER_WORD, index++) + { + s = loongarch_subword_at_byte (src, byte); +- if (!TARGET_64BIT) +- emit_insn (gen_lasx_xvinsgr2vr_w (new_dest, s, new_dest, +- GEN_INT (1 << index))); +- else +- emit_insn (gen_lasx_xvinsgr2vr_d (new_dest, s, new_dest, +- GEN_INT (1 << index))); ++ emit_insn (gen_vinsgr2vr_d (new_dest, s, new_dest, ++ GEN_INT (1 << index))); + } + } + else if (FP_REG_RTX_P (src)) + { + gcc_assert (!MEM_P (dest)); + +- rtx new_src = src; +- if (!TARGET_64BIT) ++ rtx (*gen_vpickve2gr_d) (rtx, rtx, rtx); ++ ++ if (lsx_p) + { +- if (GET_MODE (src) != V8SImode) +- new_src = simplify_gen_subreg (V8SImode, src, GET_MODE 
(src), 0); ++ mode = V2DImode; ++ gen_vpickve2gr_d = gen_lsx_vpickve2gr_d; + } + else + { +- if (GET_MODE (src) != V4DImode) +- new_src = simplify_gen_subreg (V4DImode, src, GET_MODE (src), 0); ++ mode = V4DImode; ++ gen_vpickve2gr_d = gen_lasx_xvpickve2gr_d; + } + ++ rtx new_src = src; ++ if (GET_MODE (src) != mode) ++ new_src = simplify_gen_subreg (mode, src, GET_MODE (src), 0); ++ + for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (src)); + byte += UNITS_PER_WORD, index++) + { + d = loongarch_subword_at_byte (dest, byte); +- if (!TARGET_64BIT) +- emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index))); +- else +- emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index))); ++ emit_insn (gen_vpickve2gr_d (d, new_src, GEN_INT (index))); + } + } + else + { ++ /* This part of the code is designed to handle the following situations: ++ (set (reg:V2DI 4 $r4) ++ (reg:V2DI 6 $r6)) ++ The trigger test case is lsx-mov-1.c. */ ++ rtx low_dest, low_src; ++ + low_dest = loongarch_subword_at_byte (dest, 0); + low_src = loongarch_subword_at_byte (src, 0); + gcc_assert (REG_P (low_dest) && REG_P (low_src)); + /* Make sure the source register is not written before reading. */ + if (REGNO (low_dest) <= REGNO (low_src)) + { +- for (byte = 0; byte < GET_MODE_SIZE (TImode); ++ for (byte = 0; byte < GET_MODE_SIZE (GET_MODE (dest)); + byte += UNITS_PER_WORD) + { + d = loongarch_subword_at_byte (dest, byte); +@@ -4812,8 +4703,8 @@ loongarch_split_256bit_move (rtx dest, rtx src) + } + else + { +- for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0; +- byte -= UNITS_PER_WORD) ++ for (byte = GET_MODE_SIZE (GET_MODE (dest)) - UNITS_PER_WORD; ++ byte >= 0; byte -= UNITS_PER_WORD) + { + d = loongarch_subword_at_byte (dest, byte); + s = loongarch_subword_at_byte (src, byte); +@@ -7603,7 +7494,7 @@ loongarch_global_init (void) + + /* Function to allocate machine-dependent function status. 
*/ + init_machine_status = &loongarch_init_machine_status; +-}; ++} + + static void + loongarch_reg_init (void) +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 16f9f37c8..8bcb43042 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -2406,6 +2406,7 @@ + [(set (match_operand:DF 0 "nonimmediate_operand" "=r,r,m") + (match_operand:DF 1 "move_operand" "rG,m,rG"))] + "(TARGET_SOFT_FLOAT || TARGET_SINGLE_FLOAT) ++ && TARGET_64BIT + && (register_operand (operands[0], DFmode) + || reg_or_0_operand (operands[1], DFmode))" + { return loongarch_output_move (operands[0], operands[1]); } +-- +2.43.0 + diff --git a/0177-LoongArch-Expand-some-SImode-operations-through-si3_.patch b/0177-LoongArch-Expand-some-SImode-operations-through-si3_.patch new file mode 100644 index 0000000..a748134 --- /dev/null +++ b/0177-LoongArch-Expand-some-SImode-operations-through-si3_.patch @@ -0,0 +1,364 @@ +From 34c8e935780d43a797e403ca6604115ec393f0e6 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sat, 20 Jul 2024 20:38:13 +0800 +Subject: [PATCH 177/188] LoongArch: Expand some SImode operations through + "si3_extend" instructions if TARGET_64BIT + +We already had "si3_extend" insns and we hoped the fwprop or combine +passes can use them to remove unnecessary sign extensions. But this +does not always work: for cases like x << 1 | y, the compiler +tends to do + + (sign_extend:DI + (ior:SI (ashift:SI (reg:SI $r4) + (const_int 1)) + (reg:SI $r5))) + +instead of + + (ior:DI (sign_extend:DI (ashift:SI (reg:SI $r4) (const_int 1))) + (sign_extend:DI (reg:SI $r5))) + +So we cannot match the ashlsi3_extend instruction here and we get: + + slli.w $r4,$r4,1 + or $r4,$r5,$r4 + slli.w $r4,$r4,0 # <= redundant + jr $r1 + +To eliminate this redundant extension we need to turn SImode shift etc. +to DImode "si3_extend" operations earlier, when we expand the SImode +operation. 
We are already doing this for addition, now do it for +shifts, rotates, substract, multiplication, division, and modulo as +well. + +The bytepick.w definition for TARGET_64BIT needs to be adjusted so it +won't be undone by the shift expanding. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (optab): Add (rotatert "rotr"). + (3, 3, + sub3, rotr3, mul3): Add a "*" to the insn name + so we can redefine the names with define_expand. + (*si3_extend): Remove "*" so we can use them + in expanders. + (*subsi3_extended, *mulsi3_extended): Likewise, also remove the + trailing "ed" for consistency. + (*si3_extended): Add mode for sign_extend to + prevent an ICE using it in expanders. + (shift_w, arith_w): New define_code_iterator. + (3): New define_expand. Expand with + si3_extend for SImode if TARGET_64BIT. + (3): Likewise. + (mul3): Expand to mulsi3_extended for SImode if + TARGET_64BIT and ISA_HAS_DIV32. + (3): Expand to si3_extended + for SImode if TARGET_64BIT. + (rotl3): Expand to rotrsi3_extend for SImode if + TARGET_64BIT. + (bytepick_w_): Add mode for lshiftrt and ashift. + (bitsize, bytepick_imm, bytepick_w_ashift_amount): New + define_mode_attr. + (bytepick_w__extend): Adjust for the RTL change + caused by 32-bit shift expanding. Now bytepick_imm only covers + 2 and 3, separate one remaining case to ... + (bytepick_w_1_extend): ... here, new define_insn. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/bitwise_extend.c: New test. 
+--- + gcc/config/loongarch/loongarch.md | 131 +++++++++++++++--- + .../gcc.target/loongarch/bitwise_extend.c | 45 ++++++ + 2 files changed, 154 insertions(+), 22 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/bitwise_extend.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 8bcb43042..6915dab0e 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -546,6 +546,7 @@ + (define_code_attr optab [(ashift "ashl") + (ashiftrt "ashr") + (lshiftrt "lshr") ++ (rotatert "rotr") + (ior "ior") + (xor "xor") + (and "and") +@@ -624,6 +625,49 @@ + (48 "6") + (56 "7")]) + ++;; Expand some 32-bit operations to si3_extend operations if TARGET_64BIT ++;; so the redundant sign extension can be removed if the output is used as ++;; an input of a bitwise operation. Note plus, rotl, and div are handled ++;; separately. ++(define_code_iterator shift_w [any_shift rotatert]) ++(define_code_iterator arith_w [minus mult]) ++ ++(define_expand "3" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (shift_w:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand:SI 2 "arith_operand" "rI")))] ++ "" ++{ ++ if (TARGET_64BIT && mode == SImode) ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_si3_extend (t, operands[1], operands[2])); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands[0], t); ++ DONE; ++ } ++}) ++ ++(define_expand "3" ++ [(set (match_operand:GPR 0 "register_operand" "=r") ++ (arith_w:GPR (match_operand:GPR 1 "register_operand" "r") ++ (match_operand:GPR 2 "register_operand" "r")))] ++ "" ++{ ++ if (TARGET_64BIT && mode == SImode) ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_si3_extend (t, operands[1], operands[2])); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands[0], t); ++ DONE; 
++ } ++}) ++ + ;; + ;; .................... + ;; +@@ -781,7 +825,7 @@ + [(set_attr "type" "fadd") + (set_attr "mode" "")]) + +-(define_insn "sub3" ++(define_insn "*sub3" + [(set (match_operand:GPR 0 "register_operand" "=r") + (minus:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "register_operand" "r")))] +@@ -791,7 +835,7 @@ + (set_attr "mode" "")]) + + +-(define_insn "*subsi3_extended" ++(define_insn "subsi3_extend" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") +@@ -818,7 +862,7 @@ + [(set_attr "type" "fmul") + (set_attr "mode" "")]) + +-(define_insn "mul3" ++(define_insn "*mul3" + [(set (match_operand:GPR 0 "register_operand" "=r") + (mult:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "register_operand" "r")))] +@@ -827,7 +871,7 @@ + [(set_attr "type" "imul") + (set_attr "mode" "")]) + +-(define_insn "*mulsi3_extended" ++(define_insn "mulsi3_extend" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (mult:SI (match_operand:SI 1 "register_operand" "r") +@@ -1001,8 +1045,19 @@ + (match_operand:GPR 2 "register_operand")))] + "" + { +- if (GET_MODE (operands[0]) == SImode && TARGET_64BIT && !ISA_HAS_DIV32) ++ if (GET_MODE (operands[0]) == SImode && TARGET_64BIT) + { ++ if (ISA_HAS_DIV32) ++ { ++ rtx t = gen_reg_rtx (DImode); ++ emit_insn (gen_si3_extended (t, operands[1], operands[2])); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands[0], t); ++ DONE; ++ } ++ + rtx reg1 = gen_reg_rtx (DImode); + rtx reg2 = gen_reg_rtx (DImode); + rtx rd = gen_reg_rtx (DImode); +@@ -1038,7 +1093,7 @@ + + (define_insn "si3_extended" + [(set (match_operand:DI 0 "register_operand" "=r,&r,&r") +- (sign_extend ++ (sign_extend:DI + (any_div:SI (match_operand:SI 1 "register_operand" "r,r,0") + (match_operand:SI 2 "register_operand" "r,r,r"))))] + 
"TARGET_64BIT && ISA_HAS_DIV32" +@@ -2981,7 +3036,7 @@ + ;; + ;; .................... + +-(define_insn "3" ++(define_insn "*3" + [(set (match_operand:GPR 0 "register_operand" "=r") + (any_shift:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:SI 2 "arith_operand" "rI")))] +@@ -2996,7 +3051,7 @@ + [(set_attr "type" "shift") + (set_attr "mode" "")]) + +-(define_insn "*si3_extend" ++(define_insn "si3_extend" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (any_shift:SI (match_operand:SI 1 "register_operand" "r") +@@ -3011,7 +3066,7 @@ + [(set_attr "type" "shift") + (set_attr "mode" "SI")]) + +-(define_insn "rotr3" ++(define_insn "*rotr3" + [(set (match_operand:GPR 0 "register_operand" "=r,r") + (rotatert:GPR (match_operand:GPR 1 "register_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,I")))] +@@ -3040,6 +3095,19 @@ + "" + { + operands[3] = gen_reg_rtx (SImode); ++ ++ if (TARGET_64BIT && mode == SImode) ++ { ++ rtx t = gen_reg_rtx (DImode); ++ ++ emit_insn (gen_negsi2 (operands[3], operands[2])); ++ emit_insn (gen_rotrsi3_extend (t, operands[1], operands[3])); ++ t = gen_lowpart (SImode, t); ++ SUBREG_PROMOTED_VAR_P (t) = 1; ++ SUBREG_PROMOTED_SET (t, SRP_SIGNED); ++ emit_move_insn (operands[0], t); ++ DONE; ++ } + }); + + ;; The following templates were added to generate "bstrpick.d + alsl.d" +@@ -4061,26 +4129,45 @@ + + (define_insn "bytepick_w_" + [(set (match_operand:SI 0 "register_operand" "=r") +- (ior:SI (lshiftrt (match_operand:SI 1 "register_operand" "r") +- (const_int )) +- (ashift (match_operand:SI 2 "register_operand" "r") +- (const_int bytepick_w_ashift_amount))))] ++ (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int )) ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int bytepick_w_ashift_amount))))] + "" + "bytepick.w\t%0,%1,%2," + [(set_attr "mode" "SI")]) + ++(define_mode_attr bitsize [(QI "8") (HI "16")]) ++(define_mode_attr bytepick_imm [(QI "3") (HI 
"2")]) ++(define_mode_attr bytepick_w_ashift_amount [(QI "24") (HI "16")]) ++ + (define_insn "bytepick_w__extend" + [(set (match_operand:DI 0 "register_operand" "=r") +- (sign_extend:DI +- (subreg:SI +- (ior:DI (subreg:DI (lshiftrt +- (match_operand:SI 1 "register_operand" "r") +- (const_int )) 0) +- (subreg:DI (ashift +- (match_operand:SI 2 "register_operand" "r") +- (const_int bytepick_w_ashift_amount)) 0)) 0)))] ++ (ior:DI ++ (ashift:DI ++ (sign_extend:DI ++ (subreg:SHORT (match_operand:DI 1 "register_operand" "r") 0)) ++ (const_int )) ++ (zero_extract:DI (match_operand:DI 2 "register_operand" "r") ++ (const_int ) ++ (const_int ))))] + "TARGET_64BIT" +- "bytepick.w\t%0,%1,%2," ++ "bytepick.w\t%0,%2,%1," ++ [(set_attr "mode" "SI")]) ++ ++(define_insn "bytepick_w_1_extend" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ior:DI ++ (ashift:DI ++ (sign_extract:DI (match_operand:DI 1 "register_operand" "r") ++ (const_int 24) ++ (const_int 0)) ++ (const_int 8)) ++ (zero_extract:DI (match_operand:DI 2 "register_operand" "r") ++ (const_int 8) ++ (const_int 24))))] ++ "TARGET_64BIT" ++ "bytepick.w\t%0,%2,%1,1" + [(set_attr "mode" "SI")]) + + (define_insn "bytepick_d_" +diff --git a/gcc/testsuite/gcc.target/loongarch/bitwise_extend.c b/gcc/testsuite/gcc.target/loongarch/bitwise_extend.c +new file mode 100644 +index 000000000..c2bc489a7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/bitwise_extend.c +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mdiv32" } */ ++/* { dg-final { scan-assembler-not "slli\\.w" } } */ ++ ++int ++f1 (int a, int b) ++{ ++ return (a << b) | b; ++} ++ ++int ++f2 (int a, int b) ++{ ++ return (a - b) | b; ++} ++ ++int ++f3 (int a, int b) ++{ ++ return (a * b) | b; ++} ++ ++int ++f4 (int a, int b) ++{ ++ return (unsigned) a >> b | (unsigned) a << (32 - b) | b; ++} ++ ++int ++f5 (int a, int b) ++{ ++ return (unsigned) a << b | (unsigned) a >> (32 - b) | b; ++} ++ ++int ++f6 (int a, int b) ++{ ++ 
return (a % b) | b; ++} ++ ++int ++f7 (int a, int b) ++{ ++ return (a + b) | b; ++} +-- +2.43.0 + diff --git a/0178-LoongArch-Relax-ins_zero_bitmask_operand-and-remove-.patch b/0178-LoongArch-Relax-ins_zero_bitmask_operand-and-remove-.patch new file mode 100644 index 0000000..63c05e4 --- /dev/null +++ b/0178-LoongArch-Relax-ins_zero_bitmask_operand-and-remove-.patch @@ -0,0 +1,123 @@ +From 8311e0053c8a9646b8798c53ae4a8f45d12c42c1 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 28 Jul 2024 17:02:49 +0800 +Subject: [PATCH 178/188] LoongArch: Relax ins_zero_bitmask_operand and remove + and3_align + +In r15-1207 I was too stupid to realize we just need to relax +ins_zero_bitmask_operand to allow using bstrins for aligning, instead of +adding a new split. And, "> 12" in ins_zero_bitmask_operand also makes +no sense: it rejects bstrins for things like "x & ~4l" with no good +reason. + +So fix my errors now. + +gcc/ChangeLog: + + * config/loongarch/predicates.md (ins_zero_bitmask_operand): + Cover more cases that bstrins can benefit. + (high_bitmask_operand): Remove. + * config/loongarch/constraints.md (Yy): Remove. + * config/loongarch/loongarch.md (and3_align): Remove. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/bstrins-4.c: New test. 
+--- + gcc/config/loongarch/constraints.md | 4 ---- + gcc/config/loongarch/loongarch.md | 17 ----------------- + gcc/config/loongarch/predicates.md | 9 ++------- + gcc/testsuite/gcc.target/loongarch/bstrins-4.c | 9 +++++++++ + 4 files changed, 11 insertions(+), 28 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-4.c + +diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md +index f3f5776da..d653ea82a 100644 +--- a/gcc/config/loongarch/constraints.md ++++ b/gcc/config/loongarch/constraints.md +@@ -292,10 +292,6 @@ + "@internal" + (match_operand 0 "low_bitmask_operand")) + +-(define_constraint "Yy" +- "@internal" +- (match_operand 0 "high_bitmask_operand")) +- + (define_constraint "YI" + "@internal + A replicated vector const in which the replicated value is in the range +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 6915dab0e..1ebcfa0c7 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1588,23 +1588,6 @@ + [(set_attr "move_type" "pick_ins") + (set_attr "mode" "")]) + +-(define_insn_and_split "and3_align" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (and:GPR (match_operand:GPR 1 "register_operand" "r") +- (match_operand:GPR 2 "high_bitmask_operand" "Yy")))] +- "" +- "#" +- "" +- [(set (match_dup 0) (match_dup 1)) +- (set (zero_extract:GPR (match_dup 0) (match_dup 2) (const_int 0)) +- (const_int 0))] +-{ +- int len; +- +- len = low_bitmask_len (mode, ~INTVAL (operands[2])); +- operands[2] = GEN_INT (len); +-}) +- + (define_insn_and_split "*bstrins__for_mask" + [(set (match_operand:GPR 0 "register_operand" "=r") + (and:GPR (match_operand:GPR 1 "register_operand" "r") +diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md +index 95be8a4fe..2b7f7ed47 100644 +--- a/gcc/config/loongarch/predicates.md ++++ b/gcc/config/loongarch/predicates.md +@@ -293,10 +293,6 @@ + (and (match_code 
"const_int") + (match_test "low_bitmask_len (mode, INTVAL (op)) > 12"))) + +-(define_predicate "high_bitmask_operand" +- (and (match_code "const_int") +- (match_test "low_bitmask_len (mode, ~INTVAL (op)) > 0"))) +- + (define_predicate "d_operand" + (and (match_code "reg") + (match_test "GP_REG_P (REGNO (op))"))) +@@ -406,11 +402,10 @@ + + (define_predicate "ins_zero_bitmask_operand" + (and (match_code "const_int") +- (match_test "INTVAL (op) != -1") +- (match_test "INTVAL (op) & 1") + (match_test "low_bitmask_len (mode, \ + ~UINTVAL (op) | (~UINTVAL(op) - 1)) \ +- > 12"))) ++ > 0") ++ (not (match_operand 0 "const_uns_arith_operand")))) + + (define_predicate "const_call_insn_operand" + (match_code "const,symbol_ref,label_ref") +diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-4.c b/gcc/testsuite/gcc.target/loongarch/bstrins-4.c +new file mode 100644 +index 000000000..0823cfc38 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/bstrins-4.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r4,\\\$r0,2,2" } } */ ++ ++long ++x (long a) ++{ ++ return a & ~4; ++} +-- +2.43.0 + diff --git a/0179-LoongArch-Rework-bswap-hi-si-di-2-definition.patch b/0179-LoongArch-Rework-bswap-hi-si-di-2-definition.patch new file mode 100644 index 0000000..8dcdd3e --- /dev/null +++ b/0179-LoongArch-Rework-bswap-hi-si-di-2-definition.patch @@ -0,0 +1,224 @@ +From 54bf8fc616af5cdb9e4c787a2dfb2c516c8e425a Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Sun, 28 Jul 2024 19:57:02 +0800 +Subject: [PATCH 179/188] LoongArch: Rework bswap{hi,si,di}2 definition + +Per a gcc-help thread we are generating sub-optimal code for +__builtin_bswap{32,64}. To fix it: + +- Use a single revb.d instruction for bswapdi2. +- Use a single revb.2w instruction for bswapsi2 for TARGET_64BIT, + revb.2h + rotri.w for !TARGET_64BIT. 
+- Use a single revb.2h instruction for bswapsi2 (x) r>> 16, and a single + revb.2w instruction for bswapdi2 (x) r>> 32. + +Unfortunately I cannot figure out a way to make the compiler generate +revb.4h or revh.{2w,d} instructions. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (UNSPEC_REVB_2H, UNSPEC_REVB_4H, + UNSPEC_REVH_D): Remove UNSPECs. + (revb_4h, revh_d): Remove define_insn. + (revb_2h): Define as (rotatert:SI (bswap:SI x) 16) instead of + an UNSPEC. + (revb_2h_extend, revb_2w, *bswapsi2, bswapdi2): New define_insn. + (bswapsi2): Change to define_expand. Only expand to revb.2h + + rotri.w if !TARGET_64BIT. + (bswapdi2): Change to define_insn of which the output is just a + revb.d instruction. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/revb.c: New test. +--- + gcc/config/loongarch/loongarch.md | 79 ++++++++++++----------- + gcc/testsuite/gcc.target/loongarch/revb.c | 61 +++++++++++++++++ + 2 files changed, 104 insertions(+), 36 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/revb.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 1ebcfa0c7..b1c828dba 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -20,11 +20,6 @@ + ;; . + + (define_c_enum "unspec" [ +- ;; Integer operations that are too cumbersome to describe directly. +- UNSPEC_REVB_2H +- UNSPEC_REVB_4H +- UNSPEC_REVH_D +- + ;; Floating-point moves. + UNSPEC_LOAD_LOW + UNSPEC_LOAD_HIGH +@@ -3151,55 +3146,67 @@ + + ;; Reverse the order of bytes of operand 1 and store the result in operand 0. 
+ +-(define_insn "bswaphi2" +- [(set (match_operand:HI 0 "register_operand" "=r") +- (bswap:HI (match_operand:HI 1 "register_operand" "r")))] ++(define_insn "revb_2h" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (rotatert:SI (bswap:SI (match_operand:SI 1 "register_operand" "r")) ++ (const_int 16)))] + "" + "revb.2h\t%0,%1" + [(set_attr "type" "shift")]) + +-(define_insn_and_split "bswapsi2" +- [(set (match_operand:SI 0 "register_operand" "=r") +- (bswap:SI (match_operand:SI 1 "register_operand" "r")))] +- "" +- "#" +- "" +- [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_REVB_2H)) +- (set (match_dup 0) (rotatert:SI (match_dup 0) (const_int 16)))] +- "" +- [(set_attr "insn_count" "2")]) +- +-(define_insn_and_split "bswapdi2" ++(define_insn "revb_2h_extend" + [(set (match_operand:DI 0 "register_operand" "=r") +- (bswap:DI (match_operand:DI 1 "register_operand" "r")))] ++ (sign_extend:DI ++ (rotatert:SI ++ (bswap:SI (match_operand:SI 1 "register_operand" "r")) ++ (const_int 16))))] + "TARGET_64BIT" +- "#" +- "" +- [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_REVB_4H)) +- (set (match_dup 0) (unspec:DI [(match_dup 0)] UNSPEC_REVH_D))] +- "" +- [(set_attr "insn_count" "2")]) ++ "revb.2h\t%0,%1" ++ [(set_attr "type" "shift")]) + +-(define_insn "revb_2h" +- [(set (match_operand:SI 0 "register_operand" "=r") +- (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_REVB_2H))] ++(define_insn "bswaphi2" ++ [(set (match_operand:HI 0 "register_operand" "=r") ++ (bswap:HI (match_operand:HI 1 "register_operand" "r")))] + "" + "revb.2h\t%0,%1" + [(set_attr "type" "shift")]) + +-(define_insn "revb_4h" ++(define_insn "revb_2w" + [(set (match_operand:DI 0 "register_operand" "=r") +- (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_REVB_4H))] ++ (rotatert:DI (bswap:DI (match_operand:DI 1 "register_operand" "r")) ++ (const_int 32)))] + "TARGET_64BIT" +- "revb.4h\t%0,%1" ++ "revb.2w\t%0,%1" + [(set_attr "type" "shift")]) + 
+-(define_insn "revh_d" ++(define_insn "*bswapsi2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (bswap:SI (match_operand:SI 1 "register_operand" "r")))] ++ "TARGET_64BIT" ++ "revb.2w\t%0,%1" ++ [(set_attr "type" "shift")]) ++ ++(define_expand "bswapsi2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (bswap:SI (match_operand:SI 1 "register_operand" "r")))] ++ "" ++{ ++ if (!TARGET_64BIT) ++ { ++ rtx t = gen_reg_rtx (SImode); ++ emit_insn (gen_revb_2h (t, operands[1])); ++ emit_insn (gen_rotrsi3 (operands[0], t, GEN_INT (16))); ++ DONE; ++ } ++}) ++ ++(define_insn "bswapdi2" + [(set (match_operand:DI 0 "register_operand" "=r") +- (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_REVH_D))] ++ (bswap:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_64BIT" +- "revh.d\t%0,%1" ++ "revb.d\t%0,%1" + [(set_attr "type" "shift")]) ++ + + ;; + ;; .................... +diff --git a/gcc/testsuite/gcc.target/loongarch/revb.c b/gcc/testsuite/gcc.target/loongarch/revb.c +new file mode 100644 +index 000000000..27a5d0fc7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/revb.c +@@ -0,0 +1,61 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* ++**t1: ++** revb.2w \$r4,\$r4 ++** slli.w \$r4,\$r4,0 ++** jr \$r1 ++*/ ++unsigned int ++t1 (unsigned int x) ++{ ++ return __builtin_bswap32 (x); ++} ++ ++/* ++**t2: ++** revb.d \$r4,\$r4 ++** jr \$r1 ++*/ ++unsigned long ++t2 (unsigned long x) ++{ ++ return __builtin_bswap64 (x); ++} ++ ++/* ++**t3: ++** revb.2h \$r4,\$r4 ++** jr \$r1 ++*/ ++unsigned int ++t3 (unsigned int x) ++{ ++ return (x >> 8) & 0xff00ff | (x << 8) & 0xff00ff00; ++} ++ ++/* ++**t4: ++** revb.2w \$r4,\$r4 ++** jr \$r1 ++*/ ++unsigned long ++t4 (unsigned long x) ++{ ++ x = __builtin_bswap64 (x); ++ return x << 32 | x >> 32; ++} ++ ++/* ++**t5: ++** revb.2h \$r4,\$r4 ++** bstrpick.w \$r4,\$r4,15,0 ++** jr \$r1 ++*/ 
++unsigned short ++t5 (unsigned short x) ++{ ++ return __builtin_bswap16 (x); ++} +-- +2.43.0 + diff --git a/0180-testsuite-fix-dg-do-preprocess-typo.patch b/0180-testsuite-fix-dg-do-preprocess-typo.patch new file mode 100644 index 0000000..8efaca3 --- /dev/null +++ b/0180-testsuite-fix-dg-do-preprocess-typo.patch @@ -0,0 +1,26 @@ +From 35d804730dcac1b3e96db4b587de0cd77fec3504 Mon Sep 17 00:00:00 2001 +From: Sam James +Date: Tue, 30 Jul 2024 21:46:29 +0100 +Subject: [PATCH 180/188] testsuite: fix 'dg-do-preprocess' typo + +We want 'dg-do preprocess', not 'dg-do-preprocess'. Fix that. + + PR target/106828 + * g++.target/loongarch/pr106828.C: Fix 'dg-do compile' typo. +--- + gcc/testsuite/g++.target/loongarch/pr106828.C | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/testsuite/g++.target/loongarch/pr106828.C b/gcc/testsuite/g++.target/loongarch/pr106828.C +index 190c1db71..0d13cbbd5 100644 +--- a/gcc/testsuite/g++.target/loongarch/pr106828.C ++++ b/gcc/testsuite/g++.target/loongarch/pr106828.C +@@ -1,4 +1,4 @@ +-/* { dg-do-preprocess } */ ++/* { dg-do preprocess } */ + /* { dg-options "-mabi=lp64d -fsanitize=address" } */ + + /* Tests whether the compiler supports compile option '-fsanitize=address'. */ +-- +2.43.0 + diff --git a/0181-LoongArch-Remove-gawk-extension-from-a-generator-scr.patch b/0181-LoongArch-Remove-gawk-extension-from-a-generator-scr.patch new file mode 100644 index 0000000..eae11a0 --- /dev/null +++ b/0181-LoongArch-Remove-gawk-extension-from-a-generator-scr.patch @@ -0,0 +1,47 @@ +From 643248a4c60c016af44bc740b35c7ac174849029 Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Tue, 23 Jul 2024 10:04:26 +0800 +Subject: [PATCH 181/188] LoongArch: Remove gawk extension from a generator + script. + +gcc/ChangeLog: + + * config/loongarch/genopts/gen-evolution.awk: Do not use + "length()" to compute the size of an array. 
+--- + gcc/config/loongarch/genopts/gen-evolution.awk | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/gcc/config/loongarch/genopts/gen-evolution.awk b/gcc/config/loongarch/genopts/gen-evolution.awk +index 4d105afa9..1c8004e41 100644 +--- a/gcc/config/loongarch/genopts/gen-evolution.awk ++++ b/gcc/config/loongarch/genopts/gen-evolution.awk +@@ -1,4 +1,4 @@ +-#!/usr/bin/gawk ++#!/usr/bin/awk -f + # + # A simple script that generates loongarch-evolution.h + # from genopts/isa-evolution.in +@@ -94,8 +94,9 @@ function gen_cpucfg_useful_idx() + idx_bucket[cpucfg_word[i]] = 1 + + delete idx_list ++ j = 1 + for (i in idx_bucket) +- idx_list[length(idx_list)-1] = i+0 ++ idx_list[j++] = i+0 + delete idx_bucket + + asort (idx_list) +@@ -108,7 +109,7 @@ function gen_cpucfg_useful_idx() + print "" + + printf ("static constexpr int N_CPUCFG_WORDS = %d;\n", +- idx_list[length(idx_list)] + 1) ++ idx_list[j - 1] + 1) + + delete idx_list + } +-- +2.43.0 + diff --git a/0182-LoongArch-Use-iorn-and-andn-standard-pattern-names.patch b/0182-LoongArch-Use-iorn-and-andn-standard-pattern-names.patch new file mode 100644 index 0000000..74cc610 --- /dev/null +++ b/0182-LoongArch-Use-iorn-and-andn-standard-pattern-names.patch @@ -0,0 +1,226 @@ +From 64560e75b4d020b6c47e07592595ceed663541af Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Thu, 1 Aug 2024 16:07:25 +0800 +Subject: [PATCH 182/188] LoongArch: Use iorn and andn standard pattern names. + +R15-1890 introduced new optabs iorc and andc, and its corresponding +internal functions BIT_{ANDC,IORC}, and if targets defines such optabs +for vector modes. And in r15-2258 the iorc and andc were renamed to +iorn and andn. +So we changed the andn and iorn implementation templates to the standard +template names. + +gcc/ChangeLog: + + * config/loongarch/lasx.md (xvandn3): Rename to ... + (andn3): This. + (xvorn3): Rename to ... + (iorn3): This. 
+ * config/loongarch/loongarch-builtins.cc + (CODE_FOR_lsx_vandn_v): Defined as the modified name. + (CODE_FOR_lsx_vorn_v): Likewise. + (CODE_FOR_lasx_xvandn_v): Likewise. + (CODE_FOR_lasx_xvorn_v): Likewise. + (loongarch_expand_builtin_insn): When the builtin function to be + called is __builtin_lasx_xvandn or __builtin_lsx_vandn, swap the + two operands. + * config/loongarch/loongarch.md (n): Rename to ... + (n3): This. + * config/loongarch/lsx.md (vandn3): Rename to ... + (andn3): This. + (vorn3): Rename to ... + (iorn3): This. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/lasx-andn-iorn.c: New test. + * gcc.target/loongarch/lsx-andn-iorn.c: New test. +--- + gcc/config/loongarch/lasx.md | 10 +++---- + gcc/config/loongarch/loongarch-builtins.cc | 10 ++++--- + gcc/config/loongarch/loongarch.md | 8 +++--- + gcc/config/loongarch/lsx.md | 10 +++---- + .../gcc.target/loongarch/lasx-andn-iorn.c | 11 ++++++++ + .../gcc.target/loongarch/lsx-andn-iorn.c | 28 +++++++++++++++++++ + 6 files changed, 59 insertions(+), 18 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 44a7d58ff..3775155ca 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -2716,12 +2716,12 @@ + (set_attr "mode" "V4DI")]) + + ;; Extend loongson-sx to loongson-asx. 
+-(define_insn "xvandn3" ++(define_insn "andn3" + [(set (match_operand:LASX 0 "register_operand" "=f") +- (and:LASX (not:LASX (match_operand:LASX 1 "register_operand" "f")) +- (match_operand:LASX 2 "register_operand" "f")))] ++ (and:LASX (not:LASX (match_operand:LASX 2 "register_operand" "f")) ++ (match_operand:LASX 1 "register_operand" "f")))] + "ISA_HAS_LASX" +- "xvandn.v\t%u0,%u1,%u2" ++ "xvandn.v\t%u0,%u2,%u1" + [(set_attr "type" "simd_logic") + (set_attr "mode" "")]) + +@@ -4637,7 +4637,7 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +-(define_insn "xvorn3" ++(define_insn "iorn3" + [(set (match_operand:ILASX 0 "register_operand" "=f") + (ior:ILASX (not:ILASX (match_operand:ILASX 2 "register_operand" "f")) + (match_operand:ILASX 1 "register_operand" "f")))] +diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc +index 51abba007..f9ff85d2e 100644 +--- a/gcc/config/loongarch/loongarch-builtins.cc ++++ b/gcc/config/loongarch/loongarch-builtins.cc +@@ -458,8 +458,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) + #define CODE_FOR_lsx_vabsd_du CODE_FOR_lsx_vabsd_u_du + #define CODE_FOR_lsx_vftint_wu_s CODE_FOR_lsx_vftint_u_wu_s + #define CODE_FOR_lsx_vftint_lu_d CODE_FOR_lsx_vftint_u_lu_d +-#define CODE_FOR_lsx_vandn_v CODE_FOR_vandnv16qi3 +-#define CODE_FOR_lsx_vorn_v CODE_FOR_vornv16qi3 ++#define CODE_FOR_lsx_vandn_v CODE_FOR_andnv16qi3 ++#define CODE_FOR_lsx_vorn_v CODE_FOR_iornv16qi3 + #define CODE_FOR_lsx_vneg_b CODE_FOR_vnegv16qi2 + #define CODE_FOR_lsx_vneg_h CODE_FOR_vnegv8hi2 + #define CODE_FOR_lsx_vneg_w CODE_FOR_vnegv4si2 +@@ -692,8 +692,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) + #define CODE_FOR_lasx_xvrepli_w CODE_FOR_lasx_xvrepliv8si + #define CODE_FOR_lasx_xvrepli_d CODE_FOR_lasx_xvrepliv4di + +-#define CODE_FOR_lasx_xvandn_v CODE_FOR_xvandnv32qi3 +-#define CODE_FOR_lasx_xvorn_v CODE_FOR_xvornv32qi3 ++#define CODE_FOR_lasx_xvandn_v 
CODE_FOR_andnv32qi3 ++#define CODE_FOR_lasx_xvorn_v CODE_FOR_iornv32qi3 + #define CODE_FOR_lasx_xvneg_b CODE_FOR_negv32qi2 + #define CODE_FOR_lasx_xvneg_h CODE_FOR_negv16hi2 + #define CODE_FOR_lasx_xvneg_w CODE_FOR_negv8si2 +@@ -2853,6 +2853,7 @@ loongarch_expand_builtin_insn (enum insn_code icode, unsigned int nops, + case CODE_FOR_lsx_vpickod_b: + case CODE_FOR_lsx_vpickod_h: + case CODE_FOR_lsx_vpickod_w: ++ case CODE_FOR_lsx_vandn_v: + case CODE_FOR_lasx_xvilvh_b: + case CODE_FOR_lasx_xvilvh_h: + case CODE_FOR_lasx_xvilvh_w: +@@ -2873,6 +2874,7 @@ loongarch_expand_builtin_insn (enum insn_code icode, unsigned int nops, + case CODE_FOR_lasx_xvpickod_b: + case CODE_FOR_lasx_xvpickod_h: + case CODE_FOR_lasx_xvpickod_w: ++ case CODE_FOR_lasx_xvandn_v: + /* Swap the operands 1 and 2 for interleave operations. Built-ins follow + convention of ISA, which have op1 as higher component and op2 as lower + component. However, the VEC_PERM op in tree and vec_concat in RTL +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index b1c828dba..58c8f28ed 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1701,13 +1701,13 @@ + [(set_attr "type" "logical") + (set_attr "mode" "SI")]) + +-(define_insn "n" ++(define_insn "n3" + [(set (match_operand:X 0 "register_operand" "=r") + (neg_bitwise:X +- (not:X (match_operand:X 1 "register_operand" "r")) +- (match_operand:X 2 "register_operand" "r")))] ++ (not:X (match_operand:X 2 "register_operand" "r")) ++ (match_operand:X 1 "register_operand" "r")))] + "" +- "n\t%0,%2,%1" ++ "n\t%0,%1,%2" + [(set_attr "type" "logical") + (set_attr "mode" "")]) + +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index 2eac11473..c7480aafd 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -2344,12 +2344,12 @@ + } + [(set_attr "mode" "V4SF")]) + +-(define_insn "vandn3" ++(define_insn "andn3" + [(set (match_operand:LSX 0 
"register_operand" "=f") +- (and:LSX (not:LSX (match_operand:LSX 1 "register_operand" "f")) +- (match_operand:LSX 2 "register_operand" "f")))] ++ (and:LSX (not:LSX (match_operand:LSX 2 "register_operand" "f")) ++ (match_operand:LSX 1 "register_operand" "f")))] + "ISA_HAS_LSX" +- "vandn.v\t%w0,%w1,%w2" ++ "vandn.v\t%w0,%w2,%w1" + [(set_attr "type" "simd_logic") + (set_attr "mode" "")]) + +@@ -3028,7 +3028,7 @@ + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +-(define_insn "vorn3" ++(define_insn "iorn3" + [(set (match_operand:ILSX 0 "register_operand" "=f") + (ior:ILSX (not:ILSX (match_operand:ILSX 2 "register_operand" "f")) + (match_operand:ILSX 1 "register_operand" "f")))] +diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c b/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c +new file mode 100644 +index 000000000..4aa5f19a6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c +@@ -0,0 +1,11 @@ ++#define N 8 ++ ++#include "./lsx-andn-iorn.c" ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlasx -ftree-vectorize" } */ ++ ++/* We should produce a BIT_ANDC and BIT_IORC here. */ ++ ++/* { dg-final { scan-tree-dump ".BIT_ANDN " "optimized" } } */ ++/* { dg-final { scan-tree-dump ".BIT_IORN " "optimized" } } */ +diff --git a/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c b/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c +new file mode 100644 +index 000000000..7bceccd37 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlsx -ftree-vectorize" } */ ++ ++#ifndef N ++#define N 4 ++#endif ++ ++extern float a[N], b[N]; ++extern int c[N], d[N]; ++ ++void ++bar1 (void) ++{ ++ for (int i = 0; i < N; i++) ++ d[i] = a[i] > b[i] ? 0 : c[i]; ++} ++ ++void ++bar2 (void) ++{ ++ for (int i = 0; i < N; i++) ++ d[i] = a[i] > b[i] ? c[i]: -1; ++} ++ ++/* We should produce a BIT_ANDC and BIT_IORC here. 
*/ ++ ++/* { dg-final { scan-tree-dump ".BIT_ANDN " "optimized" } } */ ++/* { dg-final { scan-tree-dump ".BIT_IORN " "optimized" } } */ +-- +2.43.0 + diff --git a/0183-LoongArch-Drop-vcond-u-expanders.patch b/0183-LoongArch-Drop-vcond-u-expanders.patch new file mode 100644 index 0000000..3085889 --- /dev/null +++ b/0183-LoongArch-Drop-vcond-u-expanders.patch @@ -0,0 +1,127 @@ +From 8394519779553a2c59214d76054dd1ba87a380b3 Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Thu, 8 Aug 2024 10:39:54 +0800 +Subject: [PATCH 183/188] LoongArch: Drop vcond{,u} expanders. + +Optabs vcond{,u} will be removed for GCC 15. Since regtest shows no +fallout, dropping the expanders, now. + +gcc/ChangeLog: + + PR target/114189 + * config/loongarch/lasx.md (vcondu): Delete. + (vcond): Likewise. + * config/loongarch/lsx.md (vcondu): Likewise. + (vcond): Likewise. +--- + gcc/config/loongarch/lasx.md | 37 ------------------------------------ + gcc/config/loongarch/lsx.md | 31 ------------------------------ + 2 files changed, 68 deletions(-) + +diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md +index 3775155ca..be2f6ca8e 100644 +--- a/gcc/config/loongarch/lasx.md ++++ b/gcc/config/loongarch/lasx.md +@@ -165,9 +165,6 @@ + ;; All vector modes with 256 bits. + (define_mode_iterator LASX [V4DF V8SF V4DI V8SI V16HI V32QI]) + +-;; Same as LASX. Used by vcond to iterate two modes. +-(define_mode_iterator LASX_2 [V4DF V8SF V4DI V8SI V16HI V32QI]) +- + ;; Only used for splitting insert_d and copy_{u,s}.d. + (define_mode_iterator LASX_D [V4DI V4DF]) + +@@ -762,40 +759,6 @@ + DONE; + }) + +-;; FIXME: 256?? 
+-(define_expand "vcondu" +- [(match_operand:LASX 0 "register_operand") +- (match_operand:LASX 1 "reg_or_m1_operand") +- (match_operand:LASX 2 "reg_or_0_operand") +- (match_operator 3 "" +- [(match_operand:ILASX 4 "register_operand") +- (match_operand:ILASX 5 "register_operand")])] +- "ISA_HAS_LASX +- && (GET_MODE_NUNITS (mode) +- == GET_MODE_NUNITS (mode))" +-{ +- loongarch_expand_vec_cond_expr (mode, mode, +- operands); +- DONE; +-}) +- +-;; FIXME: 256?? +-(define_expand "vcond" +- [(match_operand:LASX 0 "register_operand") +- (match_operand:LASX 1 "reg_or_m1_operand") +- (match_operand:LASX 2 "reg_or_0_operand") +- (match_operator 3 "" +- [(match_operand:LASX_2 4 "register_operand") +- (match_operand:LASX_2 5 "register_operand")])] +- "ISA_HAS_LASX +- && (GET_MODE_NUNITS (mode) +- == GET_MODE_NUNITS (mode))" +-{ +- loongarch_expand_vec_cond_expr (mode, mode, +- operands); +- DONE; +-}) +- + ;; Same as vcond_ + (define_expand "vcond_mask_" + [(match_operand:LASX 0 "register_operand") +diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md +index c7480aafd..5cb5bc61f 100644 +--- a/gcc/config/loongarch/lsx.md ++++ b/gcc/config/loongarch/lsx.md +@@ -186,9 +186,6 @@ + ;; All vector modes with 128 bits. + (define_mode_iterator LSX [V2DF V4SF V2DI V4SI V8HI V16QI]) + +-;; Same as LSX. Used by vcond to iterate two modes. +-(define_mode_iterator LSX_2 [V2DF V4SF V2DI V4SI V8HI V16QI]) +- + ;; Only used for vilvh and splitting insert_d and copy_{u,s}.d. 
+ (define_mode_iterator LSX_D [V2DI V2DF]) + +@@ -533,34 +530,6 @@ + DONE; + }) + +-(define_expand "vcondu" +- [(match_operand:LSX 0 "register_operand") +- (match_operand:LSX 1 "reg_or_m1_operand") +- (match_operand:LSX 2 "reg_or_0_operand") +- (match_operator 3 "" +- [(match_operand:ILSX 4 "register_operand") +- (match_operand:ILSX 5 "register_operand")])] +- "ISA_HAS_LSX +- && (GET_MODE_NUNITS (mode) == GET_MODE_NUNITS (mode))" +-{ +- loongarch_expand_vec_cond_expr (mode, mode, operands); +- DONE; +-}) +- +-(define_expand "vcond" +- [(match_operand:LSX 0 "register_operand") +- (match_operand:LSX 1 "reg_or_m1_operand") +- (match_operand:LSX 2 "reg_or_0_operand") +- (match_operator 3 "" +- [(match_operand:LSX_2 4 "register_operand") +- (match_operand:LSX_2 5 "register_operand")])] +- "ISA_HAS_LSX +- && (GET_MODE_NUNITS (mode) == GET_MODE_NUNITS (mode))" +-{ +- loongarch_expand_vec_cond_expr (mode, mode, operands); +- DONE; +-}) +- + (define_expand "vcond_mask_" + [(match_operand:LSX 0 "register_operand") + (match_operand:LSX 1 "reg_or_m1_operand") +-- +2.43.0 + diff --git a/0184-LoongArch-Provide-ashr-lshr-and-ashl-RTL-pattern-for.patch b/0184-LoongArch-Provide-ashr-lshr-and-ashl-RTL-pattern-for.patch new file mode 100644 index 0000000..ec6f308 --- /dev/null +++ b/0184-LoongArch-Provide-ashr-lshr-and-ashl-RTL-pattern-for.patch @@ -0,0 +1,220 @@ +From d9ce0e85c8cba331413c6a521987a1ecbd94df1c Mon Sep 17 00:00:00 2001 +From: Lulu Cheng +Date: Thu, 8 Aug 2024 09:59:28 +0800 +Subject: [PATCH 184/188] LoongArch: Provide ashr lshr and ashl RTL pattern for + vectors. + +We support vashr vlshr and vashl. However, in r15-1638 support optimize +x < 0 ? -1 : 0 into (signed) x >> 31 and x < 0 ? 1 : 0 into (unsigned) x >> 31. +To support this optimization, vector ashr lshr and ashl need to be implemented. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (insn): Added rotatert rotr pairs. + * config/loongarch/simd.md (rotr3): Remove to ... + (3): This. 
+ +gcc/testsuite/ChangeLog: + + * g++.target/loongarch/vect-ashr-lshr.C: New test. +--- + gcc/config/loongarch/loongarch.md | 1 + + gcc/config/loongarch/simd.md | 13 +- + .../g++.target/loongarch/vect-ashr-lshr.C | 147 ++++++++++++++++++ + 3 files changed, 155 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 58c8f28ed..867977b36 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -559,6 +559,7 @@ + (define_code_attr insn [(ashift "sll") + (ashiftrt "sra") + (lshiftrt "srl") ++ (rotatert "rotr") + (ior "or") + (xor "xor") + (and "and") +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index 00d4c7831..c28b95282 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -306,14 +306,15 @@ + operands[4] = gen_reg_rtx (mode); + }); + +-;; vrotri.{b/h/w/d} ++;; v{rotr/sll/sra/srl}i.{b/h/w/d} + +-(define_insn "rotr3" ++(define_insn "3" + [(set (match_operand:IVEC 0 "register_operand" "=f") +- (rotatert:IVEC (match_operand:IVEC 1 "register_operand" "f") +- (match_operand:SI 2 "const__operand")))] +- "" +- "vrotri.\t%0,%1,%2"; ++ (shift_w:IVEC ++ (match_operand:IVEC 1 "register_operand" "f") ++ (match_operand:SI 2 "const__operand")))] ++ "ISA_HAS_LSX" ++ "vi.\t%0,%1,%2" + [(set_attr "type" "simd_int_arith") + (set_attr "mode" "")]) + +diff --git a/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C b/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C +new file mode 100644 +index 000000000..bcef985fa +--- /dev/null ++++ b/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C +@@ -0,0 +1,147 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mlasx -O2" } */ ++/* { dg-final { scan-assembler-times "vsrli.b" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrli.h" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrli.w" 2 } } */ ++/* { dg-final { 
scan-assembler-times "vsrli.d" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrai.b" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrai.h" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrai.w" 2 } } */ ++/* { dg-final { scan-assembler-times "vsrai.d" 2 } } */ ++ ++typedef signed char v16qi __attribute__((vector_size(16))); ++typedef signed char v32qi __attribute__((vector_size(32))); ++typedef short v8hi __attribute__((vector_size(16))); ++typedef short v16hi __attribute__((vector_size(32))); ++typedef int v4si __attribute__((vector_size(16))); ++typedef int v8si __attribute__((vector_size(32))); ++typedef long long v2di __attribute__((vector_size(16))); ++typedef long long v4di __attribute__((vector_size(32))); ++ ++v16qi ++foo (v16qi a) ++{ ++ v16qi const1_op = __extension__(v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; ++ v16qi const0_op = __extension__(v16qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v32qi ++foo2 (v32qi a) ++{ ++ v32qi const1_op = __extension__(v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; ++ v32qi const0_op = __extension__(v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v8hi ++foo3 (v8hi a) ++{ ++ v8hi const1_op = __extension__(v8hi){1,1,1,1,1,1,1,1}; ++ v8hi const0_op = __extension__(v8hi){0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v16hi ++foo4 (v16hi a) ++{ ++ v16hi const1_op = __extension__(v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; ++ v16hi const0_op = __extension__(v16hi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v4si ++foo5 (v4si a) ++{ ++ v4si const1_op = __extension__(v4si){1,1,1,1}; ++ v4si const0_op = __extension__(v4si){0,0,0,0}; ++ return a < const0_op ? 
const1_op : const0_op; ++} ++ ++v8si ++foo6 (v8si a) ++{ ++ v8si const1_op = __extension__(v8si){1,1,1,1,1,1,1,1}; ++ v8si const0_op = __extension__(v8si){0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v2di ++foo7 (v2di a) ++{ ++ v2di const1_op = __extension__(v2di){1,1}; ++ v2di const0_op = __extension__(v2di){0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v4di ++foo8 (v4di a) ++{ ++ v4di const1_op = __extension__(v4di){1,1,1,1}; ++ v4di const0_op = __extension__(v4di){0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v16qi ++foo9 (v16qi a) ++{ ++ v16qi const1_op = __extension__(v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; ++ v16qi const0_op = __extension__(v16qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v32qi ++foo10 (v32qi a) ++{ ++ v32qi const1_op = __extension__(v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; ++ v32qi const0_op = __extension__(v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v8hi ++foo11 (v8hi a) ++{ ++ v8hi const1_op = __extension__(v8hi){-1,-1,-1,-1,-1,-1,-1,-1}; ++ v8hi const0_op = __extension__(v8hi){0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v16hi ++foo12 (v16hi a) ++{ ++ v16hi const1_op = __extension__(v16hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; ++ v16hi const0_op = __extension__(v16hi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v4si ++foo13 (v4si a) ++{ ++ v4si const1_op = __extension__(v4si){-1,-1,-1,-1}; ++ v4si const0_op = __extension__(v4si){0,0,0,0}; ++ return a < const0_op ? 
const1_op : const0_op; ++} ++ ++v8si ++foo14 (v8si a) ++{ ++ v8si const1_op = __extension__(v8si){-1,-1,-1,-1,-1,-1,-1,-1}; ++ v8si const0_op = __extension__(v8si){0,0,0,0,0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v2di ++foo15 (v2di a) ++{ ++ v2di const1_op = __extension__(v2di){-1,-1}; ++ v2di const0_op = __extension__(v2di){0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} ++ ++v4di ++foo16 (v4di a) ++{ ++ v4di const1_op = __extension__(v4di){-1,-1,-1,-1}; ++ v4di const0_op = __extension__(v4di){0,0,0,0}; ++ return a < const0_op ? const1_op : const0_op; ++} +-- +2.43.0 + diff --git a/0185-LoongArch-Implement-scalar-isinf-isnormal-and-isfini.patch b/0185-LoongArch-Implement-scalar-isinf-isnormal-and-isfini.patch new file mode 100644 index 0000000..b0ef743 --- /dev/null +++ b/0185-LoongArch-Implement-scalar-isinf-isnormal-and-isfini.patch @@ -0,0 +1,203 @@ +From 7e8e122306feaecf8d7b520b4e7c0b9908ca6fd2 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Thu, 4 Jul 2024 02:49:28 +0800 +Subject: [PATCH 185/188] LoongArch: Implement scalar isinf, isnormal, and + isfinite via fclass + +Doing so can avoid loading FP constants from the memory. It also +partially fixes PR 66462 as fclass does not signal on sNaN. + +gcc/ChangeLog: + + * config/loongarch/loongarch.md (extendsidi2): Add ("=r", "f") + alternative and use movfr2gr.s for it. The spec clearly states + movfr2gr.s sign extends the value to GRLEN. + (fclass_): Make the result SImode instead of a floating + mode. The fclass results are really not FP values. + (FCLASS_MASK): New define_int_iterator. + (fclass_optab): New define_int_attr. + (): New define_expand + template. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/fclass-compile.c: New test. + * gcc.target/loongarch/fclass-run.c: New test.
+--- + gcc/config/loongarch/loongarch.md | 53 ++++++++++++++++--- + .../gcc.target/loongarch/fclass-compile.c | 20 +++++++ + .../gcc.target/loongarch/fclass-run.c | 53 +++++++++++++++++++ + 3 files changed, 119 insertions(+), 7 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/fclass-compile.c + create mode 100644 gcc/testsuite/gcc.target/loongarch/fclass-run.c + +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 867977b36..15960a79f 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -1851,16 +1851,17 @@ + ;; .................... + + (define_insn "extendsidi2" +- [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") ++ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r") + (sign_extend:DI +- (match_operand:SI 1 "nonimmediate_operand" "r,ZC,m,k")))] ++ (match_operand:SI 1 "nonimmediate_operand" "r,ZC,m,k,f")))] + "TARGET_64BIT" + "@ + slli.w\t%0,%1,0 + ldptr.w\t%0,%1 + ld.w\t%0,%1 +- ldx.w\t%0,%1" +- [(set_attr "move_type" "sll0,load,load,load") ++ ldx.w\t%0,%1 ++ movfr2gr.s\t%0,%1" ++ [(set_attr "move_type" "sll0,load,load,load,mftg") + (set_attr "mode" "DI")]) + + (define_insn "extend2" +@@ -4110,14 +4111,52 @@ + "movgr2fcsr\t$r%0,%1") + + (define_insn "fclass_" +- [(set (match_operand:ANYF 0 "register_operand" "=f") +- (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] +- UNSPEC_FCLASS))] ++ [(set (match_operand:SI 0 "register_operand" "=f") ++ (unspec:SI [(match_operand:ANYF 1 "register_operand" "f")] ++ UNSPEC_FCLASS))] + "TARGET_HARD_FLOAT" + "fclass.\t%0,%1" + [(set_attr "type" "unknown") + (set_attr "mode" "")]) + ++(define_int_iterator FCLASS_MASK [68 136 952]) ++(define_int_attr fclass_optab ++ [(68 "isinf") ++ (136 "isnormal") ++ (952 "isfinite")]) ++ ++(define_expand "2" ++ [(match_operand:SI 0 "register_operand" "=r") ++ (match_operand:ANYF 1 "register_operand" " f") ++ (const_int FCLASS_MASK)] ++ "TARGET_HARD_FLOAT" ++ { ++ rtx ft0 = 
gen_reg_rtx (SImode); ++ rtx t0 = gen_reg_rtx (word_mode); ++ rtx mask = GEN_INT (); ++ ++ emit_insn (gen_fclass_ (ft0, operands[1])); ++ ++ if (TARGET_64BIT) ++ emit_insn (gen_extend_insn (t0, ft0, DImode, SImode, 0)); ++ else ++ emit_move_insn (t0, ft0); ++ ++ emit_move_insn (t0, gen_rtx_AND (word_mode, t0, mask)); ++ emit_move_insn (t0, gen_rtx_NE (word_mode, t0, const0_rtx)); ++ ++ if (TARGET_64BIT) ++ { ++ t0 = lowpart_subreg (SImode, t0, DImode); ++ SUBREG_PROMOTED_VAR_P (t0) = 1; ++ SUBREG_PROMOTED_SET (t0, SRP_SIGNED); ++ } ++ ++ emit_move_insn (operands[0], t0); ++ ++ DONE; ++ }) ++ + (define_insn "bytepick_w_" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") +diff --git a/gcc/testsuite/gcc.target/loongarch/fclass-compile.c b/gcc/testsuite/gcc.target/loongarch/fclass-compile.c +new file mode 100644 +index 000000000..9c24d6e26 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/fclass-compile.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=loongarch64 -mfpu=64 -mabi=lp64d" } */ ++/* { dg-final { scan-assembler-times "fclass\\.s" 1 } } */ ++/* { dg-final { scan-assembler-times "fclass\\.d" 1 } } */ ++ ++__attribute__ ((noipa)) int ++test_fclass_f (float f) ++{ ++ return __builtin_isinf (f) ++ | __builtin_isnormal (f) << 1 ++ | __builtin_isfinite (f) << 2; ++} ++ ++__attribute__ ((noipa)) int ++test_fclass_d (double d) ++{ ++ return __builtin_isinf (d) ++ | __builtin_isnormal (d) << 1 ++ | __builtin_isfinite (d) << 2; ++} +diff --git a/gcc/testsuite/gcc.target/loongarch/fclass-run.c b/gcc/testsuite/gcc.target/loongarch/fclass-run.c +new file mode 100644 +index 000000000..e5585f9d5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/fclass-run.c +@@ -0,0 +1,53 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fsignaling-nans -D_GNU_SOURCE -std=c23" } */ ++/* { dg-require-effective-target fenv_exceptions } */ ++ ++#include ++#include "fclass-compile.c" ++ 
++#define ASSERT_EQ(x, y) (void)(x == y || (__builtin_abort (), 1)) ++ ++int ++main (void) ++{ ++ volatile float f_inf = __builtin_inff (); ++ volatile float f_zero = 0; ++ volatile float f_normal = 114.514; ++ volatile float f_subnormal = 1e-40; ++ volatile float f_qnan = __builtin_nanf (""); ++ volatile float f_snan = __builtin_nansf (""); ++ volatile double d_inf = __builtin_inf (); ++ volatile double d_zero = 0; ++ volatile double d_normal = 1919.810; ++ volatile double d_subnormal = 1e-320; ++ volatile double d_qnan = __builtin_nan (""); ++ volatile double d_snan = __builtin_nans (""); ++ ++#if __loongarch_frlen >= 64 ++ /* With fclass.{s/d} we shouldn't signal, even if the input is sNaN. ++ PR 66462. */ ++ feenableexcept (FE_INVALID); ++#endif ++ ++ ASSERT_EQ (test_fclass_f (f_inf), 0b001); ++ ASSERT_EQ (test_fclass_f (-f_inf), 0b001); ++ ASSERT_EQ (test_fclass_f (f_zero), 0b100); ++ ASSERT_EQ (test_fclass_f (-f_zero), 0b100); ++ ASSERT_EQ (test_fclass_f (f_normal), 0b110); ++ ASSERT_EQ (test_fclass_f (-f_normal), 0b110); ++ ASSERT_EQ (test_fclass_f (f_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_f (-f_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_f (f_qnan), 0); ++ ASSERT_EQ (test_fclass_f (f_snan), 0); ++ ++ ASSERT_EQ (test_fclass_d (d_inf), 0b001); ++ ASSERT_EQ (test_fclass_d (-d_inf), 0b001); ++ ASSERT_EQ (test_fclass_d (d_zero), 0b100); ++ ASSERT_EQ (test_fclass_d (-d_zero), 0b100); ++ ASSERT_EQ (test_fclass_d (d_normal), 0b110); ++ ASSERT_EQ (test_fclass_d (-d_normal), 0b110); ++ ASSERT_EQ (test_fclass_d (d_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_d (-d_subnormal), 0b100); ++ ASSERT_EQ (test_fclass_d (d_qnan), 0); ++ ASSERT_EQ (test_fclass_d (d_snan), 0); ++} +-- +2.43.0 + diff --git a/0186-LoongArch-Add-support-to-annotate-tablejump.patch b/0186-LoongArch-Add-support-to-annotate-tablejump.patch new file mode 100644 index 0000000..cf41bc0 --- /dev/null +++ b/0186-LoongArch-Add-support-to-annotate-tablejump.patch @@ -0,0 +1,155 @@ +From 
5079c41ada379bd8d1bdb92dd2b91e72e9496ea6 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Thu, 11 Jul 2024 19:43:48 +0800 +Subject: [PATCH 186/188] LoongArch: Add support to annotate tablejump + +This is per the request from the kernel developers. For generating the +ORC unwind info, the objtool program needs to analyze the control flow +of a .o file. If a jump table is used, objtool has to correlate the +jump instruction with the table. + +On x86 (where objtool was initially developed) it's simple: a relocation +entry naturally correlates them because one single instruction is used +for table-based jump. But on a RISC machine objtool would have to +reconstruct the data flow if it must find out the correlation on its +own. + +So, emit an additional section to store the correlation info as pairs of +addresses, each pair contains the address of a jump instruction (jr) and +the address of the jump table. This is very trivial to implement in +GCC. + +gcc/ChangeLog: + + * config/loongarch/genopts/loongarch.opt.in + (mannotate-tablejump): New option. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch.md (tablejump): Emit + additional correlation info between the jump instruction and the + jump table, if -mannotate-tablejump. + * doc/invoke.texi: Document -mannotate-tablejump. + +gcc/testsuite/ChangeLog: + + * gcc.target/loongarch/jump-table-annotate.c: New test.
+ +Suggested-by: Tiezhu Yang +--- + gcc/config/loongarch/genopts/loongarch.opt.in | 4 ++++ + gcc/config/loongarch/loongarch.md | 12 +++++++++++- + gcc/config/loongarch/loongarch.opt | 4 ++++ + gcc/doc/invoke.texi | 13 ++++++++++++- + .../gcc.target/loongarch/jump-table-annotate.c | 15 +++++++++++++++ + 5 files changed, 46 insertions(+), 2 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c + +diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in +index 0ecd10922..20795f6bd 100644 +--- a/gcc/config/loongarch/genopts/loongarch.opt.in ++++ b/gcc/config/loongarch/genopts/loongarch.opt.in +@@ -301,3 +301,7 @@ default value is 4. + ; CPUCFG independently, so we use bit flags to specify them. + TargetVariable + HOST_WIDE_INT la_isa_evolution = 0 ++ ++mannotate-tablejump ++Target Mask(ANNOTATE_TABLEJUMP) Save ++Annotate table jump instruction (jr {reg}) to correlate it with the jump table. +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 15960a79f..66236a7c7 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -3496,12 +3496,22 @@ + DONE; + }) + ++(define_mode_attr mode_size [(DI "8") (SI "4")]) ++ + (define_insn "@tablejump" + [(set (pc) + (match_operand:P 0 "register_operand" "e")) + (use (label_ref (match_operand 1 "" "")))] + "" +- "jr\t%0" ++ { ++ return TARGET_ANNOTATE_TABLEJUMP ++ ? "1:jr\t%0\n\t" ++ ".pushsection\t.discard.tablejump_annotate\n\t" ++ "\t.byte\t1b\n\t" ++ "\t.byte\t%1\n\t" ++ ".popsection" ++ : "jr\t%0"; ++ } + [(set_attr "type" "jump") + (set_attr "mode" "none")]) + +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 69b3b965c..16fed6ec3 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -310,6 +310,10 @@ default value is 4. 
+ TargetVariable + HOST_WIDE_INT la_isa_evolution = 0 + ++mannotate-tablejump ++Target Mask(ANNOTATE_TABLEJUMP) Save ++Annotate table jump instruction (jr {reg}) to correlate it with the jump table ++ + mfrecipe + Target Mask(ISA_FRECIPE) Var(la_isa_evolution) + Support frecipe.{s/d} and frsqrte.{s/d} instructions. +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index f6d59317b..d2c52cdf4 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1011,7 +1011,7 @@ Objective-C and Objective-C++ Dialects}. + -mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as @gol + -mrecip -mrecip=@var{opt} -mfrecipe -mno-frecipe -mdiv32 -mno-div32 @gol + -mlam-bh -mno-lam-bh -mlamcas -mno-lamcas -mld-seq-sa -mno-ld-seq-sa @gol +--mtls-dialect=@var{opt}} ++-mtls-dialect=@var{opt} -mannotate-tablejump -mno-annotate-tablejump} + + @emph{M32R/D Options} + @gccoptlist{-m32r2 -m32rx -m32r @gol +@@ -24750,6 +24750,17 @@ Whether a load-load barrier (@code{dbar 0x700}) is needed. When build with + This option controls which tls dialect may be used for general dynamic and + local dynamic TLS models. + ++@opindex mannotate-tablejump ++@opindex mno-annotate-tablejump ++@item -mannotate-tablejump ++@itemx -mno-annotate-tablejump ++Create an annotation section @code{.discard.tablejump_annotate} to ++correlate the @code{jirl} instruction and the jump table when a jump ++table is used to optimize the @code{switch} statement. Some external ++tools, for example @file{objtool} of the Linux kernel building system, ++need the annotation to analyze the control flow. The default is ++@option{-mno-annotate-tablejump}. ++ + @table @samp + @item trad + Use traditional TLS. This is the default.
+diff --git a/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c b/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c +new file mode 100644 +index 000000000..9d58e60e3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/loongarch/jump-table-annotate.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-mannotate-tablejump" } */ ++ ++extern void asdf(int); ++void foo(int x) { ++ switch (x) { ++ case 0: asdf(10); break; ++ case 1: asdf(11); break; ++ case 2: asdf(12); break; ++ case 3: asdf(13); break; ++ case 4: asdf(14); break; ++ } ++} ++ ++/* { dg-final { scan-assembler "\\.discard\\.tablejump_annotate" } } */ +-- +2.43.0 + diff --git a/0187-LoongArch-Fix-up-r15-4130.patch b/0187-LoongArch-Fix-up-r15-4130.patch new file mode 100644 index 0000000..67649db --- /dev/null +++ b/0187-LoongArch-Fix-up-r15-4130.patch @@ -0,0 +1,32 @@ +From 8cdf96cd61612746262a811b8a091ecab27bd3a1 Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Wed, 10 Jul 2024 12:15:23 +0800 +Subject: [PATCH 187/188] LoongArch: Fix up r15-4130 + +An earlier version of the patch (lacking the regeneration of some files) +was pushed. Fix it up now. + +gcc/ChangeLog: + + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/loongarch.opt.urls: Regenerate. +--- + gcc/config/loongarch/loongarch.opt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt +index 16fed6ec3..f9c7bd446 100644 +--- a/gcc/config/loongarch/loongarch.opt ++++ b/gcc/config/loongarch/loongarch.opt +@@ -312,7 +312,7 @@ HOST_WIDE_INT la_isa_evolution = 0 + + mannotate-tablejump + Target Mask(ANNOTATE_TABLEJUMP) Save +-Annotate table jump instruction (jr {reg}) to correlate it with the jump table ++Annotate table jump instruction (jr {reg}) to correlate it with the jump table. 
+ + mfrecipe + Target Mask(ISA_FRECIPE) Var(la_isa_evolution) +-- +2.43.0 + diff --git a/0188-libphobos-Update-build-scripts-for-LoongArch64.patch b/0188-libphobos-Update-build-scripts-for-LoongArch64.patch new file mode 100644 index 0000000..0af2b6b --- /dev/null +++ b/0188-libphobos-Update-build-scripts-for-LoongArch64.patch @@ -0,0 +1,304 @@ +From 46e279e1c79086e930965c9a15d08b70a2c06a80 Mon Sep 17 00:00:00 2001 +From: Yang Yujie +Date: Mon, 28 Oct 2024 01:53:57 +0000 +Subject: [PATCH 188/188] libphobos: Update build scripts for LoongArch64. + +libphobos/ChangeLog: + + * m4/druntime/cpu.m4: Support loongarch* targets. + * libdruntime/Makefile.am: Same. + * libdruntime/Makefile.in: Regenerate. + * configure: Regenerate. +--- + libphobos/configure | 21 ++++++- + libphobos/libdruntime/Makefile.am | 3 + + libphobos/libdruntime/Makefile.in | 94 +++++++++++++++++++------------ + libphobos/m4/druntime/cpu.m4 | 5 ++ + 4 files changed, 85 insertions(+), 38 deletions(-) + +diff --git a/libphobos/configure b/libphobos/configure +index 9da06f087..6acb2dd89 100755 +--- a/libphobos/configure ++++ b/libphobos/configure +@@ -696,6 +696,8 @@ DRUNTIME_CPU_POWERPC_FALSE + DRUNTIME_CPU_POWERPC_TRUE + DRUNTIME_CPU_MIPS_FALSE + DRUNTIME_CPU_MIPS_TRUE ++DRUNTIME_CPU_LOONGARCH_FALSE ++DRUNTIME_CPU_LOONGARCH_TRUE + DRUNTIME_CPU_ARM_FALSE + DRUNTIME_CPU_ARM_TRUE + DRUNTIME_CPU_AARCH64_FALSE +@@ -11750,7 +11752,7 @@ else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +-#line 11753 "configure" ++#line 11755 "configure" + #include "confdefs.h" + + #if HAVE_DLFCN_H +@@ -11856,7 +11858,7 @@ else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +-#line 11859 "configure" ++#line 11861 "configure" + #include "confdefs.h" + + #if HAVE_DLFCN_H +@@ -14137,6 +14139,9 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu + ;; + mips*) druntime_target_cpu_parsed="mips" + ;; ++ 
loongarch*) ++ druntime_target_cpu_parsed="loongarch" ++ ;; + powerpc*) + druntime_target_cpu_parsed="powerpc" + ;; +@@ -14174,6 +14179,14 @@ else + DRUNTIME_CPU_MIPS_FALSE= + fi + ++ if test "$druntime_target_cpu_parsed" = "loongarch"; then ++ DRUNTIME_CPU_LOONGARCH_TRUE= ++ DRUNTIME_CPU_LOONGARCH_FALSE='#' ++else ++ DRUNTIME_CPU_LOONGARCH_TRUE='#' ++ DRUNTIME_CPU_LOONGARCH_FALSE= ++fi ++ + if test "$druntime_target_cpu_parsed" = "powerpc"; then + DRUNTIME_CPU_POWERPC_TRUE= + DRUNTIME_CPU_POWERPC_FALSE='#' +@@ -15738,6 +15751,10 @@ if test -z "${DRUNTIME_CPU_MIPS_TRUE}" && test -z "${DRUNTIME_CPU_MIPS_FALSE}"; + as_fn_error $? "conditional \"DRUNTIME_CPU_MIPS\" was never defined. + Usually this means the macro was only invoked conditionally." "$LINENO" 5 + fi ++if test -z "${DRUNTIME_CPU_LOONGARCH_TRUE}" && test -z "${DRUNTIME_CPU_LOONGARCH_FALSE}"; then ++ as_fn_error $? "conditional \"DRUNTIME_CPU_LOONGARCH\" was never defined. ++Usually this means the macro was only invoked conditionally." "$LINENO" 5 ++fi + if test -z "${DRUNTIME_CPU_POWERPC_TRUE}" && test -z "${DRUNTIME_CPU_POWERPC_FALSE}"; then + as_fn_error $? "conditional \"DRUNTIME_CPU_POWERPC\" was never defined. + Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +diff --git a/libphobos/libdruntime/Makefile.am b/libphobos/libdruntime/Makefile.am +index 6ca4012b7..65e3f1b44 100644 +--- a/libphobos/libdruntime/Makefile.am ++++ b/libphobos/libdruntime/Makefile.am +@@ -86,6 +86,9 @@ endif + if DRUNTIME_CPU_MIPS + DRUNTIME_SOURCES_CONFIGURED += config/mips/switchcontext.S + endif ++if DRUNTIME_CPU_LOONGARCH ++ DRUNTIME_SOURCES_CONFIGURED += config/loongarch/switchcontext.S ++endif + if DRUNTIME_CPU_POWERPC + DRUNTIME_SOURCES_CONFIGURED += config/powerpc/switchcontext.S + endif +diff --git a/libphobos/libdruntime/Makefile.in b/libphobos/libdruntime/Makefile.in +index f7f78d71f..91cd65362 100644 +--- a/libphobos/libdruntime/Makefile.in ++++ b/libphobos/libdruntime/Makefile.in +@@ -124,12 +124,13 @@ target_triplet = @target@ + # CPU specific sources + @DRUNTIME_CPU_AARCH64_TRUE@am__append_11 = config/aarch64/switchcontext.S + @DRUNTIME_CPU_ARM_TRUE@am__append_12 = config/arm/switchcontext.S +-@DRUNTIME_CPU_MIPS_TRUE@am__append_13 = config/mips/switchcontext.S +-@DRUNTIME_CPU_POWERPC_TRUE@am__append_14 = config/powerpc/switchcontext.S +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__append_15 = config/mingw/switchcontext.S +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__append_16 = config/x86/switchcontext.S +-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__append_17 = config/systemz/get_tls_offset.S +-@DRUNTIME_CPU_S390_TRUE@am__append_18 = config/s390/get_tls_offset.S ++@DRUNTIME_CPU_LOONGARCH_TRUE@am__append_13 = config/loongarch/switchcontext.S ++@DRUNTIME_CPU_MIPS_TRUE@am__append_14 = config/mips/switchcontext.S ++@DRUNTIME_CPU_POWERPC_TRUE@am__append_15 = config/powerpc/switchcontext.S ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__append_16 = config/mingw/switchcontext.S ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__append_17 = config/x86/switchcontext.S ++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__append_18 = config/systemz/get_tls_offset.S ++@DRUNTIME_CPU_S390_TRUE@am__append_19 = config/s390/get_tls_offset.S + 
subdir = libdruntime + ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 + am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ +@@ -474,45 +475,49 @@ am__objects_22 = core/sys/solaris/dlfcn.lo core/sys/solaris/elf.lo \ + @DRUNTIME_OS_SOLARIS_TRUE@am__objects_23 = $(am__objects_22) + @DRUNTIME_CPU_AARCH64_TRUE@am__objects_24 = config/aarch64/libgdruntime_la-switchcontext.lo + @DRUNTIME_CPU_ARM_TRUE@am__objects_25 = config/arm/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_MIPS_TRUE@am__objects_26 = config/mips/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_POWERPC_TRUE@am__objects_27 = config/powerpc/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_28 = config/mingw/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_29 = config/x86/libgdruntime_la-switchcontext.lo +-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_30 = config/systemz/libgdruntime_la-get_tls_offset.lo +-@DRUNTIME_CPU_S390_TRUE@am__objects_31 = config/s390/libgdruntime_la-get_tls_offset.lo +-am__objects_32 = $(am__objects_5) $(am__objects_7) $(am__objects_9) \ ++@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_26 = config/loongarch/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_MIPS_TRUE@am__objects_27 = config/mips/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_POWERPC_TRUE@am__objects_28 = config/powerpc/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_29 = config/mingw/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_30 = config/x86/libgdruntime_la-switchcontext.lo ++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_31 = config/systemz/libgdruntime_la-get_tls_offset.lo ++@DRUNTIME_CPU_S390_TRUE@am__objects_32 = config/s390/libgdruntime_la-get_tls_offset.lo ++am__objects_33 = $(am__objects_6) $(am__objects_8) $(am__objects_10) \ + $(am__objects_11) $(am__objects_13) $(am__objects_15) \ + $(am__objects_17) $(am__objects_19) $(am__objects_21) \ + 
$(am__objects_23) $(am__objects_24) $(am__objects_25) \ + $(am__objects_26) $(am__objects_27) $(am__objects_28) \ +- $(am__objects_29) $(am__objects_30) $(am__objects_31) +-am__objects_33 = gcc/config.lo gcc/libbacktrace.lo +-am__objects_34 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \ +- $(am__objects_32) $(am__objects_33) +-am_libgdruntime_la_OBJECTS = $(am__objects_34) ++ $(am__objects_29) $(am__objects_30) $(am__objects_31) \ ++ $(am__objects_32) ++am__objects_34 = gcc/config.lo gcc/libbacktrace.lo ++am__objects_35 = $(am__objects_1) $(am__objects_2) $(am__objects_3) \ ++ $(am__objects_33) $(am__objects_34) ++am_libgdruntime_la_OBJECTS = $(am__objects_35) + libgdruntime_la_OBJECTS = $(am_libgdruntime_la_OBJECTS) + am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) +-am__objects_35 = core/stdc/libgdruntime_convenience_la-errno_.lo +-@DRUNTIME_OS_MINGW_TRUE@am__objects_36 = $(am__objects_20) \ ++am__objects_36 = core/stdc/libgdruntime_convenience_la-errno_.lo ++@DRUNTIME_OS_MINGW_TRUE@am__objects_37 = $(am__objects_20) \ + @DRUNTIME_OS_MINGW_TRUE@ config/mingw/libgdruntime_convenience_la-msvc.lo +-@DRUNTIME_CPU_AARCH64_TRUE@am__objects_37 = config/aarch64/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_ARM_TRUE@am__objects_38 = config/arm/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_MIPS_TRUE@am__objects_39 = config/mips/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_POWERPC_TRUE@am__objects_40 = config/powerpc/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_41 = config/mingw/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_42 = config/x86/libgdruntime_convenience_la-switchcontext.lo +-@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_43 = config/systemz/libgdruntime_convenience_la-get_tls_offset.lo +-@DRUNTIME_CPU_S390_TRUE@am__objects_44 = 
config/s390/libgdruntime_convenience_la-get_tls_offset.lo +-am__objects_45 = $(am__objects_5) $(am__objects_7) $(am__objects_9) \ ++@DRUNTIME_CPU_AARCH64_TRUE@am__objects_38 = config/aarch64/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_ARM_TRUE@am__objects_39 = config/arm/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_LOONGARCH_TRUE@am__objects_40 = config/loongarch/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_MIPS_TRUE@am__objects_41 = config/mips/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_POWERPC_TRUE@am__objects_42 = config/powerpc/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_TRUE@am__objects_43 = config/mingw/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_X86_TRUE@@DRUNTIME_OS_MINGW_FALSE@am__objects_44 = config/x86/libgdruntime_convenience_la-switchcontext.lo ++@DRUNTIME_CPU_SYSTEMZ_TRUE@am__objects_45 = config/systemz/libgdruntime_convenience_la-get_tls_offset.lo ++@DRUNTIME_CPU_S390_TRUE@am__objects_46 = config/s390/libgdruntime_convenience_la-get_tls_offset.lo ++am__objects_47 = $(am__objects_5) $(am__objects_7) $(am__objects_9) \ + $(am__objects_11) $(am__objects_13) $(am__objects_15) \ + $(am__objects_17) $(am__objects_19) $(am__objects_36) \ + $(am__objects_23) $(am__objects_37) $(am__objects_38) \ + $(am__objects_39) $(am__objects_40) $(am__objects_41) \ +- $(am__objects_42) $(am__objects_43) $(am__objects_44) +-am__objects_46 = $(am__objects_1) $(am__objects_35) $(am__objects_3) \ +- $(am__objects_45) $(am__objects_33) +-am__objects_47 = $(am__objects_46) +-am_libgdruntime_convenience_la_OBJECTS = $(am__objects_47) ++ $(am__objects_42) $(am__objects_43) $(am__objects_44) \ ++ $(am__objects_45) $(am__objects_46) ++am__objects_48 = $(am__objects_1) $(am__objects_35) $(am__objects_3) \ ++ $(am__objects_47) $(am__objects_33) ++am__objects_49 = $(am__objects_48) ++am_libgdruntime_convenience_la_OBJECTS = $(am__objects_49) + 
libgdruntime_convenience_la_OBJECTS = \ + $(am_libgdruntime_convenience_la_OBJECTS) + AM_V_P = $(am__v_P_@AM_V@) +@@ -787,7 +792,7 @@ DRUNTIME_SOURCES_CONFIGURED = $(am__append_1) $(am__append_2) \ + $(am__append_9) $(am__append_10) $(am__append_11) \ + $(am__append_12) $(am__append_13) $(am__append_14) \ + $(am__append_15) $(am__append_16) $(am__append_17) \ +- $(am__append_18) ++ $(am__append_18) $(am__append_19) + + # Provide __start_minfo, __stop_minfo if linker doesn't. + @DRUNTIME_OS_MINFO_BRACKETING_FALSE@DRTSTUFF = gcc/drtbegin.o gcc/drtend.o +@@ -1900,6 +1905,11 @@ config/arm/$(am__dirstamp): + @: > config/arm/$(am__dirstamp) + config/arm/libgdruntime_la-switchcontext.lo: \ + config/arm/$(am__dirstamp) ++config/loongarch/$(am__dirstamp): ++ @$(MKDIR_P) config/loongarch ++ @: > config/loongarch/$(am__dirstamp) ++config/loongarch/libgdruntime_la-switchcontext.lo: \ ++ config/loongarch/$(am__dirstamp) + config/mips/$(am__dirstamp): + @$(MKDIR_P) config/mips + @: > config/mips/$(am__dirstamp) +@@ -1940,6 +1950,8 @@ config/aarch64/libgdruntime_convenience_la-switchcontext.lo: \ + config/aarch64/$(am__dirstamp) + config/arm/libgdruntime_convenience_la-switchcontext.lo: \ + config/arm/$(am__dirstamp) ++config/loongarch/libgdruntime_convenience_la-switchcontext.lo: \ ++ config/loongarch/$(am__dirstamp) + config/mips/libgdruntime_convenience_la-switchcontext.lo: \ + config/mips/$(am__dirstamp) + config/powerpc/libgdruntime_convenience_la-switchcontext.lo: \ +@@ -1964,6 +1976,8 @@ mostlyclean-compile: + -rm -f config/arm/*.lo + -rm -f config/mingw/*.$(OBJEXT) + -rm -f config/mingw/*.lo ++ -rm -f config/loongarch/*.$(OBJEXT) ++ -rm -f config/loongarch/*.lo + -rm -f config/mips/*.$(OBJEXT) + -rm -f config/mips/*.lo + -rm -f config/powerpc/*.$(OBJEXT) +@@ -2087,7 +2101,10 @@ config/aarch64/libgdruntime_la-switchcontext.lo: config/aarch64/switchcontext.S + config/arm/libgdruntime_la-switchcontext.lo: config/arm/switchcontext.S + $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) 
$(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/arm/libgdruntime_la-switchcontext.lo `test -f 'config/arm/switchcontext.S' || echo '$(srcdir)/'`config/arm/switchcontext.S + +-config/mips/libgdruntime_la-switchcontext.lo: config/mips/switchcontext.S ++config/loongarch/libgdruntime_la-switchcontext.lo: config/loongarch/switchcontext.S ++ $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/loongarch/libgdruntime_la-switchcontext.lo `test -f 'config/loongarch/switchcontext.S' || echo '$(srcdir)/'`config/loongarch/switchcontext.S ++ ++config/mips/libgdruntime_la-switchcontext.lo: config/mips/switchcontext.S + $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/mips/libgdruntime_la-switchcontext.lo `test -f 'config/mips/switchcontext.S' || echo '$(srcdir)/'`config/mips/switchcontext.S + + config/powerpc/libgdruntime_la-switchcontext.lo: config/powerpc/switchcontext.S +@@ -2111,6 +2128,9 @@ config/aarch64/libgdruntime_convenience_la-switchcontext.lo: config/aarch64/swit + config/arm/libgdruntime_convenience_la-switchcontext.lo: config/arm/switchcontext.S + $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/arm/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/arm/switchcontext.S' || echo '$(srcdir)/'`config/arm/switchcontext.S + ++config/loongarch/libgdruntime_convenience_la-switchcontext.lo: config/loongarch/switchcontext.S ++ $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) 
$(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/loongarch/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/loongarch/switchcontext.S' || echo '$(srcdir)/'`config/loongarch/switchcontext.S ++ + config/mips/libgdruntime_convenience_la-switchcontext.lo: config/mips/switchcontext.S + $(AM_V_CPPAS)$(LIBTOOL) $(AM_V_lt) $(libgdruntime_convenience_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) -c -o config/mips/libgdruntime_convenience_la-switchcontext.lo `test -f 'config/mips/switchcontext.S' || echo '$(srcdir)/'`config/mips/switchcontext.S + +@@ -2158,6 +2178,7 @@ clean-libtool: + -rm -rf config/aarch64/.libs config/aarch64/_libs + -rm -rf config/arm/.libs config/arm/_libs + -rm -rf config/mingw/.libs config/mingw/_libs ++ -rm -rf config/loongarch/.libs config/loongarch/_libs + -rm -rf config/mips/.libs config/mips/_libs + -rm -rf config/powerpc/.libs config/powerpc/_libs + -rm -rf config/s390/.libs config/s390/_libs +@@ -2319,6 +2340,7 @@ distclean-generic: + -rm -f config/aarch64/$(am__dirstamp) + -rm -f config/arm/$(am__dirstamp) + -rm -f config/mingw/$(am__dirstamp) ++ -rm -f config/loongarch/$(am__dirstamp) + -rm -f config/mips/$(am__dirstamp) + -rm -f config/powerpc/$(am__dirstamp) + -rm -f config/s390/$(am__dirstamp) +diff --git a/libphobos/m4/druntime/cpu.m4 b/libphobos/m4/druntime/cpu.m4 +index db3a92c15..3461b2d3c 100644 +--- a/libphobos/m4/druntime/cpu.m4 ++++ b/libphobos/m4/druntime/cpu.m4 +@@ -15,6 +15,9 @@ AC_DEFUN([DRUNTIME_CPU_SOURCES], + ;; + arm*) druntime_target_cpu_parsed="arm" + ;; ++ loongarch*) ++ druntime_target_cpu_parsed="loongarch" ++ ;; + mips*) druntime_target_cpu_parsed="mips" + ;; + powerpc*) +@@ -34,6 +37,8 @@ AC_DEFUN([DRUNTIME_CPU_SOURCES], + [test "$druntime_target_cpu_parsed" = "aarch64"]) + AM_CONDITIONAL([DRUNTIME_CPU_ARM], + [test "$druntime_target_cpu_parsed" = "arm"]) ++ AM_CONDITIONAL([DRUNTIME_CPU_LOONGARCH], ++ [test "$druntime_target_cpu_parsed" = "loongarch"]) + AM_CONDITIONAL([DRUNTIME_CPU_MIPS], + [test "$druntime_target_cpu_parsed" = "mips"]) + AM_CONDITIONAL([DRUNTIME_CPU_POWERPC], +-- 
+2.43.0 + diff --git a/0189-LoongArch-fix-building-errors.patch b/0189-LoongArch-fix-building-errors.patch new file mode 100644 index 0000000..e6e930d --- /dev/null +++ b/0189-LoongArch-fix-building-errors.patch @@ -0,0 +1,273 @@ +From 142ae446cab26f1beb81a53a7da3c477ce42df40 Mon Sep 17 00:00:00 2001 +From: Peng Fan +Date: Mon, 28 Oct 2024 09:02:51 +0000 +Subject: [PATCH] LoongArch: fix building errors. + +--- + config/mt-loongarch-mlib | 2 +- + gcc/config/loongarch/loongarch-evolution.h | 2 +- + gcc/config/loongarch/loongarch-opts.cc | 1 + + gcc/config/loongarch/loongarch-str.h | 11 +++--- + gcc/config/loongarch/loongarch.cc | 9 +---- + gcc/config/loongarch/loongarch.md | 44 ++++++++++++++++------ + gcc/config/loongarch/simd.md | 15 +++++--- + gcc/doc/invoke.texi | 3 +- + 8 files changed, 53 insertions(+), 34 deletions(-) + +diff --git a/config/mt-loongarch-mlib b/config/mt-loongarch-mlib +index 4cfe568f1..bbbba277f 100644 +--- a/config/mt-loongarch-mlib ++++ b/config/mt-loongarch-mlib +@@ -1 +1 @@ +-FLAGS_FOR_TARGET += -fmultiflags ++FLAGS_FOR_TARGET += +diff --git a/gcc/config/loongarch/loongarch-evolution.h b/gcc/config/loongarch/loongarch-evolution.h +index d64996481..7e8e602c7 100644 +--- a/gcc/config/loongarch/loongarch-evolution.h ++++ b/gcc/config/loongarch/loongarch-evolution.h +@@ -1,7 +1,7 @@ + /* Generated automatically by "genstr" from "isa-evolution.in". + Please do not edit this file directly. + +- Copyright (C) 2023 Free Software Foundation, Inc. ++ Copyright (C) 2023-2024 Free Software Foundation, Inc. + + This file is part of GCC. 
+ +diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc +index 735daeb7c..1d08bb6a1 100644 +--- a/gcc/config/loongarch/loongarch-opts.cc ++++ b/gcc/config/loongarch/loongarch-opts.cc +@@ -1071,6 +1071,7 @@ loongarch_init_misc_options (struct gcc_options *opts, + + #undef INIT_TARGET_FLAG + ++#define TARGET_DIRECT_EXTERN_ACCESS_OPTS_P(opts) (((opts->x_target_flags) & MASK_DIRECT_EXTERN_ACCESS) != 0) + /* Set mexplicit-relocs default. */ + if (opts->x_la_opt_explicit_relocs == M_OPT_UNSET) + opts->x_la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS +diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h +index 3cbe12f7b..13d161a8c 100644 +--- a/gcc/config/loongarch/loongarch-str.h ++++ b/gcc/config/loongarch/loongarch-str.h +@@ -66,9 +66,10 @@ along with GCC; see the file COPYING3. If not see + #define STR_CMODEL_LARGE "large" + #define STR_CMODEL_EXTREME "extreme" + +-#define OPTSTR_FRECIPE "frecipe" +-#define OPTSTR_DIV32 "div32" +-#define OPTSTR_LAM_BH "lam-bh" +-#define OPTSTR_LAMCAS "lamcas" +-#define OPTSTR_LD_SEQ_SA "ld-seq-sa" ++#define OPTSTR_FRECIPE "frecipe" ++#define OPTSTR_DIV32 "div32" ++#define OPTSTR_LAM_BH "lam-bh" ++#define OPTSTR_LAMCAS "lamcas" ++#define OPTSTR_LD_SEQ_SA "ld-seq-sa" ++ + #endif /* LOONGARCH_STR_H */ +diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc +index 53bd8d7ec..6be0d80b3 100644 +--- a/gcc/config/loongarch/loongarch.cc ++++ b/gcc/config/loongarch/loongarch.cc +@@ -764,14 +764,7 @@ loongarch_setup_incoming_varargs (cumulative_args_t cum, + argument. Advance a local copy of CUM past the last "real" named + argument, to find out how many registers are left over. */ + local_cum = *get_cumulative_args (cum); +- +- /* For a C23 variadic function w/o any named argument, and w/o an +- artifical argument for large return value, skip advancing args. +- There is such an artifical argument iff. arg.type is non-NULL +- (PR 114175). 
*/ +- if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)) +- || arg.type != NULL_TREE) +- loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg); ++ loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg); + + /* Found out how many registers we need to save. */ + gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs; +diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md +index 66236a7c7..d8d444c7a 100644 +--- a/gcc/config/loongarch/loongarch.md ++++ b/gcc/config/loongarch/loongarch.md +@@ -32,6 +32,7 @@ + UNSPEC_FCLASS + UNSPEC_FMAX + UNSPEC_FMIN ++ UNSPEC_COPYSIGN + UNSPEC_FTINT + UNSPEC_FTINTRM + UNSPEC_FTINTRP +@@ -415,11 +416,13 @@ + + ;; A mode for anything with 32 bits or more, and able to be loaded with + ;; the same addressing mode as ld.w. +-(define_mode_iterator LD_AT_LEAST_32_BIT [GPR ANYF]) ++;; (define_mode_iterator LD_AT_LEAST_32_BIT [GPR ANYF]) ++(define_mode_iterator LD_AT_LEAST_32_BIT [(SI "") (DI "TARGET_64BIT") (SF "TARGET_HARD_FLOAT") (DF "TARGET_DOUBLE_FLOAT")]) + + ;; A mode for anything able to be stored with the same addressing mode as + ;; st.w. +-(define_mode_iterator ST_ANY [QHWD ANYF]) ++;; (define_mode_iterator ST_ANY [QHWD ANYF]) ++(define_mode_iterator ST_ANY [(QI "") (HI "") (SI "") (DI "TARGET_64BIT") (SF "TARGET_HARD_FLOAT") (DF "TARGET_DOUBLE_FLOAT")]) + + ;; A mode for anything legal as a input of a div or mod instruction. + (define_mode_iterator DIV [(DI "TARGET_64BIT") +@@ -590,6 +593,10 @@ + (define_code_attr sel [(eq "masknez") (ne "maskeqz")]) + (define_code_attr selinv [(eq "maskeqz") (ne "masknez")]) + ++(define_int_attr lrint_allow_inexact [(UNSPEC_FTINT "1") ++ (UNSPEC_FTINTRM "0") ++ (UNSPEC_FTINTRP "0")]) ++ + ;; Iterator and attributes for floating-point to fixed-point conversion + ;; instructions. 
+ (define_int_iterator LRINT [UNSPEC_FTINT UNSPEC_FTINTRM UNSPEC_FTINTRP]) +@@ -625,7 +632,8 @@ + ;; so the redundant sign extension can be removed if the output is used as + ;; an input of a bitwise operation. Note plus, rotl, and div are handled + ;; separately. +-(define_code_iterator shift_w [any_shift rotatert]) ++;; (define_code_iterator shift_w [any_shift rotatert]) ++(define_code_iterator shift_w [ashift ashiftrt lshiftrt rotatert]) + (define_code_iterator arith_w [minus mult]) + + (define_expand "3" +@@ -1324,8 +1332,9 @@ + + (define_insn "copysign3" + [(set (match_operand:ANYF 0 "register_operand" "=f") +- (copysign:ANYF (match_operand:ANYF 1 "register_operand" "f") +- (match_operand:ANYF 2 "register_operand" "f")))] ++ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")] ++ UNSPEC_COPYSIGN))] + "TARGET_HARD_FLOAT" + "fcopysign.\t%0,%1,%2" + [(set_attr "type" "fcopysign") +@@ -2722,12 +2731,13 @@ + (set_attr "mode" "")]) + + ;; Convert floating-point numbers to integers ++;; ( == UNSPEC_FTINT + (define_insn "2" + [(set (match_operand:ANYFI 0 "register_operand" "=f") + (unspec:ANYFI [(match_operand:ANYF 1 "register_operand" "f")] + LRINT))] + "TARGET_HARD_FLOAT && +- ( == UNSPEC_FTINT ++ ( + || flag_fp_int_builtin_inexact + || !flag_trapping_math)" + "ftint.. 
%0,%1" +@@ -4135,15 +4145,26 @@ + (136 "isnormal") + (952 "isfinite")]) + +-(define_expand "2" ++;;(define_expand "2" ++;; [(match_operand:SI 0 "register_operand" "=r") ++;; (match_operand:ANYF 1 "register_operand" " f") ++;; (const_int FCLASS_MASK)] ++;; "TARGET_HARD_FLOAT" ++;; { ++;; rtx ft0 = gen_reg_rtx (SImode); ++;; rtx t0 = gen_reg_rtx (word_mode); ++;; rtx mask = GEN_INT (); ++ ++(define_expand "fclass_optab2" ++ [(unspec:ANYF + [(match_operand:SI 0 "register_operand" "=r") +- (match_operand:ANYF 1 "register_operand" " f") +- (const_int FCLASS_MASK)] ++ (match_operand:ANYF 1 "register_operand" " f")] ++ UNSPEC_FCLASS)] + "TARGET_HARD_FLOAT" + { + rtx ft0 = gen_reg_rtx (SImode); + rtx t0 = gen_reg_rtx (word_mode); +- rtx mask = GEN_INT (); ++ rtx mask = GEN_INT (GET_MODE_MASK (mode)); + + emit_insn (gen_fclass_ (ft0, operands[1])); + +@@ -4165,7 +4186,8 @@ + emit_move_insn (operands[0], t0); + + DONE; +- }) ++ } ++ [(set_attr "mode" "")]) + + (define_insn "bytepick_w_" + [(set (match_operand:SI 0 "register_operand" "=r") +diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md +index c28b95282..9e4c08196 100644 +--- a/gcc/config/loongarch/simd.md ++++ b/gcc/config/loongarch/simd.md +@@ -30,10 +30,13 @@ + (define_mode_iterator FLASX [V4DF V8SF]) + + ;; All integer modes available +-(define_mode_iterator IVEC [(ILSX "ISA_HAS_LSX") (ILASX "ISA_HAS_LASX")]) ++;; (define_mode_iterator IVEC [(ILSX "ISA_HAS_LSX") (ILASX "ISA_HAS_LASX")]) ++(define_mode_iterator IVEC [(V2DI "ISA_HAS_LSX") (V4SI "ISA_HAS_LSX") (V8HI "ISA_HAS_LSX") (V16QI "ISA_HAS_LSX") ++ (V4DI "ISA_HAS_LASX") (V8SI "ISA_HAS_LASX") (V16HI "ISA_HAS_LASX") (V32QI "ISA_HAS_LASX")]) + + ;; All FP modes available +-(define_mode_iterator FVEC [(FLSX "ISA_HAS_LSX") (FLASX "ISA_HAS_LASX")]) ++(define_mode_iterator FVEC [(V2DF "ISA_HAS_LSX") (V4SF "ISA_HAS_LSX") ++ (V4DF "ISA_HAS_LASX") (V8SF "ISA_HAS_LASX")]) + + ;; Mnemonic prefix, "x" for LASX modes. 
+ (define_mode_attr x [(V2DI "") (V4SI "") (V8HI "") (V16QI "") +@@ -162,12 +165,12 @@ + ;; Expand the standard-named patterns to vfrint instructions if + ;; raising inexact exception is allowed. + ++;; " == UNSPEC_SIMD_FRINT || + (define_expand "2" + [(set (match_operand:FVEC 0 "register_operand" "=f") + (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")] + SIMD_FRINT))] +- " == UNSPEC_SIMD_FRINT || +- flag_fp_int_builtin_inexact || ++ "flag_fp_int_builtin_inexact || + !flag_trapping_math") + + ;; ftrunc is like btrunc, but it's allowed to raise inexact exception +@@ -221,13 +224,13 @@ + ;; Expand the standard-named patterns to vftint instructions if + ;; raising inexact exception. + ++;; " == UNSPEC_SIMD_FRINT || + (define_expand "l2" + [(set (match_operand: 0 "register_operand" "=f") + (fix: + (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")] + SIMD_FRINT)))] +- " == UNSPEC_SIMD_FRINT || +- flag_fp_int_builtin_inexact || ++ "flag_fp_int_builtin_inexact || + !flag_trapping_math") + + ;; fix_trunc is allowed to raise inexact exception even if +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index d2c52cdf4..8a09938fc 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1006,8 +1006,7 @@ Objective-C and Objective-C++ Dialects}. 
+ -mcond-move-float -mno-cond-move-float @gol + -memcpy -mno-memcpy -mstrict-align -mno-strict-align @gol + -mmax-inline-memcpy-size=@var{n} @gol +--mexplicit-relocs -mno-explicit-relocs @gol +--mdirect-extern-access -mno-direct-extern-access @gol ++-mexplicit-relocs=@var{style} -mexplicit-relocs -mno-explicit-relocs @gol + -mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as @gol + -mrecip -mrecip=@var{opt} -mfrecipe -mno-frecipe -mdiv32 -mno-div32 @gol + -mlam-bh -mno-lam-bh -mlamcas -mno-lamcas -mld-seq-sa -mno-ld-seq-sa @gol +-- +2.45.2 + diff --git a/0190-tree-optimization-110702-avoid-zero-based-memory-ref.patch b/0190-tree-optimization-110702-avoid-zero-based-memory-ref.patch new file mode 100644 index 0000000..5f31522 --- /dev/null +++ b/0190-tree-optimization-110702-avoid-zero-based-memory-ref.patch @@ -0,0 +1,119 @@ +From 13dfb01e5c30c3bd09333ac79d6ff96a617fea67 Mon Sep 17 00:00:00 2001 +From: Richard Biener +Date: Thu, 3 Aug 2023 13:11:12 +0200 +Subject: [PATCH] tree-optimization/110702 - avoid zero-based memory references + in IVOPTs + +Sometimes IVOPTs chooses a weird induction variable which downstream +leads to issues. Most of the times we can fend those off during costing +by rejecting the candidate but it looks like the address description +costing synthesizes is different from what we end up generating so +the following fixes things up at code generation time. Specifically +we avoid the create_mem_ref_raw fallback which uses a literal zero +address base with the actual base in index2. For the case in question +we have the address + + type = unsigned long + offset = 0 + elements = { + [0] = &e * -3, + [1] = (sizetype) a.9_30 * 232, + [2] = ivtmp.28_44 * 4 + } + +from which we code generate the problematical + + _3 = MEM[(long int *)0B + ivtmp.36_9 + ivtmp.28_44 * 4]; + +which references the object at address zero. 
The patch below +recognizes the fallback after the fact and transforms the +TARGET_MEM_REF memory reference into a LEA for which this form +isn't problematic: + + _24 = &MEM[(long int *)0B + ivtmp.36_34 + ivtmp.28_44 * 4]; + _3 = *_24; + +hereby avoiding the correctness issue. We'd later conclude the +program terminates at the null pointer dereference and make the +function pure, miscompiling the main function of the testcase. + + PR tree-optimization/110702 + * tree-ssa-loop-ivopts.cc (rewrite_use_address): When + we created a NULL pointer based access rewrite that to + a LEA. + + * gcc.dg/torture/pr110702.c: New testcase. +--- + gcc/testsuite/gcc.dg/torture/pr110702.c | 31 +++++++++++++++++++++++++ + gcc/tree-ssa-loop-ivopts.cc | 17 +++++++++++++- + 2 files changed, 47 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.dg/torture/pr110702.c + +diff --git a/gcc/testsuite/gcc.dg/torture/pr110702.c b/gcc/testsuite/gcc.dg/torture/pr110702.c +new file mode 100644 +index 00000000000..aab9c7d923e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/torture/pr110702.c +@@ -0,0 +1,31 @@ ++/* { dg-do run } */ ++ ++void abort (void); ++ ++int a, b, c, d; ++long e[9][7][4]; ++ ++void f() ++{ ++ for (; a >= 0; a--) ++ { ++ b = 0; ++ for (; b <= 3; b++) ++ { ++ c = 0; ++ for (; c <= 3; c++) ++ { ++ int *g = &d; ++ *g = e[0][0][b] | e[a][b][a]; ++ } ++ } ++ } ++} ++ ++int main() ++{ ++ f(); ++ if (a != -1) ++ abort (); ++ return 0; ++} +diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc +index 92fc1c7d734..934897af691 100644 +--- a/gcc/tree-ssa-loop-ivopts.cc ++++ b/gcc/tree-ssa-loop-ivopts.cc +@@ -7630,7 +7630,22 @@ rewrite_use_address (struct ivopts_data *data, + true, GSI_SAME_STMT); + } + else +- copy_ref_info (ref, *use->op_p); ++ { ++ /* When we end up confused enough and have no suitable base but ++ stuffed everything to index2 use a LEA for the address and ++ create a plain MEM_REF to avoid basing a memory reference ++ on address zero which 
create_mem_ref_raw does as fallback. */ ++ if (TREE_CODE (ref) == TARGET_MEM_REF ++ && TMR_INDEX2 (ref) != NULL_TREE ++ && integer_zerop (TREE_OPERAND (ref, 0))) ++ { ++ ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, 0)), ref); ++ ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE, ++ true, GSI_SAME_STMT); ++ ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type)); ++ } ++ copy_ref_info (ref, *use->op_p); ++ } + + *use->op_p = ref; + } +-- +2.45.2 + diff --git a/0191-LoongArch-Change-OSDIR-for-distribution.patch b/0191-LoongArch-Change-OSDIR-for-distribution.patch new file mode 100644 index 0000000..374588c --- /dev/null +++ b/0191-LoongArch-Change-OSDIR-for-distribution.patch @@ -0,0 +1,25 @@ +From 25423cf92026221b7c8798533c40d3e6269a1d7c Mon Sep 17 00:00:00 2001 +From: Peng Fan +Date: Thu, 31 Oct 2024 02:01:49 +0000 +Subject: [PATCH] LoongArch: Change OSDIR for distribution + +Signed-off-by: Peng Fan +--- + gcc/config/loongarch/t-linux | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/gcc/config/loongarch/t-linux b/gcc/config/loongarch/t-linux +index 7cd7cde25..1d1f42596 100644 +--- a/gcc/config/loongarch/t-linux ++++ b/gcc/config/loongarch/t-linux +@@ -28,4 +28,7 @@ ifeq ($(filter LA_DISABLE_MULTILIB,$(tm_defines)),) + MULTILIB_OSDIRNAMES += mabi.lp64f=$(MULTIOSDIR_lp64f) + MULTILIB_OSDIRNAMES += mabi.lp64s=$(MULTIOSDIR_lp64s) + ++else ++ MULTILIB_OSDIRNAMES := ../lib64 ++ + endif +-- +2.45.2 + diff --git a/gcc.spec b/gcc.spec index 9262a52..1daa3a0 100644 --- a/gcc.spec +++ b/gcc.spec @@ -2,7 +2,7 @@ %global gcc_major 12 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. 
-%global gcc_release 31 +%global gcc_release 32 %global _unpackaged_files_terminate_build 0 %global _performance_build 1 @@ -72,9 +72,6 @@ %global _lib lib %global _smp_mflags -j8 %endif -%ifarch loongarch64 -%global _lib lib -%endif %global isl_enable 0 %global check_enable 0 @@ -338,6 +335,198 @@ Patch3127: LoongArch-Use-finer-grained-DBAR-hints.patch Patch3128: LoongArch-Add-LA664-support.patch Patch3129: LoongArch-Fix-internal-error-running-gcc-march-nativ.patch Patch3130: LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch +# -- Update to master, latest commit: 60e99901aef8e7efd4d60adf9f82021fcbd1101f +Patch3131: 0001-LoongArch-Reimplement-multilib-build-option-handling.patch +Patch3132: 0002-LoongArch-Check-whether-binutils-supports-the-relax-.patch +Patch3133: 0003-Modify-gas-uleb128-support-test.patch +Patch3134: 0004-LoongArch-Optimizations-of-vector-construction.patch +Patch3135: 0005-LoongArch-Replace-UNSPEC_FCOPYSIGN-with-copysign-RTL.patch +Patch3136: 0006-LoongArch-Adjust-makefile-dependency-for-loongarch-h.patch +Patch3137: 0007-LoongArch-Enable-vect.exp-for-LoongArch.-PR111424.patch +Patch3138: 0008-LoongArch-Delete-macro-definition-ASM_OUTPUT_ALIGN_W.patch +Patch3139: 0009-LoongArch-Fix-vec_initv32qiv16qi-template-to-avoid-I.patch +Patch3140: 0010-LoongArch-Use-fcmp.caf.s-instead-of-movgr2cf-for-zer.patch +Patch3141: 0011-LoongArch-Implement-avg-and-sad-standard-names.patch +Patch3142: 0012-LoongArch-Implement-vec_widen-standard-names.patch +Patch3143: 0013-LoongArch-Implement-the-new-vector-cost-model-framew.patch +Patch3144: 0014-LoongArch-Define-macro-CLEAR_INSN_CACHE.patch +Patch3145: 0015-LoongArch-Add-enum-style-mexplicit-relocs-option.patch +Patch3146: 0016-LoongArch-Use-explicit-relocs-for-GOT-access-when-me.patch +Patch3147: 0017-LoongArch-Use-explicit-relocs-for-TLS-access-with-me.patch +Patch3148: 0018-LoongArch-Use-explicit-relocs-for-addresses-only-use.patch +Patch3149: 
0019-LoongArch-Implement-__builtin_thread_pointer-for-TLS.patch +Patch3150: 0020-LoongArch-Fix-vfrint-releated-comments-in-lsxintrin..patch +Patch3151: 0021-LoongArch-Enable-vcond_mask_mn-expanders-for-SF-DF-m.patch +Patch3152: 0022-LoongArch-Define-HAVE_AS_TLS-to-0-if-it-s-undefined-.patch +Patch3153: 0023-LoongArch-Fix-instruction-name-typo-in-lsx_vreplgr2v.patch +Patch3154: 0024-LoongArch-Use-simplify_gen_subreg-instead-of-gen_rtx.patch +Patch3155: 0025-LoongArch-Optimize-single-used-address-with-mexplici.patch +Patch3156: 0026-LoongArch-Disable-relaxation-if-the-assembler-don-t-.patch +Patch3157: 0027-LoongArch-Remove-redundant-barrier-instructions-befo.patch +Patch3158: 0028-LoongArch-Fix-scan-assembler-times-of-lasx-lsx-test-.patch +Patch3159: 0029-LoongArch-Increase-cost-of-vector-aligned-store-load.patch +Patch3160: 0030-LoongArch-Implement-C-LT-Z_DEFINED_VALUE_AT_ZERO.patch +Patch3161: 0031-LoongArch-Handle-vectorized-copysign-x-1-expansion-e.patch +Patch3162: 0032-LoongArch-Add-code-generation-support-for-call36-fun.patch +Patch3163: 0033-LoongArch-Implement-atomic-operations-using-LoongArc.patch +Patch3164: 0034-LoongArch-atomic_load-and-atomic_store-are-implement.patch +Patch3165: 0035-LoongArch-genopts-Add-infrastructure-to-generate-cod.patch +Patch3166: 0036-LoongArch-Add-evolution-features-of-base-ISA-revisio.patch +Patch3167: 0037-LoongArch-Take-the-advantage-of-mdiv32-if-it-s-enabl.patch +Patch3168: 0038-LoongArch-Don-t-emit-dbar-0x700-if-mld-seq-sa.patch +Patch3169: 0039-LoongArch-Add-fine-grained-control-for-LAM_BH-and-LA.patch +Patch3170: 0040-LoongArch-Fix-mexplict-relocs-none-mcmodel-medium-pr.patch +Patch3171: 0041-LoongArch-Modify-MUSL_DYNAMIC_LINKER.patch +Patch3172: 0042-LoongArch-Fix-libgcc-build-failure-when-libc-is-not-.patch +Patch3173: 0043-LoongArch-Optimize-LSX-vector-shuffle-on-floating-po.patch +Patch3174: 0044-LoongArch-Optimize-the-loading-of-immediate-numbers-.patch +Patch3175: 
0045-LoongArch-Fix-runtime-error-in-a-gcc-build-with-with.patch +Patch3176: 0046-LoongArch-Fix-usage-of-LSX-and-LASX-frint-ftint-inst.patch +Patch3177: 0047-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch +Patch3178: 0048-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch +Patch3179: 0049-LoongArch-Remove-lrint_allow_inexact.patch +Patch3180: 0050-LoongArch-Use-LSX-for-scalar-FP-rounding-with-explic.patch +Patch3181: 0051-LoongArch-Remove-duplicate-definition-of-CLZ_DEFINED.patch +Patch3182: 0052-LoongArch-Added-vectorized-hardware-inspection-for-t.patch +Patch3183: 0053-LoongArch-Accelerate-optimization-of-scalar-signed-u.patch +Patch3184: 0054-LoongArch-Optimize-vector-constant-extract-even-odd-.patch +Patch3185: 0055-LoongArch-Add-intrinsic-function-descriptions-for-LS.patch +Patch3186: 0056-LoongArch-Switch-loongarch-def-from-C-to-C-to-make-i.patch +Patch3187: 0057-LoongArch-Remove-the-definition-of-ISA_BASE_LA64V110.patch +Patch3188: 0058-LoongArch-Add-support-for-xorsign.patch +Patch3189: 0059-LoongArch-Add-support-for-LoongArch-V1.1-approximate.patch +Patch3190: 0060-LoongArch-Use-standard-pattern-name-for-xvfrsqrt-vfr.patch +Patch3191: 0061-LoongArch-Redefine-pattern-for-xvfrecip-vfrecip-inst.patch +Patch3192: 0062-LoongArch-New-options-mrecip-and-mrecip-with-ffast-m.patch +Patch3193: 0063-LoongArch-Vectorized-loop-unrolling-is-disable-for-d.patch +Patch3194: 0064-LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch +Patch3195: 0065-LoongArch-Fix-ICE-and-use-simplify_gen_subreg-instea.patch +Patch3196: 0066-LoongArch-Fix-eh_return-epilogue-for-normal-returns.patch +Patch3197: 0067-LoongArch-Allow-mcmodel-extreme-and-model-attribute-.patch +Patch3198: 0068-LoongArch-Fix-warnings-building-libgcc.patch +Patch3199: 0069-LoongArch-testsuite-Remove-XFAIL-in-vect-ftint-no-in.patch +Patch3200: 0070-LoongArch-Include-rtl.h-for-COSTS_N_INSNS-instead-of.patch +Patch3201: 0071-LoongArch-Fix-instruction-costs-PR112936.patch +Patch3202: 
0072-LoongArch-Add-alslsi3_extend.patch +Patch3203: 0073-LoongArch-Add-support-for-D-frontend.patch +Patch3204: 0074-libruntime-Add-fiber-context-switch-code-for-LoongAr.patch +Patch3205: 0075-LoongArch-Fix-FP-vector-comparsons-PR113034.patch +Patch3206: 0076-LoongArch-Use-force_reg-instead-of-gen_reg_rtx-emit_.patch +Patch3207: 0077-LoongArch-Clean-up-vec_init-expander.patch +Patch3208: 0078-LoongArch-Fix-incorrect-code-generation-for-sad-patt.patch +Patch3209: 0079-LoongArch-Modify-the-check-type-of-the-vector-builti.patch +Patch3210: 0080-LoongArch-extend.texi-Fix-typos-in-LSX-intrinsics.patch +Patch3211: 0081-LoongArch-Fix-builtin-function-prototypes-for-LASX-i.patch +Patch3212: 0082-LoongArch-Add-asm-modifiers-to-the-LSX-and-LASX-dire.patch +Patch3213: 0083-LoongArch-Implement-FCCmode-reload-and-cstore-ANYF-m.patch +Patch3214: 0084-LoongArch-Add-sign_extend-pattern-for-32-bit-rotate-.patch +Patch3215: 0085-LoongArch-Fixed-bug-in-bstrins_-mode-_for_ior_mask-t.patch +Patch3216: 0086-LoongArch-Fix-insn-output-of-vec_concat-templates-fo.patch +Patch3217: 0087-LoongArch-Fix-ICE-when-passing-two-same-vector-argum.patch +Patch3218: 0088-LoongArch-Expand-left-rotate-to-right-rotate-with-ne.patch +Patch3219: 0089-LoongArch-Fix-infinite-secondary-reloading-of-FCCmod.patch +Patch3220: 0090-LoongArch-Replace-mexplicit-relocs-auto-simple-used-.patch +Patch3221: 0091-LoongArch-Fix-the-format-of-bstrins_-mode-_for_ior_m.patch +Patch3222: 0092-LoongArch-Added-TLS-Le-Relax-support.patch +Patch3223: 0093-LoongArch-Provide-fmin-fmax-RTL-pattern-for-vectors.patch +Patch3224: 0094-LoongArch-Merge-constant-vector-permuatation-impleme.patch +Patch3225: 0095-LoongArch-testsuite-Fix-FAIL-in-lasx-xvstelm.c-file.patch +Patch3226: 0096-LoongArch-testsuite-Modify-the-test-behavior-of-the-.patch +Patch3227: 0097-LoongArch-testsuite-Delete-the-default-run-behavior-.patch +Patch3228: 0098-LoongArch-testsuite-Added-additional-vectorization-m.patch +Patch3229: 
0099-LoongArch-testsuite-Give-up-the-detection-of-the-gcc.patch +Patch3230: 0100-LoongArch-Fixed-the-problem-of-incorrect-judgment-of.patch +Patch3231: 0101-LoongArch-Improve-lasx_xvpermi_q_-LASX-mode-insn-pat.patch +Patch3232: 0102-LoongArch-Implement-vec_init-M-N-where-N-is-a-LSX-ve.patch +Patch3233: 0103-LoongArch-Handle-ISA-evolution-switches-along-with-o.patch +Patch3234: 0104-LoongArch-Rename-ISA_BASE_LA64V100-to-ISA_BASE_LA64.patch +Patch3235: 0105-LoongArch-Use-enums-for-constants.patch +Patch3236: 0106-LoongArch-Simplify-mexplicit-reloc-definitions.patch +Patch3237: 0107-LoongArch-testsuite-Add-loongarch-support-to-slp-21..patch +Patch3238: 0108-LoongArch-Optimized-some-of-the-symbolic-expansion-i.patch +Patch3239: 0109-LoongArch-Implement-option-save-restore.patch +Patch3240: 0110-LoongArch-Redundant-sign-extension-elimination-optim.patch +Patch3241: 0111-LoongArch-Redundant-sign-extension-elimination-optim.patch +Patch3242: 0112-LoongArch-Assign-the-u-attribute-to-the-mem-to-which.patch +Patch3243: 0113-LoongArch-testsuite-Fix-fail-in-gen-vect-2-25-.c-fil.patch +Patch3244: 0114-LoongArch-Remove-constraint-z-from-movsi_internal.patch +Patch3245: 0115-LoongArch-doc-Add-attribute-descriptions-defined-in-.patch +Patch3246: 0116-LoongArch-Disable-explicit-reloc-for-TLS-LD-GD-with-.patch +Patch3247: 0117-LoongArch-testsuite-Disable-stack-protector-for-got-.patch +Patch3248: 0118-LoongArch-Disable-TLS-type-symbols-from-generating-n.patch +Patch3249: 0119-LoongArch-Remove-vec_concatz-mode-pattern.patch +Patch3250: 0120-LoongArch-Optimize-implementation-of-single-precisio.patch +Patch3251: 0121-LoongArch-Define-LOGICAL_OP_NON_SHORT_CIRCUIT.patch +Patch3252: 0122-LoongArch-Split-vec_selects-of-bottom-elements-into-.patch +Patch3253: 0123-LoongArch-Modify-the-address-calculation-logic-for-o.patch +Patch3254: 0124-LoongArch-Merge-template-got_load_tls_-ld-gd-le-ie.patch +Patch3255: 0125-LoongArch-Add-the-macro-implementation-of-mcmodel-ex.patch +Patch3256: 
0126-LoongArch-Enable-explicit-reloc-for-extreme-TLS-GD-L.patch +Patch3257: 0127-LoongArch-Added-support-for-loading-__get_tls_addr-s.patch +Patch3258: 0128-LoongArch-Don-t-split-the-instructions-containing-re.patch +Patch3259: 0129-LoongArch-Adjust-cost-of-vector_stmt-that-match-mult.patch +Patch3260: 0130-LoongArch-Fix-incorrect-return-type-for-frecipe-frsq.patch +Patch3261: 0131-LoongArch-Fix-an-ODR-violation.patch +Patch3262: 0132-LoongArch-testsuite-Fix-gcc.dg-vect-vect-reduc-mul_-.patch +Patch3263: 0133-LoongArch-Avoid-out-of-bounds-access-in-loongarch_sy.patch +Patch3264: 0134-LoongArch-Fix-wrong-LSX-FP-vector-negation.patch +Patch3265: 0135-LoongArch-Fix-wrong-return-value-type-of-__iocsrrd_h.patch +Patch3266: 0136-LoongArch-Remove-redundant-symbol-type-conversions-i.patch +Patch3267: 0137-LoongArch-When-checking-whether-the-assembler-suppor.patch +Patch3268: 0138-LoongArch-Don-t-falsely-claim-gold-supported-in-topl.patch +Patch3269: 0139-LoongArch-NFC-Deduplicate-crc-instruction-defines.patch +Patch3270: 0140-LoongArch-Remove-unneeded-sign-extension-after-crc-c.patch +Patch3271: 0141-LoongArch-Allow-s9-as-a-register-alias.patch +Patch3272: 0142-LoongArch-testsuite-Rewrite-x-vfcmp-d-f-.c-to-avoid-.patch +Patch3273: 0143-LoongArch-Use-lib-instead-of-lib64-as-the-library-se.patch +Patch3274: 0144-LoongArch-testsuite-Fix-problems-with-incorrect-resu.patch +Patch3275: 0145-LoongArch-Fixed-an-issue-with-the-implementation-of-.patch +Patch3276: 0146-LoongArch-testsuite-Add-compilation-options-to-the-r.patch +Patch3277: 0147-LoongArch-Emit-R_LARCH_RELAX-for-TLS-IE-with-non-ext.patch +Patch3278: 0148-LoongArch-Remove-unused-and-incorrect-sge-u-_-X-mode.patch +Patch3279: 0149-LoongArch-Remove-masking-process-for-operand-3-of-xv.patch +Patch3280: 0150-LoongArch-Fix-C23-.-functions-returning-large-aggreg.patch +Patch3281: 0151-LoongArch-Remove-unused-useless-definitions.patch +Patch3282: 0152-LoongArch-Change-loongarch_expand_vec_cmp-s-return-t.patch +Patch3283: 
0153-LoongArch-Combine-UNITS_PER_FP_REG-and-UNITS_PER_FPR.patch +Patch3284: 0154-LoongArch-Fix-a-typo-PR-114407.patch +Patch3285: 0155-testsuite-Add-a-test-case-for-negating-FP-vectors-co.patch +Patch3286: 0156-LoongArch-Add-descriptions-of-the-compilation-option.patch +Patch3287: 0157-LoongArch-Split-loongarch_option_override_internal-i.patch +Patch3288: 0158-LoongArch-Regenerate-loongarch.opt.urls.patch +Patch3289: 0159-LoongArch-Add-support-for-TLS-descriptors.patch +Patch3290: 0160-LoongArch-Fix-missing-plugin-header.patch +Patch3291: 0161-LoongArch-Remove-unused-code.patch +Patch3292: 0162-LoongArch-Set-default-alignment-for-functions-jumps-.patch +Patch3293: 0163-LoongArch-Enable-switchable-target.patch +Patch3294: 0164-LoongArch-Define-ISA-versions.patch +Patch3295: 0165-LoongArch-Define-builtin-macros-for-ISA-evolutions.patch +Patch3296: 0166-LoongArch-Add-constraints-for-bit-string-operation-d.patch +Patch3297: 0167-LoongArch-Guard-REGNO-with-REG_P-in-loongarch_expand.patch +Patch3298: 0168-LoongArch-Fix-mode-size-comparision-in-loongarch_exp.patch +Patch3299: 0169-LoongArch-Use-bstrins-for-value-1u-const.patch +Patch3300: 0170-LoongArch-Tweak-IOR-rtx_cost-for-bstrins.patch +Patch3301: 0171-LoongArch-NFC-Dedup-and-sort-the-comment-in-loongarc.patch +Patch3302: 0172-LoongArch-Fix-explicit-relocs-extreme-tls-desc.c-tes.patch +Patch3303: 0173-LoongArch-Define-loongarch_insn_cost-and-set-the-cos.patch +Patch3304: 0174-LoongArch-TFmode-is-not-allowed-to-be-stored-in-the-.patch +Patch3305: 0175-LoongArch-Remove-unreachable-codes.patch +Patch3306: 0176-LoongArch-Organize-the-code-related-to-split-move-an.patch +Patch3307: 0177-LoongArch-Expand-some-SImode-operations-through-si3_.patch +Patch3308: 0178-LoongArch-Relax-ins_zero_bitmask_operand-and-remove-.patch +Patch3309: 0179-LoongArch-Rework-bswap-hi-si-di-2-definition.patch +Patch3310: 0180-testsuite-fix-dg-do-preprocess-typo.patch +Patch3311: 0181-LoongArch-Remove-gawk-extension-from-a-generator-scr.patch 
+Patch3312: 0182-LoongArch-Use-iorn-and-andn-standard-pattern-names.patch +Patch3313: 0183-LoongArch-Drop-vcond-u-expanders.patch +Patch3314: 0184-LoongArch-Provide-ashr-lshr-and-ashl-RTL-pattern-for.patch +Patch3315: 0185-LoongArch-Implement-scalar-isinf-isnormal-and-isfini.patch +Patch3316: 0186-LoongArch-Add-support-to-annotate-tablejump.patch +Patch3317: 0187-LoongArch-Fix-up-r15-4130.patch +Patch3318: 0188-libphobos-Update-build-scripts-for-LoongArch64.patch +Patch3319: 0189-LoongArch-fix-building-errors.patch +Patch3320: 0190-tree-optimization-110702-avoid-zero-based-memory-ref.patch +Patch3321: 0191-LoongArch-Change-OSDIR-for-distribution.patch %endif # On ARM EABI systems, we do want -gnueabi to be part of the @@ -1004,6 +1193,198 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch3128 -p1 %patch3129 -p1 %patch3130 -p1 +#-- +%patch3131 -p1 +%patch3132 -p1 +%patch3133 -p1 +%patch3134 -p1 +%patch3135 -p1 +%patch3136 -p1 +%patch3137 -p1 +%patch3138 -p1 +%patch3139 -p1 +%patch3140 -p1 +%patch3141 -p1 +%patch3142 -p1 +%patch3143 -p1 +%patch3144 -p1 +%patch3145 -p1 +%patch3146 -p1 +%patch3147 -p1 +%patch3148 -p1 +%patch3149 -p1 +%patch3150 -p1 +%patch3151 -p1 +%patch3152 -p1 +%patch3153 -p1 +%patch3154 -p1 +%patch3155 -p1 +%patch3156 -p1 +%patch3157 -p1 +%patch3158 -p1 +%patch3159 -p1 +%patch3160 -p1 +%patch3161 -p1 +%patch3162 -p1 +%patch3163 -p1 +%patch3164 -p1 +%patch3165 -p1 +%patch3166 -p1 +%patch3167 -p1 +%patch3168 -p1 +%patch3169 -p1 +%patch3170 -p1 +%patch3171 -p1 +%patch3172 -p1 +%patch3173 -p1 +%patch3174 -p1 +%patch3175 -p1 +%patch3176 -p1 +%patch3177 -p1 +%patch3178 -p1 +%patch3179 -p1 +%patch3180 -p1 +%patch3181 -p1 +%patch3182 -p1 +%patch3183 -p1 +%patch3184 -p1 +%patch3185 -p1 +%patch3186 -p1 +%patch3187 -p1 +%patch3188 -p1 +%patch3189 -p1 +%patch3190 -p1 +%patch3191 -p1 +%patch3192 -p1 +%patch3193 -p1 +%patch3194 -p1 +%patch3195 -p1 +%patch3196 -p1 +%patch3197 -p1 +%patch3198 -p1 +%patch3199 -p1 +%patch3200 -p1 +%patch3201 
-p1 +%patch3202 -p1 +%patch3203 -p1 +%patch3204 -p1 +%patch3205 -p1 +%patch3206 -p1 +%patch3207 -p1 +%patch3208 -p1 +%patch3209 -p1 +%patch3210 -p1 +%patch3211 -p1 +%patch3212 -p1 +%patch3213 -p1 +%patch3214 -p1 +%patch3215 -p1 +%patch3216 -p1 +%patch3217 -p1 +%patch3218 -p1 +%patch3219 -p1 +%patch3220 -p1 +%patch3221 -p1 +%patch3222 -p1 +%patch3223 -p1 +%patch3224 -p1 +%patch3225 -p1 +%patch3226 -p1 +%patch3227 -p1 +%patch3228 -p1 +%patch3229 -p1 +%patch3230 -p1 +%patch3231 -p1 +%patch3232 -p1 +%patch3233 -p1 +%patch3234 -p1 +%patch3235 -p1 +%patch3236 -p1 +%patch3237 -p1 +%patch3238 -p1 +%patch3239 -p1 +%patch3240 -p1 +%patch3241 -p1 +%patch3242 -p1 +%patch3243 -p1 +%patch3244 -p1 +%patch3245 -p1 +%patch3246 -p1 +%patch3247 -p1 +%patch3248 -p1 +%patch3249 -p1 +%patch3250 -p1 +%patch3251 -p1 +%patch3252 -p1 +%patch3253 -p1 +%patch3254 -p1 +%patch3255 -p1 +%patch3256 -p1 +%patch3257 -p1 +%patch3258 -p1 +%patch3259 -p1 +%patch3260 -p1 +%patch3261 -p1 +%patch3262 -p1 +%patch3263 -p1 +%patch3264 -p1 +%patch3265 -p1 +%patch3266 -p1 +%patch3267 -p1 +%patch3268 -p1 +%patch3269 -p1 +%patch3270 -p1 +%patch3271 -p1 +%patch3272 -p1 +%patch3273 -p1 +%patch3274 -p1 +%patch3275 -p1 +%patch3276 -p1 +%patch3277 -p1 +%patch3278 -p1 +%patch3279 -p1 +%patch3280 -p1 +%patch3281 -p1 +%patch3282 -p1 +%patch3283 -p1 +%patch3284 -p1 +%patch3285 -p1 +%patch3286 -p1 +%patch3287 -p1 +%patch3288 -p1 +%patch3289 -p1 +%patch3290 -p1 +%patch3291 -p1 +%patch3292 -p1 +%patch3293 -p1 +%patch3294 -p1 +%patch3295 -p1 +%patch3296 -p1 +%patch3297 -p1 +%patch3298 -p1 +%patch3299 -p1 +%patch3300 -p1 +%patch3301 -p1 +%patch3302 -p1 +%patch3303 -p1 +%patch3304 -p1 +%patch3305 -p1 +%patch3306 -p1 +%patch3307 -p1 +%patch3308 -p1 +%patch3309 -p1 +%patch3310 -p1 +%patch3311 -p1 +%patch3312 -p1 +%patch3313 -p1 +%patch3314 -p1 +%patch3315 -p1 +%patch3316 -p1 +%patch3317 -p1 +%patch3318 -p1 +%patch3319 -p1 +%patch3320 -p1 +%patch3321 -p1 %endif echo '%{_vendor} %{version}-%{release}' > gcc/DEV-PHASE @@ -3268,6 
+3649,12 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Tue Oct 29 2024 Peng Fan - 12.3.1-32 +- Type: Sync +- DESC: +- LoongArch: Sync patches from upstream +- Tweak OSDIR to be consistent with most other distributions. + * Thu Jul 11 2024 huyubiao - 12.3.1-31 - Type:SPEC - ID:NA -- Gitee